diff --git a/README.md b/README.md index 97b1e18..d6c6f7c 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ flat_scraper/ ├── .gitignore # Git ignore für .env und data/ ├── requirements.txt # Python Dependencies ├── Dockerfile # ARM64 optimiert +├── deploy.sh # Automatisches Deployment Script ├── docker-compose.yml # Automatisierung └── README.md ``` @@ -174,6 +175,232 @@ username: "${EMAIL_USERNAME:default@example.com}" # Mit Default ## Deployment auf Raspberry Pi +### Schritt-für-Schritt Anleitung + +#### Voraussetzungen +- Raspberry Pi 4+ (empfohlen) +- Docker & Docker Compose installiert +- Git für das Klonen des Repositories + +#### 1. Repository auf Raspberry Pi laden + +```bash +# Projekt klonen (REPOSITORY_URL durch die URL des Repositories ersetzen) +git clone <REPOSITORY_URL> flat_scraper +cd flat_scraper + +# Oder manuell kopieren +scp -r ./flat_scraper pi@raspberry-pi:~/ +``` + +#### 2. Environment konfigurieren + +```bash +# Environment Vorlage kopieren +cp .env.example .env + +# Mit Editor öffnen +nano .env +# Oder vim .env +``` + +**Deine SMTP-Daten eintragen:** +```bash +SMTP_SERVER=dein-smtp-server.com +SMTP_PORT=587 +EMAIL_USERNAME=deine-email@domain.com +EMAIL_PASSWORD=dein-app-password +EMAIL_FROM=deine-email@domain.com +EMAIL_TO=empfänger@domain.com +EMAIL_SECURITY=starttls +``` + +#### 3. Docker Image bauen + +```bash +# Image für ARM64 bauen (kann einige Minuten dauern) +docker build -t flat-scraper . 
+ +# Build-Status prüfen +docker images | grep flat-scraper +``` + +**Fehlerbehandlung:** +```bash +# Falls Docker Daemon nicht läuft: +sudo systemctl start docker +sudo systemctl enable docker + +# Falls Podman-Socket Fehler: +# Prüfen ob Docker oder Podman aktiv ist: +which docker +which podman + +# Docker Socket prüfen: +ls -la /var/run/docker.sock +sudo chmod 666 /var/run/docker.sock + +# Falls nötig, Docker neu starten: +sudo systemctl restart docker + +# Environment Variablen Fehler: +# Prüfen ob .env Datei existiert und korrekt ist: +ls -la .env +cat .env + +# Falls SMTP_PORT nicht gesetzt ist: +echo "SMTP_PORT=587" >> .env + +# Permission Fehler mit data/ Verzeichnis: +sudo chown -R $USER:$USER data/ +chmod 755 data/ +``` + +#### 4. Erster Testlauf + +```bash +# Einmaligen Scraper-Lauf testen +docker run --rm \ + --env-file $(pwd)/.env \ + -v $(pwd)/data:/app/data \ + -v $(pwd)/src/config:/app/src/config \ + flat-scraper python src/main.py +``` + +**Erwartete Ausgabe:** +``` +2026-02-15 10:30:00 - INFO - Starting flat scraper run +2026-02-15 10:30:00 - INFO - Start scraping nhg +Scraping PLZ 1120 Wien... +Scraping PLZ 1140 Wien... +Scraping PLZ 1150 Wien... +Scraping PLZ 1160 Wien... +2026-02-15 10:30:15 - INFO - Found X results for nhg +Email-Benachrichtigung gesendet an 1 Empfänger +2026-02-15 10:30:15 - INFO - Scraping completed: 1/1 sites successful +``` + +#### 5. Docker Compose konfigurieren + +```bash +# ARM64 Support aktivieren (wichtig für Raspberry Pi) +sed -i 's|# platform: linux/arm64|platform: linux/arm64|' docker-compose.yml + +# Konfiguration prüfen +cat docker-compose.yml +``` + +#### 6. 
Production Deployment + +```bash +# Manuelles Deployment +docker-compose up -d + +# Status prüfen +docker-compose ps + +# ODER: Automatisches Deployment Script verwenden +./deploy.sh +``` + +**Deployment Script Features:** +- ✅ **Automatisches Build** mit Fehlerprüfung +- ✅ **Image Cleanup** für Speicherplatz +- ✅ **Status Überwachung** vor/nach Deployment +- ✅ **Flexible Optionen** für verschiedene Szenarien + +```bash +# Vollständiges Deployment mit Cleanup +./deploy.sh + +# Deployment ohne Cleanup (für schnelle Tests) +./deploy.sh --no-cleanup + +# Nur Services neu starten (kein Build) +./deploy.sh --no-build + +# Verbose Output für Debugging +./deploy.sh --verbose + +# Hilfe anzeigen +./deploy.sh --help +``` + +**Erwartete Ausgabe:** +``` +NAME COMMAND SERVICE STATUS PORTS +flat_scraper-flat-scraper-1 "python src/main.py" flat-scraper running +flat_scraper-scheduler-1 "python -c 'import..." scheduler running +``` + +#### 7. Logs überwachen + +```bash +# Live Logs ansehen +docker-compose logs -f + +# Nur Scraper Logs +docker-compose logs -f flat-scraper + +# Nur Scheduler Logs +docker-compose logs -f scheduler +``` + +#### 8. Automatisierung verifizieren + +```bash +# Prüfen ob Scheduler läuft +docker-compose logs scheduler | grep "Scheduler started" + +# Nächsten Lauf prüfen (alle 6 Stunden) +docker-compose logs scheduler | tail -10 +``` + +#### 9. Daten persistenz prüfen + +```bash +# CSV Dateien prüfen +ls -la data/ + +# Inhalt ansehen +cat data/nhg_results.csv + +# Letzte Ergebnisse +tail -5 data/nhg_results.csv +``` + +#### 10. Wartung und Updates + +```bash +# Services stoppen +docker-compose down + +# Code aktualisieren +git pull origin main + +# Automatisches Deployment mit Script +./deploy.sh + +# ODER manueller Weg: +# Neues Image bauen +docker build -t flat-scraper . 
+ +# Services neu starten +docker-compose up -d + +# Alte Images aufräumen +docker image prune -f +``` + +**Deployment Script für Updates:** +```bash +# Vollständiges Update mit Cleanup +./deploy.sh + +# Schneller Restart ohne Build +./deploy.sh --no-build --no-cleanup +``` + ### ARM64 Support Der Dockerfile ist für ARM64 optimiert: @@ -206,6 +433,53 @@ RUN apt-get update && apt-get install -y chromium 2. **Keine Ergebnisse**: PLZ nicht verfügbar oder Website geändert 3. **Email funktioniert nicht**: SMTP-Einstellungen und Security prüfen 4. **Environment nicht geladen**: `.env` Datei prüfen und Rechte +5. **Docker Socket Fehler**: Podman vs Docker Konflikt +6. **TypeError: int() argument must be a string**: Environment-Variable fehlt und hat keinen Default-Wert +7. **Permission denied: data/nhg_results.csv**: data/ Verzeichnis gehört falschem User + +### Docker/Podman Konflikt lösen + +```bash +# 1. Prüfen welcher Container-Manager aktiv ist: +echo $CONTAINER_MANAGER +which docker +which podman + +# 2. Docker als Standard setzen (falls nötig): +export DOCKER_HOST=unix:///var/run/docker.sock + +# 3. Podman deaktivieren (falls gewünscht): +sudo systemctl disable podman +sudo systemctl stop podman + +# 4. Docker Socket Rechte prüfen: +sudo ls -la /var/run/docker.sock +sudo usermod -aG docker $USER # User zur Docker Gruppe hinzufügen +newgrp docker # Gruppe neu laden + +# 5. 
System neu starten nach Änderungen: +sudo systemctl restart docker +``` + +### Raspberry Pi spezifische Probleme + +```bash +# ARM64 Architektur prüfen: +uname -m +# Sollte: aarch64 oder armv7l + +# Docker Architektur prüfen: +docker version --format '{{.Server.Arch}}' + +# Falls x86_64 Images auf ARM64 laufen sollen: +# docker run --platform linux/arm64 flat-scraper + +# Memory prüfen (mindestens 1GB empfohlen): +free -h + +# Speicherplatz prüfen (mindestens 2GB frei): +df -h +``` ### Debugging
diff --git a/deploy.sh b/deploy.sh
new file mode 100755
index 0000000..1011607
--- /dev/null
+++ b/deploy.sh
@@ -0,0 +1,300 @@
+#!/bin/bash
+
+# Flat Scraper Deployment Script for Raspberry Pi: build, test, clean up, deploy
+# Usage: ./deploy.sh [options]
+
+set -e  # Exit on any error (failing commands outside of conditions abort the script)
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Default values (changed by the command line flags parsed below)
+CLEANUP=true
+SKIP_BUILD=false
+SKIP_DEPLOY=false
+VERBOSE=false
+
+# Print the usage text
+show_help() {
+    echo "Flat Scraper Deployment Script"
+    echo ""
+    echo "Usage: $0 [options]"
+    echo ""
+    echo "Options:"
+    echo " -h, --help Show this help message"
+    echo " -c, --no-cleanup Skip image cleanup after build"
+    echo " -b, --no-build Skip building new image"
+    echo " -d, --no-deploy Skip docker-compose deployment"
+    echo " -v, --verbose Verbose output"
+    echo ""
+    echo "Examples:"
+    echo " $0 # Full deployment with cleanup"
+    echo " $0 --no-cleanup # Deploy without cleanup"
+    echo " $0 --no-build # Deploy existing image"
+    echo ""
+}
+
+# Parse command line arguments; unknown options print the help and exit 1
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        -h|--help)
+            show_help
+            exit 0
+            ;;
+        -c|--no-cleanup)
+            CLEANUP=false
+            shift
+            ;;
+        -b|--no-build)
+            SKIP_BUILD=true
+            shift
+            ;;
+        -d|--no-deploy)
+            SKIP_DEPLOY=true
+            shift
+            ;;
+        -v|--verbose)
+            VERBOSE=true
+            shift
+            ;;
+        *)
+            echo -e "${RED}Unknown option: $1${NC}"
+            show_help
+            exit 1
+            ;;
+    esac
+done
+
+# Logging functions: print "$1" with a colored level prefix (errors go to stderr)
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+log_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1" >&2
+}
+
+# Verify Docker daemon, docker-compose and .env exist (exit 1 otherwise); warn on low disk space
+check_prerequisites() {
+    log_info "Checking prerequisites..."
+
+    # Check if Docker is running
+    if ! docker info >/dev/null 2>&1; then
+        log_error "Docker is not running. Please start Docker first."
+        exit 1
+    fi
+
+    # Check if docker-compose is available
+    if ! command -v docker-compose >/dev/null 2>&1; then
+        log_error "docker-compose is not installed."
+        exit 1
+    fi
+
+    # Check if .env file exists
+    if [ ! -f ".env" ]; then
+        log_error ".env file not found. Please create it from .env.example"
+        exit 1
+    fi
+
+    # Check disk space (-P: one unwrapped line per filesystem, -k: 1K blocks; field 4 = available)
+    available_space=$(df -Pk . | awk 'NR==2 {print $4}')
+    if [ "$available_space" -lt 1048576 ]; then  # 1GB in KB
+        log_warning "Low disk space detected: ${available_space}KB available"
+    fi
+
+    log_success "Prerequisites check passed"
+}
+
+# Print flat-scraper images, running flat-scraper containers and Docker storage usage
+show_status() {
+    log_info "Current Docker status:"
+    echo ""
+
+    # Show images (REPOSITORY keeps the table header in the filtered output)
+    log_info "Docker Images:"
+    docker images | grep -E "(flat-scraper|REPOSITORY)" || echo "No flat-scraper images found"
+    echo ""
+
+    # Show containers (the header of this custom table starts with NAMES)
+    log_info "Running Containers:"
+    docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Image}}" | grep -E "(flat-scraper|NAMES)" || echo "No flat-scraper containers running"
+    echo ""
+
+    # Show storage usage
+    log_info "Docker Storage Usage:"
+    docker system df --format "table {{.Type}}\t{{.TotalCount}}\t{{.Size}}"
+    echo ""
+}
+
+# Build the flat-scraper image unless --no-build was given; exit 1 if the build fails
+build_image() {
+    if [ "$SKIP_BUILD" = true ]; then
+        log_info "Skipping build as requested"
+        return
+    fi
+
+    log_info "Building Docker image..."
+
+    # Plain progress output only when verbose mode was requested
+    local build_args=()
+    if [ "$VERBOSE" = true ]; then
+        build_args+=(--progress=plain)
+    fi
+
+    # Run the build inside the condition: with "set -e" a failing build would
+    # abort the script before a separate "$?" check could report the error
+    if docker build "${build_args[@]}" -t flat-scraper .; then
+        log_success "Docker image built successfully"
+
+        # Show image size
+        image_size=$(docker images flat-scraper:latest --format "{{.Size}}")
+        log_info "Image size: $image_size"
+    else
+        log_error "Docker build failed"
+        exit 1
+    fi
+}
+
+# Smoke-test the image by running "python --version" in it; exit 1 on failure
+test_image() {
+    log_info "Testing Docker image..."
+
+    # Quick test to see if image starts and Python works
+    if docker run --rm flat-scraper python --version >/dev/null 2>&1; then
+        log_success "Image test passed"
+    else
+        log_error "Image test failed"
+        exit 1
+    fi
+}
+
+# Prune Docker images unless --no-cleanup was given
+cleanup_images() {
+    if [ "$CLEANUP" = false ]; then
+        log_info "Skipping cleanup as requested"
+        return
+    fi
+
+    log_info "Cleaning up old Docker images..."
+
+    # Remove dangling images; the size is the last field of "Total reclaimed space: <size>"
+    dangling_removed=$(docker image prune -f | awk '/Total reclaimed space/ {print $NF}')
+
+    # Verbose mode also prunes ALL unused images. NOTE(review): runs before deploy, may remove the just-built image
+    if [ "$VERBOSE" = true ]; then
+        log_info "Removing all unused images..."
+        unused_removed=$(docker image prune -a -f | awk '/Total reclaimed space/ {print $NF}')
+        log_info "Reclaimed space (unused images): ${unused_removed:-0B}"
+    fi
+    log_success "Cleanup completed"
+    if [ -n "$dangling_removed" ]; then
+        log_info "Reclaimed space: $dangling_removed"
+    fi
+}
+
+# Start the docker-compose services unless --no-deploy was given; exit 1 if not all are running
+deploy_services() {
+    if [ "$SKIP_DEPLOY" = true ]; then
+        log_info "Skipping deployment as requested"
+        return
+    fi
+
+    log_info "Deploying services with docker-compose..."
+
+    # Stop existing services (only done in verbose mode)
+    if [ "$VERBOSE" = true ]; then
+        log_info "Stopping existing services..."
+        docker-compose down
+    fi
+
+    # Start services (verbose mode also rebuilds the compose images, unless --no-build was given)
+    if [ "$VERBOSE" = true ] && [ "$SKIP_BUILD" = false ]; then
+        docker-compose up -d --build
+    else
+        docker-compose up -d
+    fi
+
+    # Check if services started successfully
+    sleep 5  # Give services time to start
+    # "|| true": grep exits 1 on zero matches, which would abort silently under "set -e"
+    running_containers=$(docker-compose ps | grep -cE "Up|running" || true)
+    total_containers=$(docker-compose ps | grep -c "flat-scraper" || true)
+
+    if [ "$running_containers" -eq "$total_containers" ] && [ "$total_containers" -gt 0 ]; then
+        log_success "All services deployed successfully"
+        log_info "Running containers: $running_containers/$total_containers"
+    else
+        log_error "Some services failed to start"
+        docker-compose ps
+        exit 1
+    fi
+}
+
+# Print compose status, the last 10 log lines and Docker storage usage
+show_final_status() {
+    log_info "Final deployment status:"
+    echo ""
+
+    # Show running containers
+    log_info "Running services:"
+    docker-compose ps
+    echo ""
+
+    # Show recent logs
+    log_info "Recent logs (last 10 lines):"
+    docker-compose logs --tail=10
+    echo ""
+
+    # Show storage usage
+    log_info "Final storage usage:"
+    docker system df --format "table {{.Type}}\t{{.TotalCount}}\t{{.Size}}"
+    echo ""
+
+    log_success "Deployment completed successfully!"
+    log_info "Your Flat Scraper is now running and will check every 6 hours."
+}
+
+# Main execution: status, prerequisites, build, test, cleanup, deploy, final status
+main() {
+    echo -e "${BLUE}========================================${NC}"
+    echo -e "${BLUE} Flat Scraper Deployment Script ${NC}"
+    echo -e "${BLUE}========================================${NC}"
+    echo ""
+
+    # Show initial status
+    show_status
+
+    # Run deployment steps
+    check_prerequisites
+    build_image
+    test_image
+    cleanup_images
+    deploy_services
+    show_final_status
+
+    echo ""
+    log_success "🎉 Flat Scraper deployment completed!"
+    echo ""
+    log_info "Useful commands:"
+    echo " docker-compose logs -f # Follow logs"
+    echo " docker-compose ps # Check status"
+    echo " docker-compose restart # Restart services"
+    echo " docker-compose down # Stop services"
+    echo " ./deploy.sh --no-build --no-cleanup # Quick restart"
+    echo ""
+}
+
+# Run main function
+main "$@"
diff --git a/src/config/sites.yaml b/src/config/sites.yaml index 93d2d41..9b2c79b 100644 --- a/src/config/sites.yaml +++ b/src/config/sites.yaml @@ -18,8 +18,8 @@ sites: notification: email: enabled: true # Set to true to enable email notifications - smtp_server: "${SMTP_SERVER}" - smtp_port: "${SMTP_PORT}" + smtp_server: "${SMTP_SERVER:smtp.gmail.com}" + smtp_port: "${SMTP_PORT:587}" username: "${EMAIL_USERNAME}" password: "${EMAIL_PASSWORD}" from_email: "${EMAIL_FROM}" diff --git a/src/notifier/email_notifier.py b/src/notifier/email_notifier.py index 2003550..31ed6a7 100644 --- a/src/notifier/email_notifier.py +++ b/src/notifier/email_notifier.py @@ -90,11 +90,19 @@ class EmailNotifier: address = result.get('address', 'N/A') link = result.get('link', '#') + # Handle empty or None links + if not link or link == '#' or link == 'None': + link_display = 'Kein Link' + link_href = '#' + else: + link_display = 'Details' + link_href = link + html += f""" {plz} {address} - Details + {link_display} """ diff --git a/src/scrapers/nhg_scraper.py b/src/scrapers/nhg_scraper.py index 365d688..4db237b 100644 --- a/src/scrapers/nhg_scraper.py +++ b/src/scrapers/nhg_scraper.py @@ -47,21 +47,35 @@ class NHGScraper(BaseScraper): address_pattern = r'(\d{4}\s+Wien,\s*[^,\n]+)' addresses = re.findall(address_pattern, content) - for address in addresses: + # Panels mit data-url finden (bessere Link-Extraktion) + panels = [] + try: + panel_elements = await page.locator('#UnitsList .panel[data-url]').all() + for element in panel_elements: + data_url = await element.get_attribute('data-url') + panel_text = await element.text_content() + if data_url and 
panel_text: + panels.append({'url': data_url, 'text': panel_text}) + except: + pass + + # Adressen mit Links verbinden + for i, address in enumerate(addresses): address = address.strip() if address: - # Details Link suchen - details_link = None - try: - # Suche nach Details Link nach der Adresse - details_elements = await page.locator('#UnitsList a').all() - for element in details_elements: - link_text = await element.text_content() - if 'Details' in link_text: - details_link = await element.get_attribute('href') - break - except: - pass + # Versuche, den passenden Link zu finden + details_link = '#' + + # Strategie 1: Panel mit passender Adresse finden + for panel in panels: + if address in panel['text']: + # data-url zu voller URL machen + data_url = panel['url'] + if data_url.startswith('/'): + details_link = f"https://nhg.at{data_url}" + else: + details_link = data_url + break result = { 'plz': plz.split()[0], # Nur die PLZ, ohne "Wien"