Files
flat_scraper/test_scraper.py
2026-02-15 10:04:41 +01:00

62 lines
1.6 KiB
Python

#!/usr/bin/env python3
"""
Manual test script for the NHG scraper.

Runs a single scrape through NHGScraper, prints the results, passes them to
CSVStorage for comparison and saving, and exits with status 0 on success or
1 on failure.
"""
import asyncio
import sys
import os
# Put the 'src' directory next to this file at the front of sys.path; this must
# run before the project imports below (presumably 'scrapers' and 'storage'
# live under src/ -- confirm against the repository layout).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
from scrapers.nhg_scraper import NHGScraper
from storage.csv_storage import CSVStorage
async def test_nhg_scraper():
    """Run the NHG scraper for one PLZ and push the results through CSV storage.

    Prints every scraped entry, then the number of new and removed results
    reported by ``CSVStorage.compare_results``, and finally saves the results.

    Returns:
        True when the whole run finished without raising, False otherwise
        (the error message and its traceback are printed first).
    """
    print("Testing NHG Scraper...")

    # Test with full PLZ name
    search_params = {'plz_list': ['1120 Wien']}
    scraper_config = {
        'name': 'NHG Test',
        'url': 'https://nhg.at/immobilienangebot/wohnungsangebot/',
        'search_params': search_params,
    }
    nhg = NHGScraper(scraper_config)

    try:
        listings = await nhg.scrape()

        print(f"Found {len(listings)} results:")
        for listing in listings:
            print(f" PLZ: {listing.get('plz')}")
            print(f" Address: {listing.get('address')}")
            print(f" Link: {listing.get('link')}")
            print(f" Hash: {listing.get('hash')}")
            print("-" * 40)

        # Test storage: diff the fresh scrape against what is already stored
        csv_store = CSVStorage('test_data')
        added, removed = csv_store.compare_results('nhg_test', listings)
        print(f"New results: {len(added)}")
        print(f"Removed results: {len(removed)}")

        # Save results
        csv_store.save_results('nhg_test', listings)
    except Exception as exc:
        # Top-level boundary of the script: report the failure and signal it
        # through the return value instead of propagating.
        print(f"Error: {exc}")
        import traceback
        traceback.print_exc()
        return False
    else:
        return True
if __name__ == "__main__":
    # Translate the test outcome into a process exit status:
    # 0 when the test reported success, 1 otherwise.
    passed = asyncio.run(test_nhg_scraper())
    if passed:
        sys.exit(0)
    else:
        sys.exit(1)