From 1ff8ee2c26e39802b7d57434b8ceab8b0d158d58 Mon Sep 17 00:00:00 2001 From: Erol Haagenrud Date: Mon, 4 May 2026 15:30:29 +0200 Subject: [PATCH] =?UTF-8?q?F=C3=B8r=20endringer=20i=20hcp-kalkulator?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/main.py | 8 +++-- backend/scrape_golfpakker.py | 9 ++++- backend/scrape_greenfee.py | 14 ++++++-- backend/scrape_membership.py | 14 ++++++-- backend/scrape_status.py | 33 ++++++++++++++----- backend/scrape_utils.py | 18 ++++++++++ backend/scrape_vtg.py | 14 ++++++-- frontend/src/app/admin/page.tsx | 8 ++++- .../src/components/ScrapeMethodSelect.tsx | 10 ++++-- 9 files changed, 106 insertions(+), 22 deletions(-) diff --git a/backend/main.py b/backend/main.py index cf5b322..fe2f6d0 100644 --- a/backend/main.py +++ b/backend/main.py @@ -5810,6 +5810,10 @@ async def update_scrape_settings(facility_id: int, settings: ScrapeSettingsUpdat if not facility: raise HTTPException(status_code=404, detail="Anlegget finnes ikke.") + normalized_scrape_method = settings.scrape_method + if isinstance(normalized_scrape_method, str) and not normalized_scrape_method.strip(): + normalized_scrape_method = "disabled" + # Oppdater verdiene i databasen inkludert AI instruks await conn.execute(""" UPDATE facilities @@ -5819,14 +5823,14 @@ async def update_scrape_settings(facility_id: int, settings: ScrapeSettingsUpdat ai_instruction = $4 WHERE id = $5 """, - settings.scrape_method, + normalized_scrape_method, settings.scrape_status_url, settings.scrape_status_selector, settings.ai_instruction, facility_id) # Hvis metoden er manuell, tvinger vi gjennom de nye banestatusene direkte - if settings.scrape_method == 'manual' and settings.courses: + if normalized_scrape_method == 'manual' and settings.courses: for c in settings.courses: current_course = await conn.fetchrow( "SELECT id, facility_id, status FROM courses WHERE id = $1 AND facility_id = $2", diff --git a/backend/scrape_golfpakker.py b/backend/scrape_golfpakker.py index 4cd3cec..0e702e9 100644 --- a/backend/scrape_golfpakker.py +++ b/backend/scrape_golfpakker.py @@ -19,7 +19,13 @@ from dotenv import load_dotenv from env_config import get_database_url from playwright.async_api import async_playwright -from scrape_utils import ProgressCallback, emit_progress, make_progress_event, parse_llm_json +from scrape_utils import ( + ProgressCallback, + emit_progress, + exclude_discontinued_facilities_clause, + make_progress_event, + parse_llm_json, +) load_dotenv() @@ -194,6 +200,7 @@ async def run_golfpakker_scraper(facility_ids=None, progress_callback: ProgressC FROM facilities WHERE COALESCE(NULLIF(TRIM(golfpakker_url), ''), NULLIF(TRIM(website_url), '')) IS NOT NULL """ + query += exclude_discontinued_facilities_clause("facilities") if facility_ids: query += f" AND id IN ({','.join(map(str, facility_ids))})" diff --git a/backend/scrape_greenfee.py b/backend/scrape_greenfee.py index aca917d..e8e6ccb 100644 --- a/backend/scrape_greenfee.py +++ b/backend/scrape_greenfee.py @@ -16,7 +16,13 @@ from playwright.async_api import async_playwright import google.generativeai as genai from dotenv import load_dotenv from env_config import get_database_url -from scrape_utils import ProgressCallback, emit_progress, make_progress_event, parse_llm_json +from scrape_utils import ( + ProgressCallback, + emit_progress, + exclude_discontinued_facilities_clause, + make_progress_event, + parse_llm_json, +) load_dotenv() @@ -110,7 +116,11 @@ async def run_greenfee_scraper(facility_ids=None, progress_callback: ProgressCal failed_count = 0 try: - query = "SELECT id, name, greenfee_url FROM facilities WHERE greenfee_url IS NOT NULL AND greenfee_url != ''" + query = ( + "SELECT id, name, greenfee_url FROM facilities " + "WHERE greenfee_url IS NOT NULL AND greenfee_url != ''" + f"{exclude_discontinued_facilities_clause('facilities')}" + ) if facility_ids: query += f" AND id IN ({','.join(map(str, facility_ids))})" diff --git a/backend/scrape_membership.py b/backend/scrape_membership.py index 5aba13b..44e4e2b 100644 --- a/backend/scrape_membership.py +++ b/backend/scrape_membership.py @@ -17,7 +17,13 @@ from playwright.async_api import async_playwright import google.generativeai as genai from dotenv import load_dotenv from env_config import get_database_url -from scrape_utils import ProgressCallback, emit_progress, make_progress_event, parse_llm_json +from scrape_utils import ( + ProgressCallback, + emit_progress, + exclude_discontinued_facilities_clause, + make_progress_event, + parse_llm_json, +) load_dotenv() @@ -105,7 +111,11 @@ async def run_scraper(facility_ids=None, progress_callback: ProgressCallback | N failed_count = 0 try: - query = "SELECT id, name, medlemskap_url FROM facilities WHERE medlemskap_url IS NOT NULL AND medlemskap_url != ''" + query = ( + "SELECT id, name, medlemskap_url FROM facilities " + "WHERE medlemskap_url IS NOT NULL AND medlemskap_url != ''" + f"{exclude_discontinued_facilities_clause('facilities')}" + ) if facility_ids: query += f" AND id IN ({','.join(map(str, facility_ids))})" diff --git a/backend/scrape_status.py b/backend/scrape_status.py index 07ddf32..7200624 100644 --- a/backend/scrape_status.py +++ b/backend/scrape_status.py @@ -17,11 +17,17 @@ from google import genai from dotenv import load_dotenv from course_status_history import ensure_course_status_history_table, log_course_status_change from env_config import get_database_url -from scrape_utils import ProgressCallback, emit_progress, make_progress_event +from scrape_utils import ( + ProgressCallback, + emit_progress, + exclude_discontinued_facilities_clause, + make_progress_event, +) load_dotenv() DB_URL = get_database_url() +DISABLED_STATUS_METHODS = {"", "disabled", "manual"} # ========================================== # KONFIGURERER GEMINI AI (NY SDK) @@ -154,17 +160,18 @@ async def run_daily_scraping(facility_ids=None, progress_callback: ProgressCallb print(f"🚀 Starter sjekk {datetime.now().strftime('%H:%M:%S')}...") conn = await asyncpg.connect(DB_URL) await ensure_course_status_history_table(conn) + facility_filter = exclude_discontinued_facilities_clause("facilities") if facility_ids: print(f"📌 Kjører skraping KUN for anlegg-ID(er): {facility_ids}") facilities = await conn.fetch( - "SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL AND id = ANY($1::int[])", + f"SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL AND id = ANY($1::int[]){facility_filter}", facility_ids ) else: print("🌍 Kjører skraping for ALLE anlegg med scrape_status_url...") facilities = await conn.fetch( - "SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL" + f"SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL{facility_filter}" ) if not facilities: @@ -207,7 +214,9 @@ async def run_daily_scraping(facility_ids=None, progress_callback: ProgressCallb context = await browser.new_context() for index, f in enumerate(facilities, start=1): - method = f.get('scrape_method') or 'css_selector' + raw_method = (f.get('scrape_method') or "").strip() + method = raw_method or 'css_selector' + method_label = "disabled" if raw_method in {"", "disabled"} else method facility_id = f['id'] facility_name = f['name'] @@ -219,15 +228,21 @@ async def run_daily_scraping(facility_ids=None, progress_callback: ProgressCallb facility_id=facility_id, facility_name=facility_name, outcome="info", - message=f"Starter sjekk med metode {method}.", + message=f"Starter sjekk med metode {method_label}.", processed=index - 1, total=total_facilities, ), ) - if method == 'manual': - successes.append(f"⏸️ {f['name']}: Hoppet over (Manuell overstyring)") - print(f" ⏸️ Hopper over skraping av {f['name']} (Satt til Manuell)") + if raw_method in DISABLED_STATUS_METHODS: + skip_reason = "Manuell overstyring" if raw_method == "manual" else "Skraping avslått" + progress_message = ( + "Hoppet over fordi anlegget er satt til manuell overstyring." + if raw_method == "manual" + else "Hoppet over fordi banestatusskraping er avslått for anlegget." + ) + successes.append(f"⏸️ {f['name']}: Hoppet over ({skip_reason})") + print(f" ⏸️ Hopper over skraping av {f['name']} ({skip_reason})") skipped_facilities += 1 await emit_progress( progress_callback, @@ -241,7 +256,7 @@ async def run_daily_scraping(facility_ids=None, progress_callback: ProgressCallb facility_id=facility_id, facility_name=facility_name, outcome="warning", - message="Hoppet over fordi anlegget er satt til manuell overstyring.", + message=progress_message, processed=index, total=total_facilities, ), diff --git a/backend/scrape_utils.py b/backend/scrape_utils.py index ec5e11f..32c63e0 100755 --- a/backend/scrape_utils.py +++ b/backend/scrape_utils.py @@ -4,6 +4,24 @@ from typing import Any, Awaitable, Callable ProgressCallback = Callable[[dict[str, Any]], Awaitable[None]] +def exclude_discontinued_facilities_clause(facility_table: str = "facilities") -> str: + return f""" + AND ( + NOT EXISTS ( + SELECT 1 + FROM courses course_filter + WHERE course_filter.facility_id = {facility_table}.id + ) + OR EXISTS ( + SELECT 1 + FROM courses course_filter + WHERE course_filter.facility_id = {facility_table}.id + AND COALESCE(course_filter.status, 'ukjent') <> 'nedlagt' + ) + ) + """ + + async def emit_progress(progress_callback: ProgressCallback | None, **payload: Any) -> None: if progress_callback is None: return diff --git a/backend/scrape_vtg.py b/backend/scrape_vtg.py index 4cd0d9c..6bef4bd 100644 --- a/backend/scrape_vtg.py +++ b/backend/scrape_vtg.py @@ -16,7 +16,13 @@ from playwright.async_api import async_playwright import google.generativeai as genai from dotenv import load_dotenv from env_config import get_database_url -from scrape_utils import ProgressCallback, emit_progress, make_progress_event, parse_llm_json +from scrape_utils import ( + ProgressCallback, + emit_progress, + exclude_discontinued_facilities_clause, + make_progress_event, + parse_llm_json, +) from vtg_courses import filter_upcoming_courses load_dotenv() @@ -110,7 +116,11 @@ async def run_vtg_scraper(facility_ids=None, progress_callback: ProgressCallback failed_count = 0 try: - query = "SELECT id, name, vtg_lenke FROM facilities WHERE vtg_lenke IS NOT NULL AND vtg_lenke != ''" + query = ( + "SELECT id, name, vtg_lenke FROM facilities " + "WHERE vtg_lenke IS NOT NULL AND vtg_lenke != ''" + f"{exclude_discontinued_facilities_clause('facilities')}" + ) if facility_ids: query += f" AND id IN ({','.join(map(str, facility_ids))})" diff --git a/frontend/src/app/admin/page.tsx b/frontend/src/app/admin/page.tsx index d832220..a93be02 100644 --- a/frontend/src/app/admin/page.tsx +++ b/frontend/src/app/admin/page.tsx @@ -197,6 +197,11 @@ const EMPTY_MANUAL_OVERRIDE_FORM: ManualOverrideForm = { vtg_datoer: [], }; +function normalizeScrapeMethod(scrapeMethod: string | null | undefined) { + if (typeof scrapeMethod === 'string' && scrapeMethod.trim() === '') return 'disabled'; + return scrapeMethod ?? 'css_selector'; +} + const toInputString = (value: unknown) => { if (value === null || value === undefined) return ''; return String(value); @@ -709,7 +714,7 @@ export default function AdminDashboard() { setEditForm({ scrape_status_url: facility.scrape_status_url || '', scrape_status_selector: facility.scrape_status_selector || '', - scrape_method: facility.scrape_method || 'css_selector', + scrape_method: normalizeScrapeMethod(facility.scrape_method), ai_instruction: facility.ai_instruction || '', courses: facility.course_statuses ? facility.course_statuses.map((c: any) => ({id: c.id, name: c.name, status: c.status})) : [] }); @@ -950,6 +955,7 @@ export default function AdminDashboard() { + diff --git a/frontend/src/components/ScrapeMethodSelect.tsx b/frontend/src/components/ScrapeMethodSelect.tsx index 0656438..3c7d042 100644 --- a/frontend/src/components/ScrapeMethodSelect.tsx +++ b/frontend/src/components/ScrapeMethodSelect.tsx @@ -11,9 +11,13 @@ interface Facility { scrape_status_selector?: string; } +function getInitialMethod(scrapeMethod?: string | null) { + if (typeof scrapeMethod === 'string' && scrapeMethod.trim() === '') return 'disabled'; + return scrapeMethod ?? 'css_selector'; +} + export default function ScrapeMethodSelect({ facility }: { facility: Facility }) { - // Setter standardverdi til 'css_selector' hvis den er tom i databasen - const [method, setMethod] = useState(facility.scrape_method || 'css_selector'); + const [method, setMethod] = useState(getInitialMethod(facility.scrape_method)); const [isLoading, setIsLoading] = useState(false); const [statusColor, setStatusColor] = useState('bg-transparent'); // For å gi visuell feedback @@ -66,7 +70,7 @@ export default function ScrapeMethodSelect({ facility }: { facility: Facility }) - + ); }