diff --git a/backend/main.py b/backend/main.py index 5688610..7fb9d7e 100644 --- a/backend/main.py +++ b/backend/main.py @@ -9,7 +9,7 @@ LOV: Aldri trunker eller slett logikk for "effektivitet". --------------------------------------------------------------------------- """ -from fastapi import FastAPI, HTTPException, Response, Cookie, Depends, Request, BackgroundTasks +from fastapi import FastAPI, HTTPException, Response, Request, Query from fastapi.middleware.cors import CORSMiddleware from contextlib import asynccontextmanager import asyncpg @@ -21,10 +21,15 @@ from jose import jwt, JWTError from passlib.context import CryptContext from dotenv import load_dotenv -# NYE IMPORTER FOR ADMIN PANELET OG BAKGRUNNSJOBBER from pydantic import BaseModel from typing import Optional, List, Any -import subprocess + +from scrape_jobs import ( + SCRAPE_JOB_TYPES, + enqueue_scrape_job, + ensure_scrape_jobs_table, + list_scrape_jobs, +) load_dotenv() @@ -92,7 +97,10 @@ def format_row(row): d = dict(row) - for key in ['status_updated_at', 'created_at', 'slope_valid_until', 'membership_updated_at']: + for key in [ + 'status_updated_at', 'created_at', 'slope_valid_until', + 'membership_updated_at', 'greenfee_updated_at', 'vtg_updated_at' + ]: if isinstance(d.get(key), (date, datetime)): d[key] = d[key].isoformat() @@ -101,7 +109,8 @@ def format_row(row): 'faqs', 'shotzoom', 'social_links', 'holes', 'golfpakker', 'cooperating_clubs', 'vtg_datoer' ] json_dict_fields = [ - 'amenities', 'vtg', 'nsg_data', 'golfamore_data', 'membership_draft' + 'amenities', 'vtg', 'nsg_data', 'golfamore_data', + 'membership_draft', 'greenfee_draft', 'vtg_draft' ] for field in json_list_fields: @@ -132,38 +141,24 @@ def format_row(row): return d -# --- BAKGRUNNSARBEIDER: FUNKSJON SOM KJØRER SKRAPEREN I BAKGRUNNEN --- -def run_scrape_worker(facility_ids: List[int]): - """ - Kjører selve skraping-scriptet i bakgrunnen. - Slik kan frontenden få et umiddelbart svar, mens skraperen jobber. - """ - print(f"🔄 STARTER BAKGRUNNSSKRAPING FOR FØLGENDE IDER: {facility_ids}") - - try: - ids_arg = ",".join(map(str, facility_ids)) - - # NYTT: Bruker "python -u" for LIVE logging, og fjerner "> /dev/null 2>&1" - command = f"python -u scrape_status.py --ids {ids_arg}" - - subprocess.run(command, shell=True, check=True) - - print(f"✅ BAKGRUNNSSKRAPING FULLFØRT FOR IDER: {facility_ids}") - except subprocess.CalledProcessError as e: - print(f"❌ FEIL UNDER BAKGRUNNSSKRAPING: {e}") - except Exception as e: - print(f"🔥 UFORUTSETT FEIL UNDER BAKGRUNNSSKRAPING: {e}") +async def queue_scrape_job(job_type: str, facility_ids: List[int]): + if job_type not in SCRAPE_JOB_TYPES: + raise HTTPException(status_code=400, detail=f"Ugyldig jobbtype: {job_type}") + if not facility_ids: + raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.") -def run_membership_worker(facility_ids: List[int]): - """Kjører medlemskap-skraperen i bakgrunnen.""" - print(f"🔄 STARTER MEDLEMSKAP-SKRAPING FOR IDER: {facility_ids}") - try: - ids_arg = ",".join(map(str, facility_ids)) - command = f"python -u scrape_membership.py --ids {ids_arg}" - subprocess.run(command, shell=True, check=True) - print(f"✅ MEDLEMSKAP-SKRAPING FULLFØRT FOR IDER: {facility_ids}") - except Exception as e: - print(f"🔥 FEIL UNDER MEDLEMSKAP-SKRAPING: {e}") + job, was_created = await enqueue_scrape_job(app.state.pool, job_type, facility_ids) + status = "queued" if was_created else "already_queued" + message = ( + f"{job_type.capitalize()}-skraping for {len(job['facility_ids'])} anlegg ble lagt i kø." + if was_created + else f"Fant allerede en aktiv {job_type}-jobb for samme anlegg." + ) + return { + "status": status, + "message": message, + "job": job, + } @asynccontextmanager @@ -177,6 +172,8 @@ async def lifespan(app: FastAPI): max_size=20, command_timeout=60 ) + async with app.state.pool.acquire() as conn: + await ensure_scrape_jobs_table(conn) print("✅ Database tilkoblet og pool opprettet") except Exception as e: print(f"❌ Databasefeil under oppstart: {e}") @@ -459,31 +456,26 @@ async def update_facility_full(facility_id: int, request: Request): return {"status": "success", "message": "Anlegg, baner og scorekort ble oppdatert."} # --- NYTT ADMIN ENDPOINT: KJØRER SKRAPEREN FOR VALGTE IDER --- +@app.get("/api/admin/scrape-jobs") +async def get_scrape_jobs(job_type: Optional[str] = Query(default=None), limit: int = Query(default=10, ge=1, le=50)): + """Henter siste scrape-jobber, evt. filtrert på type.""" + if job_type and job_type not in SCRAPE_JOB_TYPES: + raise HTTPException(status_code=400, detail="Ugyldig jobbtype.") + return await list_scrape_jobs(app.state.pool, job_type=job_type, limit=limit) + + @app.post("/api/admin/run-scraper") -async def run_scraper_endpoint(request: ScrapeRunRequest, background_tasks: BackgroundTasks): - """ - Tar imot IDer for skraping, og starter en bakgrunnsjobb. - Gir et umiddelbart svar tilbake til frontenden slik at den slipper å vente. - """ - if not request.facility_ids: - raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.") - - print(f"📡 API mottok forespørsel om å kjøre skraping for IDer: {request.facility_ids}") - - background_tasks.add_task(run_scrape_worker, request.facility_ids) - - return {"status": "queued", "message": f"Skraping for {len(request.facility_ids)} anlegg ble lagt i kø."} +async def run_scraper_endpoint(request: ScrapeRunRequest): + """Legger banestatus-skraping i en persistent jobbkø.""" + print(f"📡 API mottok forespørsel om å kjøre banestatus-skraping for IDer: {request.facility_ids}") + return await queue_scrape_job("banestatus", request.facility_ids) + @app.post("/api/admin/run-membership-scraper") -async def run_membership_scraper_endpoint(request: ScrapeRunRequest, background_tasks: BackgroundTasks): +async def run_membership_scraper_endpoint(request: ScrapeRunRequest): """Tar imot IDer for medlemskapsskraping og legger jobben i kø.""" - if not request.facility_ids: - raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.") - print(f"📡 API mottok forespørsel om medlemskapsskraping for IDer: {request.facility_ids}") - background_tasks.add_task(run_membership_worker, request.facility_ids) - - return {"status": "queued", "message": f"Medlemskapsskraping for {len(request.facility_ids)} anlegg ble lagt i kø."} + return await queue_scrape_job("medlemskap", request.facility_ids) @app.get("/api/health") async def health_check(): @@ -585,25 +577,11 @@ async def approve_greenfee_bulk(request: BulkGreenfeeRequest): """, json.dumps(approval.greenfee), approval.facility_id) return {"status": "success"} -def run_greenfee_worker(facility_ids: List[int]): - """Kjører greenfee-skraperen i bakgrunnen.""" - print(f"🔄 STARTER GREENFEE-SKRAPING FOR IDER: {facility_ids}") - try: - import subprocess - ids_arg = ",".join(map(str, facility_ids)) - command = f"python -u scrape_greenfee.py --ids {ids_arg}" - subprocess.run(command, shell=True, check=True) - print(f"✅ GREENFEE-SKRAPING FULLFØRT FOR IDER: {facility_ids}") - except Exception as e: - print(f"🔥 FEIL UNDER GREENFEE-SKRAPING: {e}") - @app.post("/api/admin/run-greenfee-scraper") -async def run_greenfee_scraper_endpoint(request: ScrapeRunRequest, background_tasks: BackgroundTasks): +async def run_greenfee_scraper_endpoint(request: ScrapeRunRequest): """Tar imot IDer for greenfeeskraping og legger jobben i kø.""" - if not request.facility_ids: - raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.") - background_tasks.add_task(run_greenfee_worker, request.facility_ids) - return {"status": "queued", "message": "Skraping startet"} + print(f"📡 API mottok forespørsel om greenfee-skraping for IDer: {request.facility_ids}") + return await queue_scrape_job("greenfee", request.facility_ids) # --- VEIEN TIL GOLF (VTG) "VASKERI" ENDEPUNKTER --- @@ -638,26 +616,12 @@ async def approve_vtg_bulk(request: BulkVtgRequest): """, approval.vtg_pris, approval.vtg_beskrivelse, datoer_json, approval.facility_id) return {"status": "success"} -def run_vtg_worker(facility_ids: List[int]): - """Kjører VTG-skraperen i bakgrunnen.""" - print(f"🔄 STARTER VTG-SKRAPING FOR IDER: {facility_ids}") - try: - import subprocess - ids_arg = ",".join(map(str, facility_ids)) - command = f"python -u scrape_vtg.py --ids {ids_arg}" - subprocess.run(command, shell=True, check=True) - print(f"✅ VTG-SKRAPING FULLFØRT FOR IDER: {facility_ids}") - except Exception as e: - print(f"🔥 FEIL UNDER VTG-SKRAPING: {e}") - @app.post("/api/admin/run-vtg-scraper") -async def run_vtg_scraper_endpoint(request: ScrapeRunRequest, background_tasks: BackgroundTasks): +async def run_vtg_scraper_endpoint(request: ScrapeRunRequest): """Tar imot IDer for VTG-skraping og legger jobben i kø.""" - if not request.facility_ids: - raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.") - background_tasks.add_task(run_vtg_worker, request.facility_ids) - return {"status": "queued", "message": "Skraping startet"} + print(f"📡 API mottok forespørsel om VTG-skraping for IDer: {request.facility_ids}") + return await queue_scrape_job("vtg", request.facility_ids) if __name__ == "__main__": import uvicorn - uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/backend/scrape_greenfee.py b/backend/scrape_greenfee.py index edf796f..d0cc04d 100644 --- a/backend/scrape_greenfee.py +++ b/backend/scrape_greenfee.py @@ -107,6 +107,10 @@ Returner KUN et gyldig JSON-objekt med nøyaktig følgende struktur: async def run_greenfee_scraper(facility_ids=None): print("🚀 Starter Greenfee-skraperen...") conn = await asyncpg.connect(DB_URL) + facilities = [] + analyzed_count = 0 + saved_count = 0 + skipped_count = 0 try: query = "SELECT id, name, greenfee_url FROM facilities WHERE greenfee_url IS NOT NULL AND greenfee_url != ''" @@ -136,12 +140,16 @@ async def run_greenfee_scraper(facility_ids=None): if len(combined_text) < 50: print(" ⚠️ Fant for lite tekst, hopper over.") + skipped_count += 1 continue draft_data = analyze_greenfee_with_gemini(combined_text[:25000], name) if not draft_data: + skipped_count += 1 continue + + analyzed_count += 1 funnet_priser = len(draft_data.get('foreslatt_greenfee', [])) funnet_klubber = len(draft_data.get('foreslatt_avtaleklubber', [])) @@ -154,6 +162,7 @@ async def run_greenfee_scraper(facility_ids=None): """, json.dumps(draft_data), fac_id) print(" 💾 Greenfee-utkast lagret i databasen!") + saved_count += 1 await browser.close() @@ -161,6 +170,13 @@ async def run_greenfee_scraper(facility_ids=None): await conn.close() print("\n🏁 Skraping fullført.") + return { + "processed_facilities": len(facilities), + "analyzed_facilities": analyzed_count, + "saved_drafts": saved_count, + "skipped_facilities": skipped_count, + } + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Skrap greenfeepriser via AI.") parser.add_argument("--ids", type=str, help="Kommaseparert liste med facility IDs (eks: 1,5,12)") @@ -170,4 +186,4 @@ if __name__ == "__main__": if args.ids: ids_to_scrape = [int(x.strip()) for x in args.ids.split(",")] - asyncio.run(run_greenfee_scraper(ids_to_scrape)) \ No newline at end of file + asyncio.run(run_greenfee_scraper(ids_to_scrape)) diff --git a/backend/scrape_job_runner.py b/backend/scrape_job_runner.py new file mode 100755 index 0000000..fc8e688 --- /dev/null +++ b/backend/scrape_job_runner.py @@ -0,0 +1,24 @@ +from typing import Any + +from scrape_greenfee import run_greenfee_scraper +from scrape_membership import run_scraper as run_membership_scraper +from scrape_status import run_daily_scraping +from scrape_vtg import run_vtg_scraper + + +async def run_scrape_job(job: dict[str, Any]) -> dict[str, Any]: + job_type = job["job_type"] + facility_ids = job.get("facility_ids") or [] + + if job_type == "banestatus": + result = await run_daily_scraping(facility_ids) + elif job_type == "medlemskap": + result = await run_membership_scraper(facility_ids) + elif job_type == "greenfee": + result = await run_greenfee_scraper(facility_ids) + elif job_type == "vtg": + result = await run_vtg_scraper(facility_ids) + else: + raise ValueError(f"Ukjent scrape-jobbtype: {job_type}") + + return result or {} diff --git a/backend/scrape_jobs.py b/backend/scrape_jobs.py new file mode 100755 index 0000000..97d96d7 --- /dev/null +++ b/backend/scrape_jobs.py @@ -0,0 +1,236 @@ +import json +from datetime import date, datetime +from typing import Any, Iterable + +SCRAPE_JOB_TYPES = ("banestatus", "medlemskap", "greenfee", "vtg") +SCRAPE_JOB_STATUSES = ("pending", "running", "completed", "failed") + + +def normalize_facility_ids(facility_ids: Iterable[int]) -> list[int]: + cleaned = { + int(facility_id) + for facility_id in facility_ids + if str(facility_id).strip() + } + return sorted(facility_id for facility_id in cleaned if facility_id > 0) + + +def _parse_json(value: Any, fallback: Any) -> Any: + if value is None: + return fallback + if isinstance(value, str): + try: + return json.loads(value) + except json.JSONDecodeError: + return fallback + return value + + +def format_scrape_job_row(row: Any) -> dict[str, Any] | None: + if row is None: + return None + + data = dict(row) + + for key in ("created_at", "started_at", "finished_at", "updated_at", "last_heartbeat_at"): + if isinstance(data.get(key), (date, datetime)): + data[key] = data[key].isoformat() + + facility_ids = _parse_json(data.get("facility_ids"), []) + data["facility_ids"] = facility_ids if isinstance(facility_ids, list) else [] + + result_summary = _parse_json(data.get("result_summary"), {}) + data["result_summary"] = result_summary if isinstance(result_summary, dict) else {} + + return data + + +async def ensure_scrape_jobs_table(conn) -> None: + await conn.execute( + """ + CREATE TABLE IF NOT EXISTS scrape_jobs ( + id SERIAL PRIMARY KEY, + job_type VARCHAR(50) NOT NULL CHECK (job_type IN ('banestatus', 'medlemskap', 'greenfee', 'vtg')), + facility_ids JSONB NOT NULL DEFAULT '[]'::jsonb, + total_facilities INTEGER NOT NULL DEFAULT 0, + status VARCHAR(20) NOT NULL DEFAULT 'pending' CHECK (status IN ('pending', 'running', 'completed', 'failed')), + requested_by TEXT, + worker_name TEXT, + attempt_count INTEGER NOT NULL DEFAULT 0, + error_message TEXT, + result_summary JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + started_at TIMESTAMPTZ, + finished_at TIMESTAMPTZ, + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + last_heartbeat_at TIMESTAMPTZ + ) + """ + ) + await conn.execute( + """ + CREATE INDEX IF NOT EXISTS idx_scrape_jobs_status_created_at + ON scrape_jobs (status, created_at) + """ + ) + await conn.execute( + """ + CREATE INDEX IF NOT EXISTS idx_scrape_jobs_job_type_created_at + ON scrape_jobs (job_type, created_at DESC) + """ + ) + + +async def enqueue_scrape_job(pool, job_type: str, facility_ids: Iterable[int], requested_by: str | None = None) -> tuple[dict[str, Any], bool]: + if job_type not in SCRAPE_JOB_TYPES: + raise ValueError(f"Ugyldig job_type: {job_type}") + + normalized_ids = normalize_facility_ids(facility_ids) + facility_ids_json = json.dumps(normalized_ids) + + async with pool.acquire() as conn: + async with conn.transaction(): + existing = await conn.fetchrow( + """ + SELECT * + FROM scrape_jobs + WHERE job_type = $1 + AND status IN ('pending', 'running') + AND facility_ids = $2::jsonb + ORDER BY created_at DESC + LIMIT 1 + """, + job_type, + facility_ids_json, + ) + if existing: + return format_scrape_job_row(existing), False + + row = await conn.fetchrow( + """ + INSERT INTO scrape_jobs ( + job_type, + facility_ids, + total_facilities, + requested_by + ) + VALUES ($1, $2::jsonb, $3, $4) + RETURNING * + """, + job_type, + facility_ids_json, + len(normalized_ids), + requested_by, + ) + return format_scrape_job_row(row), True + + +async def list_scrape_jobs(pool, job_type: str | None = None, limit: int = 10) -> list[dict[str, Any]]: + safe_limit = max(1, min(limit, 50)) + + async with pool.acquire() as conn: + if job_type: + rows = await conn.fetch( + """ + SELECT * + FROM scrape_jobs + WHERE job_type = $1 + ORDER BY created_at DESC + LIMIT $2 + """, + job_type, + safe_limit, + ) + else: + rows = await conn.fetch( + """ + SELECT * + FROM scrape_jobs + ORDER BY created_at DESC + LIMIT $1 + """, + safe_limit, + ) + return [format_scrape_job_row(row) for row in rows] + + +async def claim_next_scrape_job(pool, worker_name: str) -> dict[str, Any] | None: + async with pool.acquire() as conn: + async with conn.transaction(): + row = await conn.fetchrow( + """ + WITH next_job AS ( + SELECT id + FROM scrape_jobs + WHERE status = 'pending' + ORDER BY created_at ASC + FOR UPDATE SKIP LOCKED + LIMIT 1 + ) + UPDATE scrape_jobs AS job + SET status = 'running', + worker_name = $1, + attempt_count = job.attempt_count + 1, + started_at = COALESCE(job.started_at, NOW()), + updated_at = NOW(), + last_heartbeat_at = NOW(), + error_message = NULL + FROM next_job + WHERE job.id = next_job.id + RETURNING job.* + """, + worker_name, + ) + return format_scrape_job_row(row) + + +async def heartbeat_scrape_job(pool, job_id: int) -> None: + async with pool.acquire() as conn: + await conn.execute( + """ + UPDATE scrape_jobs + SET last_heartbeat_at = NOW(), + updated_at = NOW() + WHERE id = $1 + """, + job_id, + ) + + +async def complete_scrape_job(pool, job_id: int, result_summary: dict[str, Any] | None = None) -> None: + payload = json.dumps(result_summary or {}) + async with pool.acquire() as conn: + await conn.execute( + """ + UPDATE scrape_jobs + SET status = 'completed', + finished_at = NOW(), + updated_at = NOW(), + last_heartbeat_at = NOW(), + error_message = NULL, + result_summary = $2::jsonb + WHERE id = $1 + """, + job_id, + payload, + ) + + +async def fail_scrape_job(pool, job_id: int, error_message: str, result_summary: dict[str, Any] | None = None) -> None: + payload = json.dumps(result_summary or {}) + async with pool.acquire() as conn: + await conn.execute( + """ + UPDATE scrape_jobs + SET status = 'failed', + finished_at = NOW(), + updated_at = NOW(), + last_heartbeat_at = NOW(), + error_message = $2, + result_summary = $3::jsonb + WHERE id = $1 + """, + job_id, + error_message[:1000], + payload, + ) diff --git a/backend/scrape_membership.py b/backend/scrape_membership.py index 2fb99fe..6b1ee18 100644 --- a/backend/scrape_membership.py +++ b/backend/scrape_membership.py @@ -102,6 +102,10 @@ Merk: Prisene SKAL være tall (integer), ikke tekst. Sett til null hvis du ikke async def run_scraper(facility_ids=None): print("🚀 Starter Medlemskaps-skraperen (Støtter multi-URL)...") conn = await asyncpg.connect(DB_URL) + facilities = [] + analyzed_count = 0 + saved_count = 0 + skipped_count = 0 try: query = "SELECT id, name, medlemskap_url FROM facilities WHERE medlemskap_url IS NOT NULL AND medlemskap_url != ''" @@ -132,13 +136,17 @@ async def run_scraper(facility_ids=None): if len(combined_text) < 50: print(" ⚠️ Fant for lite tekst, hopper over.") + skipped_count += 1 continue # Kutter teksten for å ikke overbelaste Gemini (ca 25000 tegn maks) draft_data = analyze_with_gemini(combined_text[:25000], name) if not draft_data: + skipped_count += 1 continue + + analyzed_count += 1 print(f" ✅ AI foreslår: Standard: {draft_data.get('foreslatt_standard_pris')} | Rimeligste: {draft_data.get('foreslatt_rimeligste_pris')}") @@ -149,6 +157,7 @@ async def run_scraper(facility_ids=None): """, json.dumps(draft_data), fac_id) print(" 💾 Utkast lagret i databasen!") + saved_count += 1 await browser.close() @@ -156,6 +165,13 @@ async def run_scraper(facility_ids=None): await conn.close() print("\n🏁 Skraping fullført.") + return { + "processed_facilities": len(facilities), + "analyzed_facilities": analyzed_count, + "saved_drafts": saved_count, + "skipped_facilities": skipped_count, + } + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Skrap medlemskapspriser via AI.") parser.add_argument("--ids", type=str, help="Kommaseparert liste med facility IDs (eks: 1,5,12)") @@ -165,4 +181,4 @@ if __name__ == "__main__": if args.ids: ids_to_scrape = [int(x.strip()) for x in args.ids.split(",")] - asyncio.run(run_scraper(ids_to_scrape)) \ No newline at end of file + asyncio.run(run_scraper(ids_to_scrape)) diff --git a/backend/scrape_status.py b/backend/scrape_status.py index fb12838..bd0d8bb 100644 --- a/backend/scrape_status.py +++ b/backend/scrape_status.py @@ -166,7 +166,12 @@ async def run_daily_scraping(facility_ids=None): if not facilities: print("⚠️ Fant ingen anlegg å skrape.") await conn.close() - return + return { + "processed_facilities": 0, + "updated_courses": 0, + "warnings": 0, + "successes": 0, + } changes, warnings, successes = [], [], [] @@ -315,6 +320,12 @@ async def run_daily_scraping(facility_ids=None): await conn.close() send_report(changes, warnings, successes) print("🏁 Ferdig.") + return { + "processed_facilities": len(facilities), + "updated_courses": len(changes), + "warnings": len(warnings), + "successes": len(successes), + } if __name__ == "__main__": parser = argparse.ArgumentParser(description="TeeOff Status Scraper") @@ -329,4 +340,4 @@ if __name__ == "__main__": print("❌ Feil format på --ids. Må være kommaseparerte tall, f.eks: 1,4,12") exit(1) - asyncio.run(run_daily_scraping(facility_ids_list)) \ No newline at end of file + asyncio.run(run_daily_scraping(facility_ids_list)) diff --git a/backend/scrape_vtg.py b/backend/scrape_vtg.py index 797545d..7c884ba 100644 --- a/backend/scrape_vtg.py +++ b/backend/scrape_vtg.py @@ -97,6 +97,10 @@ Merk: Sett foreslatt_vtg_pris til null (null) hvis du ikke finner den. Hvis du i async def run_vtg_scraper(facility_ids=None): print("🚀 Starter Veien til Golf (VTG) skraperen...") conn = await asyncpg.connect(DB_URL) + facilities = [] + analyzed_count = 0 + saved_count = 0 + skipped_count = 0 try: query = "SELECT id, name, vtg_lenke FROM facilities WHERE vtg_lenke IS NOT NULL AND vtg_lenke != ''" @@ -126,12 +130,16 @@ async def run_vtg_scraper(facility_ids=None): if len(combined_text) < 50: print(" ⚠️ Fant for lite tekst, hopper over.") + skipped_count += 1 continue draft_data = analyze_vtg_with_gemini(combined_text[:25000], name) if not draft_data: + skipped_count += 1 continue + + analyzed_count += 1 print(f" ✅ AI fant pris: {draft_data.get('foreslatt_vtg_pris')}, og {len(draft_data.get('foreslatt_vtg_datoer', []))} datoer.") @@ -142,6 +150,7 @@ async def run_vtg_scraper(facility_ids=None): """, json.dumps(draft_data), fac_id) print(" 💾 VTG-utkast lagret i databasen!") + saved_count += 1 await browser.close() @@ -149,6 +158,13 @@ async def run_vtg_scraper(facility_ids=None): await conn.close() print("\n🏁 Skraping fullført.") + return { + "processed_facilities": len(facilities), + "analyzed_facilities": analyzed_count, + "saved_drafts": saved_count, + "skipped_facilities": skipped_count, + } + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Skrap VTG via AI.") parser.add_argument("--ids", type=str, help="Kommaseparert liste med facility IDs (eks: 1,5,12)") @@ -158,4 +174,4 @@ if __name__ == "__main__": if args.ids: ids_to_scrape = [int(x.strip()) for x in args.ids.split(",")] - asyncio.run(run_vtg_scraper(ids_to_scrape)) \ No newline at end of file + asyncio.run(run_vtg_scraper(ids_to_scrape)) diff --git a/backend/worker.py b/backend/worker.py new file mode 100755 index 0000000..a5f60df --- /dev/null +++ b/backend/worker.py @@ -0,0 +1,77 @@ +import asyncio +import os +import traceback + +import asyncpg +from dotenv import load_dotenv + +from scrape_job_runner import run_scrape_job +from scrape_jobs import ( + claim_next_scrape_job, + complete_scrape_job, + ensure_scrape_jobs_table, + fail_scrape_job, + heartbeat_scrape_job, +) + +load_dotenv() + +DB_URL = os.getenv("DATABASE_URL", "postgresql://teeoff_admin:teeoff_secret_password@db:5432/teeoff") +WORKER_NAME = os.getenv("SCRAPE_WORKER_NAME", f"scrape-worker-{os.getpid()}") +POLL_INTERVAL_SECONDS = int(os.getenv("SCRAPE_WORKER_POLL_INTERVAL", "5")) +HEARTBEAT_INTERVAL_SECONDS = int(os.getenv("SCRAPE_WORKER_HEARTBEAT_INTERVAL", "15")) + + +async def heartbeat_loop(pool, job_id: int, stop_event: asyncio.Event) -> None: + while not stop_event.is_set(): + try: + await asyncio.wait_for(stop_event.wait(), timeout=HEARTBEAT_INTERVAL_SECONDS) + except asyncio.TimeoutError: + try: + await heartbeat_scrape_job(pool, job_id) + except Exception as exc: + print(f"⚠️ Klarte ikke å oppdatere heartbeat for jobb {job_id}: {exc}") + + +async def main() -> None: + print(f"🚀 Starter scrape worker: {WORKER_NAME}") + pool = await asyncpg.create_pool(DB_URL, min_size=1, max_size=5, command_timeout=60) + + try: + async with pool.acquire() as conn: + await ensure_scrape_jobs_table(conn) + + while True: + job = await claim_next_scrape_job(pool, WORKER_NAME) + if not job: + await asyncio.sleep(POLL_INTERVAL_SECONDS) + continue + + job_id = job["id"] + print(f"🎯 Worker plukket jobb #{job_id} ({job['job_type']}) for {len(job.get('facility_ids', []))} anlegg") + + stop_event = asyncio.Event() + heartbeat_task = asyncio.create_task(heartbeat_loop(pool, job_id, stop_event)) + + try: + result_summary = await run_scrape_job(job) + await complete_scrape_job(pool, job_id, result_summary) + print(f"✅ Jobb #{job_id} fullført") + except Exception as exc: + trace = traceback.format_exc(limit=5) + print(f"🔥 Jobb #{job_id} feilet: {exc}\n{trace}") + await fail_scrape_job( + pool, + job_id, + str(exc), + {"traceback": trace}, + ) + finally: + stop_event.set() + await heartbeat_task + finally: + await pool.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docker-compose.yml b/docker-compose.yml index 2ce36ee..c8fbf27 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,6 +25,16 @@ services: - db restart: unless-stopped + worker: + build: ./backend + container_name: teeoff_worker + command: python worker.py + volumes: + - ./backend:/app + depends_on: + - db + restart: unless-stopped + frontend: build: ./frontend container_name: teeoff_frontend @@ -38,4 +48,4 @@ services: restart: unless-stopped volumes: - teeoff_db_data: \ No newline at end of file + teeoff_db_data: diff --git a/frontend/src/app/admin/page.tsx b/frontend/src/app/admin/page.tsx index 1bb56fa..51d3a5f 100644 --- a/frontend/src/app/admin/page.tsx +++ b/frontend/src/app/admin/page.tsx @@ -3,11 +3,42 @@ * TEE OFF ADMIN DASHBOARD v4.0 - KONTROLLPANEL */ -import { useState, useEffect, useMemo } from 'react'; +import { useEffect, useMemo, useRef, useState } from 'react'; import { API_URL } from "@/config/constants"; import ScrapeMethodSelect from "@/components/ScrapeMethodSelect"; import Link from 'next/link'; +type AdminTab = 'banestatus' | 'medlemskap' | 'greenfee' | 'vtg'; + +type ScrapeJobStatus = 'pending' | 'running' | 'completed' | 'failed'; + +type ScrapeJob = { + id: number; + job_type: AdminTab; + status: ScrapeJobStatus; + facility_ids: number[]; + total_facilities: number; + error_message?: string | null; + result_summary?: Record; + created_at?: string | null; + started_at?: string | null; + finished_at?: string | null; +}; + +const JOB_LABELS: Record = { + banestatus: 'Banestatus', + medlemskap: 'Medlemskap', + greenfee: 'Greenfee', + vtg: 'VTG', +}; + +const JOB_STATUS_LABELS: Record = { + pending: 'I kø', + running: 'Kjører', + completed: 'Fullført', + failed: 'Feilet', +}; + const InlineEdit = ({ facilityId, field, initialValue, onSave }: { facilityId: number, field: string, initialValue: string, onSave: (id: number, field: string, val: string) => void }) => { const [isEditing, setIsEditing] = useState(false); const [value, setValue] = useState(initialValue || ''); @@ -45,13 +76,15 @@ export default function AdminDashboard() { const [facilities, setFacilities] = useState([]); const [loading, setLoading] = useState(true); const [selectedFacilities, setSelectedFacilities] = useState([]); - const [isScraping, setIsScraping] = useState(false); + const [scrapeJobs, setScrapeJobs] = useState([]); + const [isQueueing, setIsQueueing] = useState(false); const [isSidebarCollapsed, setIsSidebarCollapsed] = useState(false); const [editingFacility, setEditingFacility] = useState(null); - const [activeTab, setActiveTab] = useState<'banestatus' | 'medlemskap' | 'greenfee' | 'vtg'>('banestatus'); + const [activeTab, setActiveTab] = useState('banestatus'); const [statusFilter, setStatusFilter] = useState('alle'); const [editForm, setEditForm] = useState({ scrape_status_url: '', scrape_status_selector: '', scrape_method: '', ai_instruction: '', courses: [] as any[] }); const [isSaving, setIsSaving] = useState(false); + const latestJobStateRef = useRef(null); const fetchFacilities = () => { fetch(`${API_URL}/facilities`) @@ -63,15 +96,58 @@ export default function AdminDashboard() { .catch(() => setLoading(false)); }; - useEffect(() => { fetchFacilities(); }, []); + const fetchScrapeJobs = (tab: AdminTab = activeTab) => { + fetch(`${API_URL}/admin/scrape-jobs?job_type=${tab}&limit=5`) + .then(res => res.json()) + .then(data => { + setScrapeJobs(Array.isArray(data) ? data : []); + }) + .catch(() => setScrapeJobs([])); + }; + + const activeJob = useMemo( + () => scrapeJobs.find(job => job.status === 'pending' || job.status === 'running') || null, + [scrapeJobs] + ); + const latestJob = scrapeJobs[0] || null; + const isScraping = !!activeJob; useEffect(() => { - let interval: NodeJS.Timeout; - if (isScraping) interval = setInterval(() => fetchFacilities(), 10000); + fetchFacilities(); + fetchScrapeJobs('banestatus'); + }, []); + + useEffect(() => { + const interval = setInterval(() => fetchScrapeJobs(activeTab), 5000); + return () => clearInterval(interval); + }, [activeTab]); + + useEffect(() => { + setSelectedFacilities([]); + fetchScrapeJobs(activeTab); + }, [activeTab]); + + useEffect(() => { + if (!isScraping) return; + const interval = setInterval(() => fetchFacilities(), 10000); return () => clearInterval(interval); }, [isScraping]); - useEffect(() => { setSelectedFacilities([]); }, [activeTab]); + useEffect(() => { + const currentState = latestJob ? `${latestJob.id}:${latestJob.status}` : null; + const previousState = latestJobStateRef.current; + latestJobStateRef.current = currentState; + + if ( + previousState && + previousState !== currentState && + latestJob && + (latestJob.status === 'completed' || latestJob.status === 'failed') + ) { + fetchFacilities(); + fetchScrapeJobs(activeTab); + } + }, [activeTab, latestJob]); const filteredFacilities = useMemo(() => { if (statusFilter === 'alle') return facilities; @@ -89,6 +165,15 @@ export default function AdminDashboard() { }).filter(facility => facility.course_statuses && facility.course_statuses.length > 0); }, [facilities, statusFilter]); + const latestJobSummary = useMemo(() => { + if (!latestJob?.result_summary) return ''; + + return Object.entries(latestJob.result_summary) + .filter(([, value]) => value !== null && value !== undefined && value !== '') + .map(([key, value]) => `${key.replaceAll('_', ' ')}: ${value}`) + .join(' • '); + }, [latestJob]); + const handleSelectAll = (e: React.ChangeEvent) => { if (e.target.checked) setSelectedFacilities(filteredFacilities.map(f => f.id)); else setSelectedFacilities([]); @@ -115,8 +200,8 @@ export default function AdminDashboard() { }; const handleRunScrapers = async () => { - if (isScraping) { setIsScraping(false); return; } - setIsScraping(true); + if (selectedFacilities.length === 0) return; + setIsQueueing(true); const endpoint = activeTab === 'banestatus' ? '/admin/run-scraper' : activeTab === 'medlemskap' ? '/admin/run-membership-scraper' : activeTab === 'greenfee' ? '/admin/run-greenfee-scraper' : @@ -127,13 +212,14 @@ export default function AdminDashboard() { headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ facility_ids: selectedFacilities }) }); - if (!response.ok) throw new Error("Kunne ikke starte skraping"); - const timeoutMs = Math.max(selectedFacilities.length * 40 * 1000, 60000); - setSelectedFacilities([]); - setTimeout(() => setIsScraping(false), timeoutMs); + const data = await response.json(); + if (!response.ok) throw new Error(data.detail || "Kunne ikke starte skraping"); + setSelectedFacilities([]); + fetchScrapeJobs(activeTab); } catch (error) { alert(`Feil ved start av ${activeTab}-skraperen.`); - setIsScraping(false); + } finally { + setIsQueueing(false); } }; @@ -280,14 +366,63 @@ export default function AdminDashboard() { + {latestJob && latestJob.job_type === activeTab && ( +
+
+
+

+ {JOB_LABELS[activeTab]} jobb #{latestJob.id} +

+
+ + {JOB_STATUS_LABELS[latestJob.status]} + + + {latestJob.total_facilities} anlegg + + {latestJob.created_at && ( + + Opprettet {new Date(latestJob.created_at).toLocaleString('nb-NO')} + + )} +
+ {latestJobSummary && ( +

{latestJobSummary}

+ )} + {latestJob.error_message && ( +

{latestJob.error_message}

+ )} +
+ +
+
+ )} + {/* VELDIG SYNLIGE FANER */}
@@ -465,4 +600,4 @@ export default function AdminDashboard() {
); -} \ No newline at end of file +}