Etter første innblanding fra Codex

This commit is contained in:
Erol 2026-04-10 18:37:33 +02:00
parent 97ca464aef
commit eb0f7d2907
10 changed files with 620 additions and 115 deletions

View file

@ -9,7 +9,7 @@ LOV: Aldri trunker eller slett logikk for "effektivitet".
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
""" """
from fastapi import FastAPI, HTTPException, Response, Cookie, Depends, Request, BackgroundTasks from fastapi import FastAPI, HTTPException, Response, Request, Query
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
import asyncpg import asyncpg
@ -21,10 +21,15 @@ from jose import jwt, JWTError
from passlib.context import CryptContext from passlib.context import CryptContext
from dotenv import load_dotenv from dotenv import load_dotenv
# NYE IMPORTER FOR ADMIN PANELET OG BAKGRUNNSJOBBER
from pydantic import BaseModel from pydantic import BaseModel
from typing import Optional, List, Any from typing import Optional, List, Any
import subprocess
from scrape_jobs import (
SCRAPE_JOB_TYPES,
enqueue_scrape_job,
ensure_scrape_jobs_table,
list_scrape_jobs,
)
load_dotenv() load_dotenv()
@ -92,7 +97,10 @@ def format_row(row):
d = dict(row) d = dict(row)
for key in ['status_updated_at', 'created_at', 'slope_valid_until', 'membership_updated_at']: for key in [
'status_updated_at', 'created_at', 'slope_valid_until',
'membership_updated_at', 'greenfee_updated_at', 'vtg_updated_at'
]:
if isinstance(d.get(key), (date, datetime)): if isinstance(d.get(key), (date, datetime)):
d[key] = d[key].isoformat() d[key] = d[key].isoformat()
@ -101,7 +109,8 @@ def format_row(row):
'faqs', 'shotzoom', 'social_links', 'holes', 'golfpakker', 'cooperating_clubs', 'vtg_datoer' 'faqs', 'shotzoom', 'social_links', 'holes', 'golfpakker', 'cooperating_clubs', 'vtg_datoer'
] ]
json_dict_fields = [ json_dict_fields = [
'amenities', 'vtg', 'nsg_data', 'golfamore_data', 'membership_draft' 'amenities', 'vtg', 'nsg_data', 'golfamore_data',
'membership_draft', 'greenfee_draft', 'vtg_draft'
] ]
for field in json_list_fields: for field in json_list_fields:
@ -132,38 +141,24 @@ def format_row(row):
return d return d
# --- BAKGRUNNSARBEIDER: FUNKSJON SOM KJØRER SKRAPEREN I BAKGRUNNEN --- async def queue_scrape_job(job_type: str, facility_ids: List[int]):
def run_scrape_worker(facility_ids: List[int]): if job_type not in SCRAPE_JOB_TYPES:
""" raise HTTPException(status_code=400, detail=f"Ugyldig jobbtype: {job_type}")
Kjører selve skraping-scriptet i bakgrunnen. if not facility_ids:
Slik kan frontenden et umiddelbart svar, mens skraperen jobber. raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.")
"""
print(f"🔄 STARTER BAKGRUNNSSKRAPING FOR FØLGENDE IDER: {facility_ids}")
try:
ids_arg = ",".join(map(str, facility_ids))
# NYTT: Bruker "python -u" for LIVE logging, og fjerner "> /dev/null 2>&1"
command = f"python -u scrape_status.py --ids {ids_arg}"
subprocess.run(command, shell=True, check=True)
print(f"✅ BAKGRUNNSSKRAPING FULLFØRT FOR IDER: {facility_ids}")
except subprocess.CalledProcessError as e:
print(f"❌ FEIL UNDER BAKGRUNNSSKRAPING: {e}")
except Exception as e:
print(f"🔥 UFORUTSETT FEIL UNDER BAKGRUNNSSKRAPING: {e}")
def run_membership_worker(facility_ids: List[int]): job, was_created = await enqueue_scrape_job(app.state.pool, job_type, facility_ids)
"""Kjører medlemskap-skraperen i bakgrunnen.""" status = "queued" if was_created else "already_queued"
print(f"🔄 STARTER MEDLEMSKAP-SKRAPING FOR IDER: {facility_ids}") message = (
try: f"{job_type.capitalize()}-skraping for {len(job['facility_ids'])} anlegg ble lagt i kø."
ids_arg = ",".join(map(str, facility_ids)) if was_created
command = f"python -u scrape_membership.py --ids {ids_arg}" else f"Fant allerede en aktiv {job_type}-jobb for samme anlegg."
subprocess.run(command, shell=True, check=True) )
print(f"✅ MEDLEMSKAP-SKRAPING FULLFØRT FOR IDER: {facility_ids}") return {
except Exception as e: "status": status,
print(f"🔥 FEIL UNDER MEDLEMSKAP-SKRAPING: {e}") "message": message,
"job": job,
}
@asynccontextmanager @asynccontextmanager
@ -177,6 +172,8 @@ async def lifespan(app: FastAPI):
max_size=20, max_size=20,
command_timeout=60 command_timeout=60
) )
async with app.state.pool.acquire() as conn:
await ensure_scrape_jobs_table(conn)
print("✅ Database tilkoblet og pool opprettet") print("✅ Database tilkoblet og pool opprettet")
except Exception as e: except Exception as e:
print(f"❌ Databasefeil under oppstart: {e}") print(f"❌ Databasefeil under oppstart: {e}")
@ -459,31 +456,26 @@ async def update_facility_full(facility_id: int, request: Request):
return {"status": "success", "message": "Anlegg, baner og scorekort ble oppdatert."} return {"status": "success", "message": "Anlegg, baner og scorekort ble oppdatert."}
# --- NYTT ADMIN ENDPOINT: KJØRER SKRAPEREN FOR VALGTE IDER --- # --- NYTT ADMIN ENDPOINT: KJØRER SKRAPEREN FOR VALGTE IDER ---
@app.get("/api/admin/scrape-jobs")
async def get_scrape_jobs(job_type: Optional[str] = Query(default=None), limit: int = Query(default=10, ge=1, le=50)):
"""Henter siste scrape-jobber, evt. filtrert på type."""
if job_type and job_type not in SCRAPE_JOB_TYPES:
raise HTTPException(status_code=400, detail="Ugyldig jobbtype.")
return await list_scrape_jobs(app.state.pool, job_type=job_type, limit=limit)
@app.post("/api/admin/run-scraper") @app.post("/api/admin/run-scraper")
async def run_scraper_endpoint(request: ScrapeRunRequest, background_tasks: BackgroundTasks): async def run_scraper_endpoint(request: ScrapeRunRequest):
""" """Legger banestatus-skraping i en persistent jobbkø."""
Tar imot IDer for skraping, og starter en bakgrunnsjobb. print(f"📡 API mottok forespørsel om å kjøre banestatus-skraping for IDer: {request.facility_ids}")
Gir et umiddelbart svar tilbake til frontenden slik at den slipper å vente. return await queue_scrape_job("banestatus", request.facility_ids)
"""
if not request.facility_ids:
raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.")
print(f"📡 API mottok forespørsel om å kjøre skraping for IDer: {request.facility_ids}")
background_tasks.add_task(run_scrape_worker, request.facility_ids)
return {"status": "queued", "message": f"Skraping for {len(request.facility_ids)} anlegg ble lagt i kø."}
@app.post("/api/admin/run-membership-scraper") @app.post("/api/admin/run-membership-scraper")
async def run_membership_scraper_endpoint(request: ScrapeRunRequest, background_tasks: BackgroundTasks): async def run_membership_scraper_endpoint(request: ScrapeRunRequest):
"""Tar imot IDer for medlemskapsskraping og legger jobben i kø.""" """Tar imot IDer for medlemskapsskraping og legger jobben i kø."""
if not request.facility_ids:
raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.")
print(f"📡 API mottok forespørsel om medlemskapsskraping for IDer: {request.facility_ids}") print(f"📡 API mottok forespørsel om medlemskapsskraping for IDer: {request.facility_ids}")
background_tasks.add_task(run_membership_worker, request.facility_ids) return await queue_scrape_job("medlemskap", request.facility_ids)
return {"status": "queued", "message": f"Medlemskapsskraping for {len(request.facility_ids)} anlegg ble lagt i kø."}
@app.get("/api/health") @app.get("/api/health")
async def health_check(): async def health_check():
@ -585,25 +577,11 @@ async def approve_greenfee_bulk(request: BulkGreenfeeRequest):
""", json.dumps(approval.greenfee), approval.facility_id) """, json.dumps(approval.greenfee), approval.facility_id)
return {"status": "success"} return {"status": "success"}
def run_greenfee_worker(facility_ids: List[int]):
"""Kjører greenfee-skraperen i bakgrunnen."""
print(f"🔄 STARTER GREENFEE-SKRAPING FOR IDER: {facility_ids}")
try:
import subprocess
ids_arg = ",".join(map(str, facility_ids))
command = f"python -u scrape_greenfee.py --ids {ids_arg}"
subprocess.run(command, shell=True, check=True)
print(f"✅ GREENFEE-SKRAPING FULLFØRT FOR IDER: {facility_ids}")
except Exception as e:
print(f"🔥 FEIL UNDER GREENFEE-SKRAPING: {e}")
@app.post("/api/admin/run-greenfee-scraper") @app.post("/api/admin/run-greenfee-scraper")
async def run_greenfee_scraper_endpoint(request: ScrapeRunRequest, background_tasks: BackgroundTasks): async def run_greenfee_scraper_endpoint(request: ScrapeRunRequest):
"""Tar imot IDer for greenfeeskraping og legger jobben i kø.""" """Tar imot IDer for greenfeeskraping og legger jobben i kø."""
if not request.facility_ids: print(f"📡 API mottok forespørsel om greenfee-skraping for IDer: {request.facility_ids}")
raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.") return await queue_scrape_job("greenfee", request.facility_ids)
background_tasks.add_task(run_greenfee_worker, request.facility_ids)
return {"status": "queued", "message": "Skraping startet"}
# --- VEIEN TIL GOLF (VTG) "VASKERI" ENDEPUNKTER --- # --- VEIEN TIL GOLF (VTG) "VASKERI" ENDEPUNKTER ---
@ -638,26 +616,12 @@ async def approve_vtg_bulk(request: BulkVtgRequest):
""", approval.vtg_pris, approval.vtg_beskrivelse, datoer_json, approval.facility_id) """, approval.vtg_pris, approval.vtg_beskrivelse, datoer_json, approval.facility_id)
return {"status": "success"} return {"status": "success"}
def run_vtg_worker(facility_ids: List[int]):
"""Kjører VTG-skraperen i bakgrunnen."""
print(f"🔄 STARTER VTG-SKRAPING FOR IDER: {facility_ids}")
try:
import subprocess
ids_arg = ",".join(map(str, facility_ids))
command = f"python -u scrape_vtg.py --ids {ids_arg}"
subprocess.run(command, shell=True, check=True)
print(f"✅ VTG-SKRAPING FULLFØRT FOR IDER: {facility_ids}")
except Exception as e:
print(f"🔥 FEIL UNDER VTG-SKRAPING: {e}")
@app.post("/api/admin/run-vtg-scraper") @app.post("/api/admin/run-vtg-scraper")
async def run_vtg_scraper_endpoint(request: ScrapeRunRequest, background_tasks: BackgroundTasks): async def run_vtg_scraper_endpoint(request: ScrapeRunRequest):
"""Tar imot IDer for VTG-skraping og legger jobben i kø.""" """Tar imot IDer for VTG-skraping og legger jobben i kø."""
if not request.facility_ids: print(f"📡 API mottok forespørsel om VTG-skraping for IDer: {request.facility_ids}")
raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.") return await queue_scrape_job("vtg", request.facility_ids)
background_tasks.add_task(run_vtg_worker, request.facility_ids)
return {"status": "queued", "message": "Skraping startet"}
if __name__ == "__main__": if __name__ == "__main__":
import uvicorn import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000) uvicorn.run(app, host="0.0.0.0", port=8000)

View file

@ -107,6 +107,10 @@ Returner KUN et gyldig JSON-objekt med nøyaktig følgende struktur:
async def run_greenfee_scraper(facility_ids=None): async def run_greenfee_scraper(facility_ids=None):
print("🚀 Starter Greenfee-skraperen...") print("🚀 Starter Greenfee-skraperen...")
conn = await asyncpg.connect(DB_URL) conn = await asyncpg.connect(DB_URL)
facilities = []
analyzed_count = 0
saved_count = 0
skipped_count = 0
try: try:
query = "SELECT id, name, greenfee_url FROM facilities WHERE greenfee_url IS NOT NULL AND greenfee_url != ''" query = "SELECT id, name, greenfee_url FROM facilities WHERE greenfee_url IS NOT NULL AND greenfee_url != ''"
@ -136,12 +140,16 @@ async def run_greenfee_scraper(facility_ids=None):
if len(combined_text) < 50: if len(combined_text) < 50:
print(" ⚠️ Fant for lite tekst, hopper over.") print(" ⚠️ Fant for lite tekst, hopper over.")
skipped_count += 1
continue continue
draft_data = analyze_greenfee_with_gemini(combined_text[:25000], name) draft_data = analyze_greenfee_with_gemini(combined_text[:25000], name)
if not draft_data: if not draft_data:
skipped_count += 1
continue continue
analyzed_count += 1
funnet_priser = len(draft_data.get('foreslatt_greenfee', [])) funnet_priser = len(draft_data.get('foreslatt_greenfee', []))
funnet_klubber = len(draft_data.get('foreslatt_avtaleklubber', [])) funnet_klubber = len(draft_data.get('foreslatt_avtaleklubber', []))
@ -154,6 +162,7 @@ async def run_greenfee_scraper(facility_ids=None):
""", json.dumps(draft_data), fac_id) """, json.dumps(draft_data), fac_id)
print(" 💾 Greenfee-utkast lagret i databasen!") print(" 💾 Greenfee-utkast lagret i databasen!")
saved_count += 1
await browser.close() await browser.close()
@ -161,6 +170,13 @@ async def run_greenfee_scraper(facility_ids=None):
await conn.close() await conn.close()
print("\n🏁 Skraping fullført.") print("\n🏁 Skraping fullført.")
return {
"processed_facilities": len(facilities),
"analyzed_facilities": analyzed_count,
"saved_drafts": saved_count,
"skipped_facilities": skipped_count,
}
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Skrap greenfeepriser via AI.") parser = argparse.ArgumentParser(description="Skrap greenfeepriser via AI.")
parser.add_argument("--ids", type=str, help="Kommaseparert liste med facility IDs (eks: 1,5,12)") parser.add_argument("--ids", type=str, help="Kommaseparert liste med facility IDs (eks: 1,5,12)")
@ -170,4 +186,4 @@ if __name__ == "__main__":
if args.ids: if args.ids:
ids_to_scrape = [int(x.strip()) for x in args.ids.split(",")] ids_to_scrape = [int(x.strip()) for x in args.ids.split(",")]
asyncio.run(run_greenfee_scraper(ids_to_scrape)) asyncio.run(run_greenfee_scraper(ids_to_scrape))

24
backend/scrape_job_runner.py Executable file
View file

@ -0,0 +1,24 @@
from typing import Any
from scrape_greenfee import run_greenfee_scraper
from scrape_membership import run_scraper as run_membership_scraper
from scrape_status import run_daily_scraping
from scrape_vtg import run_vtg_scraper
async def run_scrape_job(job: dict[str, Any]) -> dict[str, Any]:
job_type = job["job_type"]
facility_ids = job.get("facility_ids") or []
if job_type == "banestatus":
result = await run_daily_scraping(facility_ids)
elif job_type == "medlemskap":
result = await run_membership_scraper(facility_ids)
elif job_type == "greenfee":
result = await run_greenfee_scraper(facility_ids)
elif job_type == "vtg":
result = await run_vtg_scraper(facility_ids)
else:
raise ValueError(f"Ukjent scrape-jobbtype: {job_type}")
return result or {}

236
backend/scrape_jobs.py Executable file
View file

@ -0,0 +1,236 @@
import json
from datetime import date, datetime
from typing import Any, Iterable
SCRAPE_JOB_TYPES = ("banestatus", "medlemskap", "greenfee", "vtg")
SCRAPE_JOB_STATUSES = ("pending", "running", "completed", "failed")
def normalize_facility_ids(facility_ids: Iterable[int]) -> list[int]:
cleaned = {
int(facility_id)
for facility_id in facility_ids
if str(facility_id).strip()
}
return sorted(facility_id for facility_id in cleaned if facility_id > 0)
def _parse_json(value: Any, fallback: Any) -> Any:
if value is None:
return fallback
if isinstance(value, str):
try:
return json.loads(value)
except json.JSONDecodeError:
return fallback
return value
def format_scrape_job_row(row: Any) -> dict[str, Any] | None:
if row is None:
return None
data = dict(row)
for key in ("created_at", "started_at", "finished_at", "updated_at", "last_heartbeat_at"):
if isinstance(data.get(key), (date, datetime)):
data[key] = data[key].isoformat()
facility_ids = _parse_json(data.get("facility_ids"), [])
data["facility_ids"] = facility_ids if isinstance(facility_ids, list) else []
result_summary = _parse_json(data.get("result_summary"), {})
data["result_summary"] = result_summary if isinstance(result_summary, dict) else {}
return data
async def ensure_scrape_jobs_table(conn) -> None:
await conn.execute(
"""
CREATE TABLE IF NOT EXISTS scrape_jobs (
id SERIAL PRIMARY KEY,
job_type VARCHAR(50) NOT NULL CHECK (job_type IN ('banestatus', 'medlemskap', 'greenfee', 'vtg')),
facility_ids JSONB NOT NULL DEFAULT '[]'::jsonb,
total_facilities INTEGER NOT NULL DEFAULT 0,
status VARCHAR(20) NOT NULL DEFAULT 'pending' CHECK (status IN ('pending', 'running', 'completed', 'failed')),
requested_by TEXT,
worker_name TEXT,
attempt_count INTEGER NOT NULL DEFAULT 0,
error_message TEXT,
result_summary JSONB NOT NULL DEFAULT '{}'::jsonb,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
started_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
last_heartbeat_at TIMESTAMPTZ
)
"""
)
await conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_scrape_jobs_status_created_at
ON scrape_jobs (status, created_at)
"""
)
await conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_scrape_jobs_job_type_created_at
ON scrape_jobs (job_type, created_at DESC)
"""
)
async def enqueue_scrape_job(pool, job_type: str, facility_ids: Iterable[int], requested_by: str | None = None) -> tuple[dict[str, Any], bool]:
if job_type not in SCRAPE_JOB_TYPES:
raise ValueError(f"Ugyldig job_type: {job_type}")
normalized_ids = normalize_facility_ids(facility_ids)
facility_ids_json = json.dumps(normalized_ids)
async with pool.acquire() as conn:
async with conn.transaction():
existing = await conn.fetchrow(
"""
SELECT *
FROM scrape_jobs
WHERE job_type = $1
AND status IN ('pending', 'running')
AND facility_ids = $2::jsonb
ORDER BY created_at DESC
LIMIT 1
""",
job_type,
facility_ids_json,
)
if existing:
return format_scrape_job_row(existing), False
row = await conn.fetchrow(
"""
INSERT INTO scrape_jobs (
job_type,
facility_ids,
total_facilities,
requested_by
)
VALUES ($1, $2::jsonb, $3, $4)
RETURNING *
""",
job_type,
facility_ids_json,
len(normalized_ids),
requested_by,
)
return format_scrape_job_row(row), True
async def list_scrape_jobs(pool, job_type: str | None = None, limit: int = 10) -> list[dict[str, Any]]:
safe_limit = max(1, min(limit, 50))
async with pool.acquire() as conn:
if job_type:
rows = await conn.fetch(
"""
SELECT *
FROM scrape_jobs
WHERE job_type = $1
ORDER BY created_at DESC
LIMIT $2
""",
job_type,
safe_limit,
)
else:
rows = await conn.fetch(
"""
SELECT *
FROM scrape_jobs
ORDER BY created_at DESC
LIMIT $1
""",
safe_limit,
)
return [format_scrape_job_row(row) for row in rows]
async def claim_next_scrape_job(pool, worker_name: str) -> dict[str, Any] | None:
async with pool.acquire() as conn:
async with conn.transaction():
row = await conn.fetchrow(
"""
WITH next_job AS (
SELECT id
FROM scrape_jobs
WHERE status = 'pending'
ORDER BY created_at ASC
FOR UPDATE SKIP LOCKED
LIMIT 1
)
UPDATE scrape_jobs AS job
SET status = 'running',
worker_name = $1,
attempt_count = job.attempt_count + 1,
started_at = COALESCE(job.started_at, NOW()),
updated_at = NOW(),
last_heartbeat_at = NOW(),
error_message = NULL
FROM next_job
WHERE job.id = next_job.id
RETURNING job.*
""",
worker_name,
)
return format_scrape_job_row(row)
async def heartbeat_scrape_job(pool, job_id: int) -> None:
async with pool.acquire() as conn:
await conn.execute(
"""
UPDATE scrape_jobs
SET last_heartbeat_at = NOW(),
updated_at = NOW()
WHERE id = $1
""",
job_id,
)
async def complete_scrape_job(pool, job_id: int, result_summary: dict[str, Any] | None = None) -> None:
payload = json.dumps(result_summary or {})
async with pool.acquire() as conn:
await conn.execute(
"""
UPDATE scrape_jobs
SET status = 'completed',
finished_at = NOW(),
updated_at = NOW(),
last_heartbeat_at = NOW(),
error_message = NULL,
result_summary = $2::jsonb
WHERE id = $1
""",
job_id,
payload,
)
async def fail_scrape_job(pool, job_id: int, error_message: str, result_summary: dict[str, Any] | None = None) -> None:
payload = json.dumps(result_summary or {})
async with pool.acquire() as conn:
await conn.execute(
"""
UPDATE scrape_jobs
SET status = 'failed',
finished_at = NOW(),
updated_at = NOW(),
last_heartbeat_at = NOW(),
error_message = $2,
result_summary = $3::jsonb
WHERE id = $1
""",
job_id,
error_message[:1000],
payload,
)

View file

@ -102,6 +102,10 @@ Merk: Prisene SKAL være tall (integer), ikke tekst. Sett til null hvis du ikke
async def run_scraper(facility_ids=None): async def run_scraper(facility_ids=None):
print("🚀 Starter Medlemskaps-skraperen (Støtter multi-URL)...") print("🚀 Starter Medlemskaps-skraperen (Støtter multi-URL)...")
conn = await asyncpg.connect(DB_URL) conn = await asyncpg.connect(DB_URL)
facilities = []
analyzed_count = 0
saved_count = 0
skipped_count = 0
try: try:
query = "SELECT id, name, medlemskap_url FROM facilities WHERE medlemskap_url IS NOT NULL AND medlemskap_url != ''" query = "SELECT id, name, medlemskap_url FROM facilities WHERE medlemskap_url IS NOT NULL AND medlemskap_url != ''"
@ -132,13 +136,17 @@ async def run_scraper(facility_ids=None):
if len(combined_text) < 50: if len(combined_text) < 50:
print(" ⚠️ Fant for lite tekst, hopper over.") print(" ⚠️ Fant for lite tekst, hopper over.")
skipped_count += 1
continue continue
# Kutter teksten for å ikke overbelaste Gemini (ca 25000 tegn maks) # Kutter teksten for å ikke overbelaste Gemini (ca 25000 tegn maks)
draft_data = analyze_with_gemini(combined_text[:25000], name) draft_data = analyze_with_gemini(combined_text[:25000], name)
if not draft_data: if not draft_data:
skipped_count += 1
continue continue
analyzed_count += 1
print(f" ✅ AI foreslår: Standard: {draft_data.get('foreslatt_standard_pris')} | Rimeligste: {draft_data.get('foreslatt_rimeligste_pris')}") print(f" ✅ AI foreslår: Standard: {draft_data.get('foreslatt_standard_pris')} | Rimeligste: {draft_data.get('foreslatt_rimeligste_pris')}")
@ -149,6 +157,7 @@ async def run_scraper(facility_ids=None):
""", json.dumps(draft_data), fac_id) """, json.dumps(draft_data), fac_id)
print(" 💾 Utkast lagret i databasen!") print(" 💾 Utkast lagret i databasen!")
saved_count += 1
await browser.close() await browser.close()
@ -156,6 +165,13 @@ async def run_scraper(facility_ids=None):
await conn.close() await conn.close()
print("\n🏁 Skraping fullført.") print("\n🏁 Skraping fullført.")
return {
"processed_facilities": len(facilities),
"analyzed_facilities": analyzed_count,
"saved_drafts": saved_count,
"skipped_facilities": skipped_count,
}
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Skrap medlemskapspriser via AI.") parser = argparse.ArgumentParser(description="Skrap medlemskapspriser via AI.")
parser.add_argument("--ids", type=str, help="Kommaseparert liste med facility IDs (eks: 1,5,12)") parser.add_argument("--ids", type=str, help="Kommaseparert liste med facility IDs (eks: 1,5,12)")
@ -165,4 +181,4 @@ if __name__ == "__main__":
if args.ids: if args.ids:
ids_to_scrape = [int(x.strip()) for x in args.ids.split(",")] ids_to_scrape = [int(x.strip()) for x in args.ids.split(",")]
asyncio.run(run_scraper(ids_to_scrape)) asyncio.run(run_scraper(ids_to_scrape))

View file

@ -166,7 +166,12 @@ async def run_daily_scraping(facility_ids=None):
if not facilities: if not facilities:
print("⚠️ Fant ingen anlegg å skrape.") print("⚠️ Fant ingen anlegg å skrape.")
await conn.close() await conn.close()
return return {
"processed_facilities": 0,
"updated_courses": 0,
"warnings": 0,
"successes": 0,
}
changes, warnings, successes = [], [], [] changes, warnings, successes = [], [], []
@ -315,6 +320,12 @@ async def run_daily_scraping(facility_ids=None):
await conn.close() await conn.close()
send_report(changes, warnings, successes) send_report(changes, warnings, successes)
print("🏁 Ferdig.") print("🏁 Ferdig.")
return {
"processed_facilities": len(facilities),
"updated_courses": len(changes),
"warnings": len(warnings),
"successes": len(successes),
}
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="TeeOff Status Scraper") parser = argparse.ArgumentParser(description="TeeOff Status Scraper")
@ -329,4 +340,4 @@ if __name__ == "__main__":
print("❌ Feil format på --ids. Må være kommaseparerte tall, f.eks: 1,4,12") print("❌ Feil format på --ids. Må være kommaseparerte tall, f.eks: 1,4,12")
exit(1) exit(1)
asyncio.run(run_daily_scraping(facility_ids_list)) asyncio.run(run_daily_scraping(facility_ids_list))

View file

@ -97,6 +97,10 @@ Merk: Sett foreslatt_vtg_pris til null (null) hvis du ikke finner den. Hvis du i
async def run_vtg_scraper(facility_ids=None): async def run_vtg_scraper(facility_ids=None):
print("🚀 Starter Veien til Golf (VTG) skraperen...") print("🚀 Starter Veien til Golf (VTG) skraperen...")
conn = await asyncpg.connect(DB_URL) conn = await asyncpg.connect(DB_URL)
facilities = []
analyzed_count = 0
saved_count = 0
skipped_count = 0
try: try:
query = "SELECT id, name, vtg_lenke FROM facilities WHERE vtg_lenke IS NOT NULL AND vtg_lenke != ''" query = "SELECT id, name, vtg_lenke FROM facilities WHERE vtg_lenke IS NOT NULL AND vtg_lenke != ''"
@ -126,12 +130,16 @@ async def run_vtg_scraper(facility_ids=None):
if len(combined_text) < 50: if len(combined_text) < 50:
print(" ⚠️ Fant for lite tekst, hopper over.") print(" ⚠️ Fant for lite tekst, hopper over.")
skipped_count += 1
continue continue
draft_data = analyze_vtg_with_gemini(combined_text[:25000], name) draft_data = analyze_vtg_with_gemini(combined_text[:25000], name)
if not draft_data: if not draft_data:
skipped_count += 1
continue continue
analyzed_count += 1
print(f" ✅ AI fant pris: {draft_data.get('foreslatt_vtg_pris')}, og {len(draft_data.get('foreslatt_vtg_datoer', []))} datoer.") print(f" ✅ AI fant pris: {draft_data.get('foreslatt_vtg_pris')}, og {len(draft_data.get('foreslatt_vtg_datoer', []))} datoer.")
@ -142,6 +150,7 @@ async def run_vtg_scraper(facility_ids=None):
""", json.dumps(draft_data), fac_id) """, json.dumps(draft_data), fac_id)
print(" 💾 VTG-utkast lagret i databasen!") print(" 💾 VTG-utkast lagret i databasen!")
saved_count += 1
await browser.close() await browser.close()
@ -149,6 +158,13 @@ async def run_vtg_scraper(facility_ids=None):
await conn.close() await conn.close()
print("\n🏁 Skraping fullført.") print("\n🏁 Skraping fullført.")
return {
"processed_facilities": len(facilities),
"analyzed_facilities": analyzed_count,
"saved_drafts": saved_count,
"skipped_facilities": skipped_count,
}
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Skrap VTG via AI.") parser = argparse.ArgumentParser(description="Skrap VTG via AI.")
parser.add_argument("--ids", type=str, help="Kommaseparert liste med facility IDs (eks: 1,5,12)") parser.add_argument("--ids", type=str, help="Kommaseparert liste med facility IDs (eks: 1,5,12)")
@ -158,4 +174,4 @@ if __name__ == "__main__":
if args.ids: if args.ids:
ids_to_scrape = [int(x.strip()) for x in args.ids.split(",")] ids_to_scrape = [int(x.strip()) for x in args.ids.split(",")]
asyncio.run(run_vtg_scraper(ids_to_scrape)) asyncio.run(run_vtg_scraper(ids_to_scrape))

77
backend/worker.py Executable file
View file

@ -0,0 +1,77 @@
import asyncio
import os
import traceback
import asyncpg
from dotenv import load_dotenv
from scrape_job_runner import run_scrape_job
from scrape_jobs import (
claim_next_scrape_job,
complete_scrape_job,
ensure_scrape_jobs_table,
fail_scrape_job,
heartbeat_scrape_job,
)
load_dotenv()
DB_URL = os.getenv("DATABASE_URL", "postgresql://teeoff_admin:teeoff_secret_password@db:5432/teeoff")
WORKER_NAME = os.getenv("SCRAPE_WORKER_NAME", f"scrape-worker-{os.getpid()}")
POLL_INTERVAL_SECONDS = int(os.getenv("SCRAPE_WORKER_POLL_INTERVAL", "5"))
HEARTBEAT_INTERVAL_SECONDS = int(os.getenv("SCRAPE_WORKER_HEARTBEAT_INTERVAL", "15"))
async def heartbeat_loop(pool, job_id: int, stop_event: asyncio.Event) -> None:
while not stop_event.is_set():
try:
await asyncio.wait_for(stop_event.wait(), timeout=HEARTBEAT_INTERVAL_SECONDS)
except asyncio.TimeoutError:
try:
await heartbeat_scrape_job(pool, job_id)
except Exception as exc:
print(f"⚠️ Klarte ikke å oppdatere heartbeat for jobb {job_id}: {exc}")
async def main() -> None:
print(f"🚀 Starter scrape worker: {WORKER_NAME}")
pool = await asyncpg.create_pool(DB_URL, min_size=1, max_size=5, command_timeout=60)
try:
async with pool.acquire() as conn:
await ensure_scrape_jobs_table(conn)
while True:
job = await claim_next_scrape_job(pool, WORKER_NAME)
if not job:
await asyncio.sleep(POLL_INTERVAL_SECONDS)
continue
job_id = job["id"]
print(f"🎯 Worker plukket jobb #{job_id} ({job['job_type']}) for {len(job.get('facility_ids', []))} anlegg")
stop_event = asyncio.Event()
heartbeat_task = asyncio.create_task(heartbeat_loop(pool, job_id, stop_event))
try:
result_summary = await run_scrape_job(job)
await complete_scrape_job(pool, job_id, result_summary)
print(f"✅ Jobb #{job_id} fullført")
except Exception as exc:
trace = traceback.format_exc(limit=5)
print(f"🔥 Jobb #{job_id} feilet: {exc}\n{trace}")
await fail_scrape_job(
pool,
job_id,
str(exc),
{"traceback": trace},
)
finally:
stop_event.set()
await heartbeat_task
finally:
await pool.close()
if __name__ == "__main__":
asyncio.run(main())

View file

@ -25,6 +25,16 @@ services:
- db - db
restart: unless-stopped restart: unless-stopped
worker:
build: ./backend
container_name: teeoff_worker
command: python worker.py
volumes:
- ./backend:/app
depends_on:
- db
restart: unless-stopped
frontend: frontend:
build: ./frontend build: ./frontend
container_name: teeoff_frontend container_name: teeoff_frontend
@ -38,4 +48,4 @@ services:
restart: unless-stopped restart: unless-stopped
volumes: volumes:
teeoff_db_data: teeoff_db_data:

View file

@ -3,11 +3,42 @@
* TEE OFF ADMIN DASHBOARD v4.0 - KONTROLLPANEL * TEE OFF ADMIN DASHBOARD v4.0 - KONTROLLPANEL
*/ */
import { useState, useEffect, useMemo } from 'react'; import { useEffect, useMemo, useRef, useState } from 'react';
import { API_URL } from "@/config/constants"; import { API_URL } from "@/config/constants";
import ScrapeMethodSelect from "@/components/ScrapeMethodSelect"; import ScrapeMethodSelect from "@/components/ScrapeMethodSelect";
import Link from 'next/link'; import Link from 'next/link';
type AdminTab = 'banestatus' | 'medlemskap' | 'greenfee' | 'vtg';
type ScrapeJobStatus = 'pending' | 'running' | 'completed' | 'failed';
type ScrapeJob = {
id: number;
job_type: AdminTab;
status: ScrapeJobStatus;
facility_ids: number[];
total_facilities: number;
error_message?: string | null;
result_summary?: Record<string, number | string | null>;
created_at?: string | null;
started_at?: string | null;
finished_at?: string | null;
};
const JOB_LABELS: Record<AdminTab, string> = {
banestatus: 'Banestatus',
medlemskap: 'Medlemskap',
greenfee: 'Greenfee',
vtg: 'VTG',
};
const JOB_STATUS_LABELS: Record<ScrapeJobStatus, string> = {
pending: 'I kø',
running: 'Kjører',
completed: 'Fullført',
failed: 'Feilet',
};
const InlineEdit = ({ facilityId, field, initialValue, onSave }: { facilityId: number, field: string, initialValue: string, onSave: (id: number, field: string, val: string) => void }) => { const InlineEdit = ({ facilityId, field, initialValue, onSave }: { facilityId: number, field: string, initialValue: string, onSave: (id: number, field: string, val: string) => void }) => {
const [isEditing, setIsEditing] = useState(false); const [isEditing, setIsEditing] = useState(false);
const [value, setValue] = useState(initialValue || ''); const [value, setValue] = useState(initialValue || '');
@ -45,13 +76,15 @@ export default function AdminDashboard() {
const [facilities, setFacilities] = useState<any[]>([]); const [facilities, setFacilities] = useState<any[]>([]);
const [loading, setLoading] = useState(true); const [loading, setLoading] = useState(true);
const [selectedFacilities, setSelectedFacilities] = useState<number[]>([]); const [selectedFacilities, setSelectedFacilities] = useState<number[]>([]);
const [isScraping, setIsScraping] = useState(false); const [scrapeJobs, setScrapeJobs] = useState<ScrapeJob[]>([]);
const [isQueueing, setIsQueueing] = useState(false);
const [isSidebarCollapsed, setIsSidebarCollapsed] = useState(false); const [isSidebarCollapsed, setIsSidebarCollapsed] = useState(false);
const [editingFacility, setEditingFacility] = useState<any | null>(null); const [editingFacility, setEditingFacility] = useState<any | null>(null);
const [activeTab, setActiveTab] = useState<'banestatus' | 'medlemskap' | 'greenfee' | 'vtg'>('banestatus'); const [activeTab, setActiveTab] = useState<AdminTab>('banestatus');
const [statusFilter, setStatusFilter] = useState('alle'); const [statusFilter, setStatusFilter] = useState('alle');
const [editForm, setEditForm] = useState({ scrape_status_url: '', scrape_status_selector: '', scrape_method: '', ai_instruction: '', courses: [] as any[] }); const [editForm, setEditForm] = useState({ scrape_status_url: '', scrape_status_selector: '', scrape_method: '', ai_instruction: '', courses: [] as any[] });
const [isSaving, setIsSaving] = useState(false); const [isSaving, setIsSaving] = useState(false);
const latestJobStateRef = useRef<string | null>(null);
const fetchFacilities = () => { const fetchFacilities = () => {
fetch(`${API_URL}/facilities`) fetch(`${API_URL}/facilities`)
@ -63,15 +96,58 @@ export default function AdminDashboard() {
.catch(() => setLoading(false)); .catch(() => setLoading(false));
}; };
useEffect(() => { fetchFacilities(); }, []); const fetchScrapeJobs = (tab: AdminTab = activeTab) => {
fetch(`${API_URL}/admin/scrape-jobs?job_type=${tab}&limit=5`)
.then(res => res.json())
.then(data => {
setScrapeJobs(Array.isArray(data) ? data : []);
})
.catch(() => setScrapeJobs([]));
};
const activeJob = useMemo(
() => scrapeJobs.find(job => job.status === 'pending' || job.status === 'running') || null,
[scrapeJobs]
);
const latestJob = scrapeJobs[0] || null;
const isScraping = !!activeJob;
useEffect(() => { useEffect(() => {
let interval: NodeJS.Timeout; fetchFacilities();
if (isScraping) interval = setInterval(() => fetchFacilities(), 10000); fetchScrapeJobs('banestatus');
}, []);
useEffect(() => {
const interval = setInterval(() => fetchScrapeJobs(activeTab), 5000);
return () => clearInterval(interval);
}, [activeTab]);
useEffect(() => {
setSelectedFacilities([]);
fetchScrapeJobs(activeTab);
}, [activeTab]);
useEffect(() => {
if (!isScraping) return;
const interval = setInterval(() => fetchFacilities(), 10000);
return () => clearInterval(interval); return () => clearInterval(interval);
}, [isScraping]); }, [isScraping]);
useEffect(() => { setSelectedFacilities([]); }, [activeTab]); useEffect(() => {
const currentState = latestJob ? `${latestJob.id}:${latestJob.status}` : null;
const previousState = latestJobStateRef.current;
latestJobStateRef.current = currentState;
if (
previousState &&
previousState !== currentState &&
latestJob &&
(latestJob.status === 'completed' || latestJob.status === 'failed')
) {
fetchFacilities();
fetchScrapeJobs(activeTab);
}
}, [activeTab, latestJob]);
const filteredFacilities = useMemo(() => { const filteredFacilities = useMemo(() => {
if (statusFilter === 'alle') return facilities; if (statusFilter === 'alle') return facilities;
@ -89,6 +165,15 @@ export default function AdminDashboard() {
}).filter(facility => facility.course_statuses && facility.course_statuses.length > 0); }).filter(facility => facility.course_statuses && facility.course_statuses.length > 0);
}, [facilities, statusFilter]); }, [facilities, statusFilter]);
const latestJobSummary = useMemo(() => {
if (!latestJob?.result_summary) return '';
return Object.entries(latestJob.result_summary)
.filter(([, value]) => value !== null && value !== undefined && value !== '')
.map(([key, value]) => `${key.replaceAll('_', ' ')}: ${value}`)
.join(' • ');
}, [latestJob]);
const handleSelectAll = (e: React.ChangeEvent<HTMLInputElement>) => { const handleSelectAll = (e: React.ChangeEvent<HTMLInputElement>) => {
if (e.target.checked) setSelectedFacilities(filteredFacilities.map(f => f.id)); if (e.target.checked) setSelectedFacilities(filteredFacilities.map(f => f.id));
else setSelectedFacilities([]); else setSelectedFacilities([]);
@ -115,8 +200,8 @@ export default function AdminDashboard() {
}; };
const handleRunScrapers = async () => { const handleRunScrapers = async () => {
if (isScraping) { setIsScraping(false); return; } if (selectedFacilities.length === 0) return;
setIsScraping(true); setIsQueueing(true);
const endpoint = activeTab === 'banestatus' ? '/admin/run-scraper' : const endpoint = activeTab === 'banestatus' ? '/admin/run-scraper' :
activeTab === 'medlemskap' ? '/admin/run-membership-scraper' : activeTab === 'medlemskap' ? '/admin/run-membership-scraper' :
activeTab === 'greenfee' ? '/admin/run-greenfee-scraper' : activeTab === 'greenfee' ? '/admin/run-greenfee-scraper' :
@ -127,13 +212,14 @@ export default function AdminDashboard() {
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ facility_ids: selectedFacilities }) body: JSON.stringify({ facility_ids: selectedFacilities })
}); });
if (!response.ok) throw new Error("Kunne ikke starte skraping"); const data = await response.json();
const timeoutMs = Math.max(selectedFacilities.length * 40 * 1000, 60000); if (!response.ok) throw new Error(data.detail || "Kunne ikke starte skraping");
setSelectedFacilities([]); setSelectedFacilities([]);
setTimeout(() => setIsScraping(false), timeoutMs); fetchScrapeJobs(activeTab);
} catch (error) { } catch (error) {
alert(`Feil ved start av ${activeTab}-skraperen.`); alert(`Feil ved start av ${activeTab}-skraperen.`);
setIsScraping(false); } finally {
setIsQueueing(false);
} }
}; };
@ -280,14 +366,63 @@ export default function AdminDashboard() {
<button <button
onClick={handleRunScrapers} onClick={handleRunScrapers}
disabled={selectedFacilities.length === 0 && !isScraping} disabled={selectedFacilities.length === 0 || isQueueing}
className={`text-white px-6 py-4 rounded-2xl text-[10px] font-black uppercase tracking-widest shadow-xl transition-all whitespace-nowrap className={`text-white px-6 py-4 rounded-2xl text-[10px] font-black uppercase tracking-widest shadow-xl transition-all whitespace-nowrap
${isScraping ? 'bg-yellow-500 animate-pulse cursor-pointer hover:bg-yellow-600' : 'bg-[#8bc34a] hover:scale-105 disabled:bg-gray-200 disabled:text-gray-400 disabled:cursor-not-allowed'}`} ${isQueueing ? 'bg-yellow-500 animate-pulse' : 'bg-[#8bc34a] hover:scale-105 disabled:bg-gray-200 disabled:text-gray-400 disabled:cursor-not-allowed'}`}
> >
{isScraping ? '🤖 Skraper... Klikk for å avslutte' : `Kjør ${activeTab}-skrapere (${selectedFacilities.length})`} {isQueueing ? 'Legger i kø...' : isScraping ? `Legg ${activeTab}-skraping i kø (${selectedFacilities.length})` : `Kjør ${activeTab}-skrapere (${selectedFacilities.length})`}
</button> </button>
</header> </header>
{latestJob && latestJob.job_type === activeTab && (
<div className={`mb-8 rounded-[1.75rem] border p-5 md:p-6 animate-fade-in ${
latestJob.status === 'failed'
? 'bg-red-50 border-red-100'
: latestJob.status === 'completed'
? 'bg-[#f1f7ed] border-[#d8e8c8]'
: 'bg-amber-50 border-amber-100'
}`}>
<div className="flex flex-col md:flex-row md:items-start md:justify-between gap-4">
<div className="space-y-2">
<p className="text-[10px] font-black uppercase tracking-[0.2em] text-gray-500">
{JOB_LABELS[activeTab]} jobb #{latestJob.id}
</p>
<div className="flex flex-wrap items-center gap-3">
<span className={`inline-flex px-3 py-1 rounded-xl text-[10px] font-black uppercase tracking-widest ${
latestJob.status === 'failed'
? 'bg-red-100 text-red-700'
: latestJob.status === 'completed'
? 'bg-[#d8e8c8] text-[#11280f]'
: 'bg-amber-100 text-amber-700 animate-pulse'
}`}>
{JOB_STATUS_LABELS[latestJob.status]}
</span>
<span className="text-xs font-bold text-[#11280f]">
{latestJob.total_facilities} anlegg
</span>
{latestJob.created_at && (
<span className="text-xs text-gray-500">
Opprettet {new Date(latestJob.created_at).toLocaleString('nb-NO')}
</span>
)}
</div>
{latestJobSummary && (
<p className="text-xs text-gray-600 leading-relaxed">{latestJobSummary}</p>
)}
{latestJob.error_message && (
<p className="text-xs text-red-600 leading-relaxed">{latestJob.error_message}</p>
)}
</div>
<button
onClick={() => fetchScrapeJobs(activeTab)}
className="px-4 py-2 rounded-xl bg-white text-[10px] font-black uppercase tracking-widest text-gray-500 border border-gray-200 hover:border-[#8bc34a] hover:text-[#11280f] transition-colors"
>
Oppdater status
</button>
</div>
</div>
)}
{/* VELDIG SYNLIGE FANER */} {/* VELDIG SYNLIGE FANER */}
<div className="flex gap-2 mb-8 border-b-2 border-gray-100 pb-0 overflow-x-auto hide-scrollbar"> <div className="flex gap-2 mb-8 border-b-2 border-gray-100 pb-0 overflow-x-auto hide-scrollbar">
<button onClick={() => setActiveTab('banestatus')} className={`px-6 py-3 text-xs font-black uppercase tracking-widest rounded-t-xl transition-all whitespace-nowrap ${activeTab === 'banestatus' ? 'bg-[#8bc34a] text-white shadow-md' : 'bg-gray-50 text-gray-500 hover:bg-gray-200'}`}>Banestatus</button> <button onClick={() => setActiveTab('banestatus')} className={`px-6 py-3 text-xs font-black uppercase tracking-widest rounded-t-xl transition-all whitespace-nowrap ${activeTab === 'banestatus' ? 'bg-[#8bc34a] text-white shadow-md' : 'bg-gray-50 text-gray-500 hover:bg-gray-200'}`}>Banestatus</button>
@ -465,4 +600,4 @@ export default function AdminDashboard() {
</main> </main>
</div> </div>
); );
} }