Etter første innblanding fra Codex

This commit is contained in:
Erol 2026-04-10 18:37:33 +02:00
parent 97ca464aef
commit eb0f7d2907
10 changed files with 620 additions and 115 deletions

View file

@ -9,7 +9,7 @@ LOV: Aldri trunker eller slett logikk for "effektivitet".
---------------------------------------------------------------------------
"""
from fastapi import FastAPI, HTTPException, Response, Cookie, Depends, Request, BackgroundTasks
from fastapi import FastAPI, HTTPException, Response, Request, Query
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import asyncpg
@ -21,10 +21,15 @@ from jose import jwt, JWTError
from passlib.context import CryptContext
from dotenv import load_dotenv
# NYE IMPORTER FOR ADMIN PANELET OG BAKGRUNNSJOBBER
from pydantic import BaseModel
from typing import Optional, List, Any
import subprocess
from scrape_jobs import (
SCRAPE_JOB_TYPES,
enqueue_scrape_job,
ensure_scrape_jobs_table,
list_scrape_jobs,
)
load_dotenv()
@ -92,7 +97,10 @@ def format_row(row):
d = dict(row)
for key in ['status_updated_at', 'created_at', 'slope_valid_until', 'membership_updated_at']:
for key in [
'status_updated_at', 'created_at', 'slope_valid_until',
'membership_updated_at', 'greenfee_updated_at', 'vtg_updated_at'
]:
if isinstance(d.get(key), (date, datetime)):
d[key] = d[key].isoformat()
@ -101,7 +109,8 @@ def format_row(row):
'faqs', 'shotzoom', 'social_links', 'holes', 'golfpakker', 'cooperating_clubs', 'vtg_datoer'
]
json_dict_fields = [
'amenities', 'vtg', 'nsg_data', 'golfamore_data', 'membership_draft'
'amenities', 'vtg', 'nsg_data', 'golfamore_data',
'membership_draft', 'greenfee_draft', 'vtg_draft'
]
for field in json_list_fields:
@ -132,38 +141,24 @@ def format_row(row):
return d
# --- BAKGRUNNSARBEIDER: FUNKSJON SOM KJØRER SKRAPEREN I BAKGRUNNEN ---
def run_scrape_worker(facility_ids: List[int]):
"""
Kjører selve skraping-scriptet i bakgrunnen.
Slik kan frontenden et umiddelbart svar, mens skraperen jobber.
"""
print(f"🔄 STARTER BAKGRUNNSSKRAPING FOR FØLGENDE IDER: {facility_ids}")
try:
ids_arg = ",".join(map(str, facility_ids))
# NYTT: Bruker "python -u" for LIVE logging, og fjerner "> /dev/null 2>&1"
command = f"python -u scrape_status.py --ids {ids_arg}"
subprocess.run(command, shell=True, check=True)
print(f"✅ BAKGRUNNSSKRAPING FULLFØRT FOR IDER: {facility_ids}")
except subprocess.CalledProcessError as e:
print(f"❌ FEIL UNDER BAKGRUNNSSKRAPING: {e}")
except Exception as e:
print(f"🔥 UFORUTSETT FEIL UNDER BAKGRUNNSSKRAPING: {e}")
async def queue_scrape_job(job_type: str, facility_ids: List[int]):
if job_type not in SCRAPE_JOB_TYPES:
raise HTTPException(status_code=400, detail=f"Ugyldig jobbtype: {job_type}")
if not facility_ids:
raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.")
def run_membership_worker(facility_ids: List[int]):
"""Kjører medlemskap-skraperen i bakgrunnen."""
print(f"🔄 STARTER MEDLEMSKAP-SKRAPING FOR IDER: {facility_ids}")
try:
ids_arg = ",".join(map(str, facility_ids))
command = f"python -u scrape_membership.py --ids {ids_arg}"
subprocess.run(command, shell=True, check=True)
print(f"✅ MEDLEMSKAP-SKRAPING FULLFØRT FOR IDER: {facility_ids}")
except Exception as e:
print(f"🔥 FEIL UNDER MEDLEMSKAP-SKRAPING: {e}")
job, was_created = await enqueue_scrape_job(app.state.pool, job_type, facility_ids)
status = "queued" if was_created else "already_queued"
message = (
f"{job_type.capitalize()}-skraping for {len(job['facility_ids'])} anlegg ble lagt i kø."
if was_created
else f"Fant allerede en aktiv {job_type}-jobb for samme anlegg."
)
return {
"status": status,
"message": message,
"job": job,
}
@asynccontextmanager
@ -177,6 +172,8 @@ async def lifespan(app: FastAPI):
max_size=20,
command_timeout=60
)
async with app.state.pool.acquire() as conn:
await ensure_scrape_jobs_table(conn)
print("✅ Database tilkoblet og pool opprettet")
except Exception as e:
print(f"❌ Databasefeil under oppstart: {e}")
@ -459,31 +456,26 @@ async def update_facility_full(facility_id: int, request: Request):
return {"status": "success", "message": "Anlegg, baner og scorekort ble oppdatert."}
# --- NYTT ADMIN ENDPOINT: KJØRER SKRAPEREN FOR VALGTE IDER ---
@app.get("/api/admin/scrape-jobs")
async def get_scrape_jobs(job_type: Optional[str] = Query(default=None), limit: int = Query(default=10, ge=1, le=50)):
"""Henter siste scrape-jobber, evt. filtrert på type."""
if job_type and job_type not in SCRAPE_JOB_TYPES:
raise HTTPException(status_code=400, detail="Ugyldig jobbtype.")
return await list_scrape_jobs(app.state.pool, job_type=job_type, limit=limit)
@app.post("/api/admin/run-scraper")
async def run_scraper_endpoint(request: ScrapeRunRequest, background_tasks: BackgroundTasks):
"""
Tar imot IDer for skraping, og starter en bakgrunnsjobb.
Gir et umiddelbart svar tilbake til frontenden slik at den slipper å vente.
"""
if not request.facility_ids:
raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.")
print(f"📡 API mottok forespørsel om å kjøre skraping for IDer: {request.facility_ids}")
background_tasks.add_task(run_scrape_worker, request.facility_ids)
return {"status": "queued", "message": f"Skraping for {len(request.facility_ids)} anlegg ble lagt i kø."}
async def run_scraper_endpoint(request: ScrapeRunRequest):
"""Legger banestatus-skraping i en persistent jobbkø."""
print(f"📡 API mottok forespørsel om å kjøre banestatus-skraping for IDer: {request.facility_ids}")
return await queue_scrape_job("banestatus", request.facility_ids)
@app.post("/api/admin/run-membership-scraper")
async def run_membership_scraper_endpoint(request: ScrapeRunRequest, background_tasks: BackgroundTasks):
async def run_membership_scraper_endpoint(request: ScrapeRunRequest):
"""Tar imot IDer for medlemskapsskraping og legger jobben i kø."""
if not request.facility_ids:
raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.")
print(f"📡 API mottok forespørsel om medlemskapsskraping for IDer: {request.facility_ids}")
background_tasks.add_task(run_membership_worker, request.facility_ids)
return {"status": "queued", "message": f"Medlemskapsskraping for {len(request.facility_ids)} anlegg ble lagt i kø."}
return await queue_scrape_job("medlemskap", request.facility_ids)
@app.get("/api/health")
async def health_check():
@ -585,25 +577,11 @@ async def approve_greenfee_bulk(request: BulkGreenfeeRequest):
""", json.dumps(approval.greenfee), approval.facility_id)
return {"status": "success"}
def run_greenfee_worker(facility_ids: List[int]):
"""Kjører greenfee-skraperen i bakgrunnen."""
print(f"🔄 STARTER GREENFEE-SKRAPING FOR IDER: {facility_ids}")
try:
import subprocess
ids_arg = ",".join(map(str, facility_ids))
command = f"python -u scrape_greenfee.py --ids {ids_arg}"
subprocess.run(command, shell=True, check=True)
print(f"✅ GREENFEE-SKRAPING FULLFØRT FOR IDER: {facility_ids}")
except Exception as e:
print(f"🔥 FEIL UNDER GREENFEE-SKRAPING: {e}")
@app.post("/api/admin/run-greenfee-scraper")
async def run_greenfee_scraper_endpoint(request: ScrapeRunRequest, background_tasks: BackgroundTasks):
async def run_greenfee_scraper_endpoint(request: ScrapeRunRequest):
"""Tar imot IDer for greenfeeskraping og legger jobben i kø."""
if not request.facility_ids:
raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.")
background_tasks.add_task(run_greenfee_worker, request.facility_ids)
return {"status": "queued", "message": "Skraping startet"}
print(f"📡 API mottok forespørsel om greenfee-skraping for IDer: {request.facility_ids}")
return await queue_scrape_job("greenfee", request.facility_ids)
# --- VEIEN TIL GOLF (VTG) "VASKERI" ENDEPUNKTER ---
@ -638,26 +616,12 @@ async def approve_vtg_bulk(request: BulkVtgRequest):
""", approval.vtg_pris, approval.vtg_beskrivelse, datoer_json, approval.facility_id)
return {"status": "success"}
def run_vtg_worker(facility_ids: List[int]):
"""Kjører VTG-skraperen i bakgrunnen."""
print(f"🔄 STARTER VTG-SKRAPING FOR IDER: {facility_ids}")
try:
import subprocess
ids_arg = ",".join(map(str, facility_ids))
command = f"python -u scrape_vtg.py --ids {ids_arg}"
subprocess.run(command, shell=True, check=True)
print(f"✅ VTG-SKRAPING FULLFØRT FOR IDER: {facility_ids}")
except Exception as e:
print(f"🔥 FEIL UNDER VTG-SKRAPING: {e}")
@app.post("/api/admin/run-vtg-scraper")
async def run_vtg_scraper_endpoint(request: ScrapeRunRequest, background_tasks: BackgroundTasks):
async def run_vtg_scraper_endpoint(request: ScrapeRunRequest):
"""Tar imot IDer for VTG-skraping og legger jobben i kø."""
if not request.facility_ids:
raise HTTPException(status_code=400, detail="Ingen anleggs-IDer ble oppgitt.")
background_tasks.add_task(run_vtg_worker, request.facility_ids)
return {"status": "queued", "message": "Skraping startet"}
print(f"📡 API mottok forespørsel om VTG-skraping for IDer: {request.facility_ids}")
return await queue_scrape_job("vtg", request.facility_ids)
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)
uvicorn.run(app, host="0.0.0.0", port=8000)

View file

@ -107,6 +107,10 @@ Returner KUN et gyldig JSON-objekt med nøyaktig følgende struktur:
async def run_greenfee_scraper(facility_ids=None):
print("🚀 Starter Greenfee-skraperen...")
conn = await asyncpg.connect(DB_URL)
facilities = []
analyzed_count = 0
saved_count = 0
skipped_count = 0
try:
query = "SELECT id, name, greenfee_url FROM facilities WHERE greenfee_url IS NOT NULL AND greenfee_url != ''"
@ -136,12 +140,16 @@ async def run_greenfee_scraper(facility_ids=None):
if len(combined_text) < 50:
print(" ⚠️ Fant for lite tekst, hopper over.")
skipped_count += 1
continue
draft_data = analyze_greenfee_with_gemini(combined_text[:25000], name)
if not draft_data:
skipped_count += 1
continue
analyzed_count += 1
funnet_priser = len(draft_data.get('foreslatt_greenfee', []))
funnet_klubber = len(draft_data.get('foreslatt_avtaleklubber', []))
@ -154,6 +162,7 @@ async def run_greenfee_scraper(facility_ids=None):
""", json.dumps(draft_data), fac_id)
print(" 💾 Greenfee-utkast lagret i databasen!")
saved_count += 1
await browser.close()
@ -161,6 +170,13 @@ async def run_greenfee_scraper(facility_ids=None):
await conn.close()
print("\n🏁 Skraping fullført.")
return {
"processed_facilities": len(facilities),
"analyzed_facilities": analyzed_count,
"saved_drafts": saved_count,
"skipped_facilities": skipped_count,
}
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Skrap greenfeepriser via AI.")
parser.add_argument("--ids", type=str, help="Kommaseparert liste med facility IDs (eks: 1,5,12)")
@ -170,4 +186,4 @@ if __name__ == "__main__":
if args.ids:
ids_to_scrape = [int(x.strip()) for x in args.ids.split(",")]
asyncio.run(run_greenfee_scraper(ids_to_scrape))
asyncio.run(run_greenfee_scraper(ids_to_scrape))

24
backend/scrape_job_runner.py Executable file
View file

@ -0,0 +1,24 @@
from typing import Any
from scrape_greenfee import run_greenfee_scraper
from scrape_membership import run_scraper as run_membership_scraper
from scrape_status import run_daily_scraping
from scrape_vtg import run_vtg_scraper
async def run_scrape_job(job: dict[str, Any]) -> dict[str, Any]:
job_type = job["job_type"]
facility_ids = job.get("facility_ids") or []
if job_type == "banestatus":
result = await run_daily_scraping(facility_ids)
elif job_type == "medlemskap":
result = await run_membership_scraper(facility_ids)
elif job_type == "greenfee":
result = await run_greenfee_scraper(facility_ids)
elif job_type == "vtg":
result = await run_vtg_scraper(facility_ids)
else:
raise ValueError(f"Ukjent scrape-jobbtype: {job_type}")
return result or {}

236
backend/scrape_jobs.py Executable file
View file

@ -0,0 +1,236 @@
import json
from datetime import date, datetime
from typing import Any, Iterable
SCRAPE_JOB_TYPES = ("banestatus", "medlemskap", "greenfee", "vtg")
SCRAPE_JOB_STATUSES = ("pending", "running", "completed", "failed")
def normalize_facility_ids(facility_ids: Iterable[int]) -> list[int]:
cleaned = {
int(facility_id)
for facility_id in facility_ids
if str(facility_id).strip()
}
return sorted(facility_id for facility_id in cleaned if facility_id > 0)
def _parse_json(value: Any, fallback: Any) -> Any:
if value is None:
return fallback
if isinstance(value, str):
try:
return json.loads(value)
except json.JSONDecodeError:
return fallback
return value
def format_scrape_job_row(row: Any) -> dict[str, Any] | None:
if row is None:
return None
data = dict(row)
for key in ("created_at", "started_at", "finished_at", "updated_at", "last_heartbeat_at"):
if isinstance(data.get(key), (date, datetime)):
data[key] = data[key].isoformat()
facility_ids = _parse_json(data.get("facility_ids"), [])
data["facility_ids"] = facility_ids if isinstance(facility_ids, list) else []
result_summary = _parse_json(data.get("result_summary"), {})
data["result_summary"] = result_summary if isinstance(result_summary, dict) else {}
return data
async def ensure_scrape_jobs_table(conn) -> None:
await conn.execute(
"""
CREATE TABLE IF NOT EXISTS scrape_jobs (
id SERIAL PRIMARY KEY,
job_type VARCHAR(50) NOT NULL CHECK (job_type IN ('banestatus', 'medlemskap', 'greenfee', 'vtg')),
facility_ids JSONB NOT NULL DEFAULT '[]'::jsonb,
total_facilities INTEGER NOT NULL DEFAULT 0,
status VARCHAR(20) NOT NULL DEFAULT 'pending' CHECK (status IN ('pending', 'running', 'completed', 'failed')),
requested_by TEXT,
worker_name TEXT,
attempt_count INTEGER NOT NULL DEFAULT 0,
error_message TEXT,
result_summary JSONB NOT NULL DEFAULT '{}'::jsonb,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
started_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
last_heartbeat_at TIMESTAMPTZ
)
"""
)
await conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_scrape_jobs_status_created_at
ON scrape_jobs (status, created_at)
"""
)
await conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_scrape_jobs_job_type_created_at
ON scrape_jobs (job_type, created_at DESC)
"""
)
async def enqueue_scrape_job(pool, job_type: str, facility_ids: Iterable[int], requested_by: str | None = None) -> tuple[dict[str, Any], bool]:
if job_type not in SCRAPE_JOB_TYPES:
raise ValueError(f"Ugyldig job_type: {job_type}")
normalized_ids = normalize_facility_ids(facility_ids)
facility_ids_json = json.dumps(normalized_ids)
async with pool.acquire() as conn:
async with conn.transaction():
existing = await conn.fetchrow(
"""
SELECT *
FROM scrape_jobs
WHERE job_type = $1
AND status IN ('pending', 'running')
AND facility_ids = $2::jsonb
ORDER BY created_at DESC
LIMIT 1
""",
job_type,
facility_ids_json,
)
if existing:
return format_scrape_job_row(existing), False
row = await conn.fetchrow(
"""
INSERT INTO scrape_jobs (
job_type,
facility_ids,
total_facilities,
requested_by
)
VALUES ($1, $2::jsonb, $3, $4)
RETURNING *
""",
job_type,
facility_ids_json,
len(normalized_ids),
requested_by,
)
return format_scrape_job_row(row), True
async def list_scrape_jobs(pool, job_type: str | None = None, limit: int = 10) -> list[dict[str, Any]]:
safe_limit = max(1, min(limit, 50))
async with pool.acquire() as conn:
if job_type:
rows = await conn.fetch(
"""
SELECT *
FROM scrape_jobs
WHERE job_type = $1
ORDER BY created_at DESC
LIMIT $2
""",
job_type,
safe_limit,
)
else:
rows = await conn.fetch(
"""
SELECT *
FROM scrape_jobs
ORDER BY created_at DESC
LIMIT $1
""",
safe_limit,
)
return [format_scrape_job_row(row) for row in rows]
async def claim_next_scrape_job(pool, worker_name: str) -> dict[str, Any] | None:
async with pool.acquire() as conn:
async with conn.transaction():
row = await conn.fetchrow(
"""
WITH next_job AS (
SELECT id
FROM scrape_jobs
WHERE status = 'pending'
ORDER BY created_at ASC
FOR UPDATE SKIP LOCKED
LIMIT 1
)
UPDATE scrape_jobs AS job
SET status = 'running',
worker_name = $1,
attempt_count = job.attempt_count + 1,
started_at = COALESCE(job.started_at, NOW()),
updated_at = NOW(),
last_heartbeat_at = NOW(),
error_message = NULL
FROM next_job
WHERE job.id = next_job.id
RETURNING job.*
""",
worker_name,
)
return format_scrape_job_row(row)
async def heartbeat_scrape_job(pool, job_id: int) -> None:
async with pool.acquire() as conn:
await conn.execute(
"""
UPDATE scrape_jobs
SET last_heartbeat_at = NOW(),
updated_at = NOW()
WHERE id = $1
""",
job_id,
)
async def complete_scrape_job(pool, job_id: int, result_summary: dict[str, Any] | None = None) -> None:
payload = json.dumps(result_summary or {})
async with pool.acquire() as conn:
await conn.execute(
"""
UPDATE scrape_jobs
SET status = 'completed',
finished_at = NOW(),
updated_at = NOW(),
last_heartbeat_at = NOW(),
error_message = NULL,
result_summary = $2::jsonb
WHERE id = $1
""",
job_id,
payload,
)
async def fail_scrape_job(pool, job_id: int, error_message: str, result_summary: dict[str, Any] | None = None) -> None:
payload = json.dumps(result_summary or {})
async with pool.acquire() as conn:
await conn.execute(
"""
UPDATE scrape_jobs
SET status = 'failed',
finished_at = NOW(),
updated_at = NOW(),
last_heartbeat_at = NOW(),
error_message = $2,
result_summary = $3::jsonb
WHERE id = $1
""",
job_id,
error_message[:1000],
payload,
)

View file

@ -102,6 +102,10 @@ Merk: Prisene SKAL være tall (integer), ikke tekst. Sett til null hvis du ikke
async def run_scraper(facility_ids=None):
print("🚀 Starter Medlemskaps-skraperen (Støtter multi-URL)...")
conn = await asyncpg.connect(DB_URL)
facilities = []
analyzed_count = 0
saved_count = 0
skipped_count = 0
try:
query = "SELECT id, name, medlemskap_url FROM facilities WHERE medlemskap_url IS NOT NULL AND medlemskap_url != ''"
@ -132,13 +136,17 @@ async def run_scraper(facility_ids=None):
if len(combined_text) < 50:
print(" ⚠️ Fant for lite tekst, hopper over.")
skipped_count += 1
continue
# Kutter teksten for å ikke overbelaste Gemini (ca 25000 tegn maks)
draft_data = analyze_with_gemini(combined_text[:25000], name)
if not draft_data:
skipped_count += 1
continue
analyzed_count += 1
print(f" ✅ AI foreslår: Standard: {draft_data.get('foreslatt_standard_pris')} | Rimeligste: {draft_data.get('foreslatt_rimeligste_pris')}")
@ -149,6 +157,7 @@ async def run_scraper(facility_ids=None):
""", json.dumps(draft_data), fac_id)
print(" 💾 Utkast lagret i databasen!")
saved_count += 1
await browser.close()
@ -156,6 +165,13 @@ async def run_scraper(facility_ids=None):
await conn.close()
print("\n🏁 Skraping fullført.")
return {
"processed_facilities": len(facilities),
"analyzed_facilities": analyzed_count,
"saved_drafts": saved_count,
"skipped_facilities": skipped_count,
}
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Skrap medlemskapspriser via AI.")
parser.add_argument("--ids", type=str, help="Kommaseparert liste med facility IDs (eks: 1,5,12)")
@ -165,4 +181,4 @@ if __name__ == "__main__":
if args.ids:
ids_to_scrape = [int(x.strip()) for x in args.ids.split(",")]
asyncio.run(run_scraper(ids_to_scrape))
asyncio.run(run_scraper(ids_to_scrape))

View file

@ -166,7 +166,12 @@ async def run_daily_scraping(facility_ids=None):
if not facilities:
print("⚠️ Fant ingen anlegg å skrape.")
await conn.close()
return
return {
"processed_facilities": 0,
"updated_courses": 0,
"warnings": 0,
"successes": 0,
}
changes, warnings, successes = [], [], []
@ -315,6 +320,12 @@ async def run_daily_scraping(facility_ids=None):
await conn.close()
send_report(changes, warnings, successes)
print("🏁 Ferdig.")
return {
"processed_facilities": len(facilities),
"updated_courses": len(changes),
"warnings": len(warnings),
"successes": len(successes),
}
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="TeeOff Status Scraper")
@ -329,4 +340,4 @@ if __name__ == "__main__":
print("❌ Feil format på --ids. Må være kommaseparerte tall, f.eks: 1,4,12")
exit(1)
asyncio.run(run_daily_scraping(facility_ids_list))
asyncio.run(run_daily_scraping(facility_ids_list))

View file

@ -97,6 +97,10 @@ Merk: Sett foreslatt_vtg_pris til null (null) hvis du ikke finner den. Hvis du i
async def run_vtg_scraper(facility_ids=None):
print("🚀 Starter Veien til Golf (VTG) skraperen...")
conn = await asyncpg.connect(DB_URL)
facilities = []
analyzed_count = 0
saved_count = 0
skipped_count = 0
try:
query = "SELECT id, name, vtg_lenke FROM facilities WHERE vtg_lenke IS NOT NULL AND vtg_lenke != ''"
@ -126,12 +130,16 @@ async def run_vtg_scraper(facility_ids=None):
if len(combined_text) < 50:
print(" ⚠️ Fant for lite tekst, hopper over.")
skipped_count += 1
continue
draft_data = analyze_vtg_with_gemini(combined_text[:25000], name)
if not draft_data:
skipped_count += 1
continue
analyzed_count += 1
print(f" ✅ AI fant pris: {draft_data.get('foreslatt_vtg_pris')}, og {len(draft_data.get('foreslatt_vtg_datoer', []))} datoer.")
@ -142,6 +150,7 @@ async def run_vtg_scraper(facility_ids=None):
""", json.dumps(draft_data), fac_id)
print(" 💾 VTG-utkast lagret i databasen!")
saved_count += 1
await browser.close()
@ -149,6 +158,13 @@ async def run_vtg_scraper(facility_ids=None):
await conn.close()
print("\n🏁 Skraping fullført.")
return {
"processed_facilities": len(facilities),
"analyzed_facilities": analyzed_count,
"saved_drafts": saved_count,
"skipped_facilities": skipped_count,
}
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Skrap VTG via AI.")
parser.add_argument("--ids", type=str, help="Kommaseparert liste med facility IDs (eks: 1,5,12)")
@ -158,4 +174,4 @@ if __name__ == "__main__":
if args.ids:
ids_to_scrape = [int(x.strip()) for x in args.ids.split(",")]
asyncio.run(run_vtg_scraper(ids_to_scrape))
asyncio.run(run_vtg_scraper(ids_to_scrape))

77
backend/worker.py Executable file
View file

@ -0,0 +1,77 @@
import asyncio
import os
import traceback
import asyncpg
from dotenv import load_dotenv
from scrape_job_runner import run_scrape_job
from scrape_jobs import (
claim_next_scrape_job,
complete_scrape_job,
ensure_scrape_jobs_table,
fail_scrape_job,
heartbeat_scrape_job,
)
load_dotenv()
DB_URL = os.getenv("DATABASE_URL", "postgresql://teeoff_admin:teeoff_secret_password@db:5432/teeoff")
WORKER_NAME = os.getenv("SCRAPE_WORKER_NAME", f"scrape-worker-{os.getpid()}")
POLL_INTERVAL_SECONDS = int(os.getenv("SCRAPE_WORKER_POLL_INTERVAL", "5"))
HEARTBEAT_INTERVAL_SECONDS = int(os.getenv("SCRAPE_WORKER_HEARTBEAT_INTERVAL", "15"))
async def heartbeat_loop(pool, job_id: int, stop_event: asyncio.Event) -> None:
while not stop_event.is_set():
try:
await asyncio.wait_for(stop_event.wait(), timeout=HEARTBEAT_INTERVAL_SECONDS)
except asyncio.TimeoutError:
try:
await heartbeat_scrape_job(pool, job_id)
except Exception as exc:
print(f"⚠️ Klarte ikke å oppdatere heartbeat for jobb {job_id}: {exc}")
async def main() -> None:
print(f"🚀 Starter scrape worker: {WORKER_NAME}")
pool = await asyncpg.create_pool(DB_URL, min_size=1, max_size=5, command_timeout=60)
try:
async with pool.acquire() as conn:
await ensure_scrape_jobs_table(conn)
while True:
job = await claim_next_scrape_job(pool, WORKER_NAME)
if not job:
await asyncio.sleep(POLL_INTERVAL_SECONDS)
continue
job_id = job["id"]
print(f"🎯 Worker plukket jobb #{job_id} ({job['job_type']}) for {len(job.get('facility_ids', []))} anlegg")
stop_event = asyncio.Event()
heartbeat_task = asyncio.create_task(heartbeat_loop(pool, job_id, stop_event))
try:
result_summary = await run_scrape_job(job)
await complete_scrape_job(pool, job_id, result_summary)
print(f"✅ Jobb #{job_id} fullført")
except Exception as exc:
trace = traceback.format_exc(limit=5)
print(f"🔥 Jobb #{job_id} feilet: {exc}\n{trace}")
await fail_scrape_job(
pool,
job_id,
str(exc),
{"traceback": trace},
)
finally:
stop_event.set()
await heartbeat_task
finally:
await pool.close()
if __name__ == "__main__":
asyncio.run(main())

View file

@ -25,6 +25,16 @@ services:
- db
restart: unless-stopped
worker:
build: ./backend
container_name: teeoff_worker
command: python worker.py
volumes:
- ./backend:/app
depends_on:
- db
restart: unless-stopped
frontend:
build: ./frontend
container_name: teeoff_frontend
@ -38,4 +48,4 @@ services:
restart: unless-stopped
volumes:
teeoff_db_data:
teeoff_db_data:

View file

@ -3,11 +3,42 @@
* TEE OFF ADMIN DASHBOARD v4.0 - KONTROLLPANEL
*/
import { useState, useEffect, useMemo } from 'react';
import { useEffect, useMemo, useRef, useState } from 'react';
import { API_URL } from "@/config/constants";
import ScrapeMethodSelect from "@/components/ScrapeMethodSelect";
import Link from 'next/link';
type AdminTab = 'banestatus' | 'medlemskap' | 'greenfee' | 'vtg';
type ScrapeJobStatus = 'pending' | 'running' | 'completed' | 'failed';
type ScrapeJob = {
id: number;
job_type: AdminTab;
status: ScrapeJobStatus;
facility_ids: number[];
total_facilities: number;
error_message?: string | null;
result_summary?: Record<string, number | string | null>;
created_at?: string | null;
started_at?: string | null;
finished_at?: string | null;
};
const JOB_LABELS: Record<AdminTab, string> = {
banestatus: 'Banestatus',
medlemskap: 'Medlemskap',
greenfee: 'Greenfee',
vtg: 'VTG',
};
const JOB_STATUS_LABELS: Record<ScrapeJobStatus, string> = {
pending: 'I kø',
running: 'Kjører',
completed: 'Fullført',
failed: 'Feilet',
};
const InlineEdit = ({ facilityId, field, initialValue, onSave }: { facilityId: number, field: string, initialValue: string, onSave: (id: number, field: string, val: string) => void }) => {
const [isEditing, setIsEditing] = useState(false);
const [value, setValue] = useState(initialValue || '');
@ -45,13 +76,15 @@ export default function AdminDashboard() {
const [facilities, setFacilities] = useState<any[]>([]);
const [loading, setLoading] = useState(true);
const [selectedFacilities, setSelectedFacilities] = useState<number[]>([]);
const [isScraping, setIsScraping] = useState(false);
const [scrapeJobs, setScrapeJobs] = useState<ScrapeJob[]>([]);
const [isQueueing, setIsQueueing] = useState(false);
const [isSidebarCollapsed, setIsSidebarCollapsed] = useState(false);
const [editingFacility, setEditingFacility] = useState<any | null>(null);
const [activeTab, setActiveTab] = useState<'banestatus' | 'medlemskap' | 'greenfee' | 'vtg'>('banestatus');
const [activeTab, setActiveTab] = useState<AdminTab>('banestatus');
const [statusFilter, setStatusFilter] = useState('alle');
const [editForm, setEditForm] = useState({ scrape_status_url: '', scrape_status_selector: '', scrape_method: '', ai_instruction: '', courses: [] as any[] });
const [isSaving, setIsSaving] = useState(false);
const latestJobStateRef = useRef<string | null>(null);
const fetchFacilities = () => {
fetch(`${API_URL}/facilities`)
@ -63,15 +96,58 @@ export default function AdminDashboard() {
.catch(() => setLoading(false));
};
useEffect(() => { fetchFacilities(); }, []);
const fetchScrapeJobs = (tab: AdminTab = activeTab) => {
fetch(`${API_URL}/admin/scrape-jobs?job_type=${tab}&limit=5`)
.then(res => res.json())
.then(data => {
setScrapeJobs(Array.isArray(data) ? data : []);
})
.catch(() => setScrapeJobs([]));
};
const activeJob = useMemo(
() => scrapeJobs.find(job => job.status === 'pending' || job.status === 'running') || null,
[scrapeJobs]
);
const latestJob = scrapeJobs[0] || null;
const isScraping = !!activeJob;
useEffect(() => {
let interval: NodeJS.Timeout;
if (isScraping) interval = setInterval(() => fetchFacilities(), 10000);
fetchFacilities();
fetchScrapeJobs('banestatus');
}, []);
useEffect(() => {
const interval = setInterval(() => fetchScrapeJobs(activeTab), 5000);
return () => clearInterval(interval);
}, [activeTab]);
useEffect(() => {
setSelectedFacilities([]);
fetchScrapeJobs(activeTab);
}, [activeTab]);
useEffect(() => {
if (!isScraping) return;
const interval = setInterval(() => fetchFacilities(), 10000);
return () => clearInterval(interval);
}, [isScraping]);
useEffect(() => { setSelectedFacilities([]); }, [activeTab]);
useEffect(() => {
const currentState = latestJob ? `${latestJob.id}:${latestJob.status}` : null;
const previousState = latestJobStateRef.current;
latestJobStateRef.current = currentState;
if (
previousState &&
previousState !== currentState &&
latestJob &&
(latestJob.status === 'completed' || latestJob.status === 'failed')
) {
fetchFacilities();
fetchScrapeJobs(activeTab);
}
}, [activeTab, latestJob]);
const filteredFacilities = useMemo(() => {
if (statusFilter === 'alle') return facilities;
@ -89,6 +165,15 @@ export default function AdminDashboard() {
}).filter(facility => facility.course_statuses && facility.course_statuses.length > 0);
}, [facilities, statusFilter]);
const latestJobSummary = useMemo(() => {
if (!latestJob?.result_summary) return '';
return Object.entries(latestJob.result_summary)
.filter(([, value]) => value !== null && value !== undefined && value !== '')
.map(([key, value]) => `${key.replaceAll('_', ' ')}: ${value}`)
.join(' • ');
}, [latestJob]);
const handleSelectAll = (e: React.ChangeEvent<HTMLInputElement>) => {
if (e.target.checked) setSelectedFacilities(filteredFacilities.map(f => f.id));
else setSelectedFacilities([]);
@ -115,8 +200,8 @@ export default function AdminDashboard() {
};
const handleRunScrapers = async () => {
if (isScraping) { setIsScraping(false); return; }
setIsScraping(true);
if (selectedFacilities.length === 0) return;
setIsQueueing(true);
const endpoint = activeTab === 'banestatus' ? '/admin/run-scraper' :
activeTab === 'medlemskap' ? '/admin/run-membership-scraper' :
activeTab === 'greenfee' ? '/admin/run-greenfee-scraper' :
@ -127,13 +212,14 @@ export default function AdminDashboard() {
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ facility_ids: selectedFacilities })
});
if (!response.ok) throw new Error("Kunne ikke starte skraping");
const timeoutMs = Math.max(selectedFacilities.length * 40 * 1000, 60000);
setSelectedFacilities([]);
setTimeout(() => setIsScraping(false), timeoutMs);
const data = await response.json();
if (!response.ok) throw new Error(data.detail || "Kunne ikke starte skraping");
setSelectedFacilities([]);
fetchScrapeJobs(activeTab);
} catch (error) {
alert(`Feil ved start av ${activeTab}-skraperen.`);
setIsScraping(false);
} finally {
setIsQueueing(false);
}
};
@ -280,14 +366,63 @@ export default function AdminDashboard() {
<button
onClick={handleRunScrapers}
disabled={selectedFacilities.length === 0 && !isScraping}
disabled={selectedFacilities.length === 0 || isQueueing}
className={`text-white px-6 py-4 rounded-2xl text-[10px] font-black uppercase tracking-widest shadow-xl transition-all whitespace-nowrap
${isScraping ? 'bg-yellow-500 animate-pulse cursor-pointer hover:bg-yellow-600' : 'bg-[#8bc34a] hover:scale-105 disabled:bg-gray-200 disabled:text-gray-400 disabled:cursor-not-allowed'}`}
${isQueueing ? 'bg-yellow-500 animate-pulse' : 'bg-[#8bc34a] hover:scale-105 disabled:bg-gray-200 disabled:text-gray-400 disabled:cursor-not-allowed'}`}
>
{isScraping ? '🤖 Skraper... Klikk for å avslutte' : `Kjør ${activeTab}-skrapere (${selectedFacilities.length})`}
{isQueueing ? 'Legger i kø...' : isScraping ? `Legg ${activeTab}-skraping i kø (${selectedFacilities.length})` : `Kjør ${activeTab}-skrapere (${selectedFacilities.length})`}
</button>
</header>
{latestJob && latestJob.job_type === activeTab && (
<div className={`mb-8 rounded-[1.75rem] border p-5 md:p-6 animate-fade-in ${
latestJob.status === 'failed'
? 'bg-red-50 border-red-100'
: latestJob.status === 'completed'
? 'bg-[#f1f7ed] border-[#d8e8c8]'
: 'bg-amber-50 border-amber-100'
}`}>
<div className="flex flex-col md:flex-row md:items-start md:justify-between gap-4">
<div className="space-y-2">
<p className="text-[10px] font-black uppercase tracking-[0.2em] text-gray-500">
{JOB_LABELS[activeTab]} jobb #{latestJob.id}
</p>
<div className="flex flex-wrap items-center gap-3">
<span className={`inline-flex px-3 py-1 rounded-xl text-[10px] font-black uppercase tracking-widest ${
latestJob.status === 'failed'
? 'bg-red-100 text-red-700'
: latestJob.status === 'completed'
? 'bg-[#d8e8c8] text-[#11280f]'
: 'bg-amber-100 text-amber-700 animate-pulse'
}`}>
{JOB_STATUS_LABELS[latestJob.status]}
</span>
<span className="text-xs font-bold text-[#11280f]">
{latestJob.total_facilities} anlegg
</span>
{latestJob.created_at && (
<span className="text-xs text-gray-500">
Opprettet {new Date(latestJob.created_at).toLocaleString('nb-NO')}
</span>
)}
</div>
{latestJobSummary && (
<p className="text-xs text-gray-600 leading-relaxed">{latestJobSummary}</p>
)}
{latestJob.error_message && (
<p className="text-xs text-red-600 leading-relaxed">{latestJob.error_message}</p>
)}
</div>
<button
onClick={() => fetchScrapeJobs(activeTab)}
className="px-4 py-2 rounded-xl bg-white text-[10px] font-black uppercase tracking-widest text-gray-500 border border-gray-200 hover:border-[#8bc34a] hover:text-[#11280f] transition-colors"
>
Oppdater status
</button>
</div>
</div>
)}
{/* VELDIG SYNLIGE FANER */}
<div className="flex gap-2 mb-8 border-b-2 border-gray-100 pb-0 overflow-x-auto hide-scrollbar">
<button onClick={() => setActiveTab('banestatus')} className={`px-6 py-3 text-xs font-black uppercase tracking-widest rounded-t-xl transition-all whitespace-nowrap ${activeTab === 'banestatus' ? 'bg-[#8bc34a] text-white shadow-md' : 'bg-gray-50 text-gray-500 hover:bg-gray-200'}`}>Banestatus</button>
@ -465,4 +600,4 @@ export default function AdminDashboard() {
</main>
</div>
);
}
}