# Nye-TeeOff/backend/worker.py
#
# 77 lines
# 2.5 KiB
# Python
# Executable file

import asyncio
import os
import traceback
import asyncpg
from dotenv import load_dotenv
from scrape_job_runner import run_scrape_job
from scrape_jobs import (
claim_next_scrape_job,
complete_scrape_job,
ensure_scrape_jobs_table,
fail_scrape_job,
heartbeat_scrape_job,
)
# Load variables from a .env file (if any) before the settings below are read.
load_dotenv()

# Database connection string passed to asyncpg.create_pool().
# NOTE(review): the fallback embeds a username/password — presumably a
# development-only default; confirm it is never relied on in production.
DB_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://teeoff_admin:teeoff_secret_password@db:5432/teeoff",
)

# Name handed to claim_next_scrape_job(); defaults to a PID-based name.
WORKER_NAME = os.environ.get("SCRAPE_WORKER_NAME", f"scrape-worker-{os.getpid()}")

# Seconds to sleep between polls when no job could be claimed.
POLL_INTERVAL_SECONDS = int(os.environ.get("SCRAPE_WORKER_POLL_INTERVAL", "5"))

# Seconds between heartbeat updates while a job is running.
HEARTBEAT_INTERVAL_SECONDS = int(os.environ.get("SCRAPE_WORKER_HEARTBEAT_INTERVAL", "15"))
async def heartbeat_loop(pool, job_id: int, stop_event: asyncio.Event) -> None:
    """Send a heartbeat for ``job_id`` at a fixed interval until stopped.

    Each iteration waits up to ``HEARTBEAT_INTERVAL_SECONDS`` for
    ``stop_event``. If the wait times out, ``heartbeat_scrape_job`` is called;
    if the event is set, the loop ends.

    Args:
        pool: Passed through unchanged to ``heartbeat_scrape_job``.
        job_id: Identifier of the job the heartbeat belongs to.
        stop_event: Set by the caller to make this coroutine return.
    """
    while not stop_event.is_set():
        try:
            await asyncio.wait_for(
                stop_event.wait(),
                timeout=HEARTBEAT_INTERVAL_SECONDS,
            )
        except asyncio.TimeoutError:
            # The interval elapsed without a stop request: fall through and
            # send a heartbeat.
            pass
        else:
            # The event was set; the loop condition ends the loop.
            continue

        try:
            await heartbeat_scrape_job(pool, job_id)
        except Exception as error:
            # A failed heartbeat is reported but must not stop the loop.
            print(f"⚠️ Klarte ikke å oppdatere heartbeat for jobb {job_id}: {error}")
async def _process_claimed_job(pool, job) -> None:
    """Run one claimed scrape job and record its outcome.

    A heartbeat task runs alongside the job and is always stopped and awaited
    before this coroutine returns. On success the job is marked complete with
    the summary returned by ``run_scrape_job``; on any ``Exception`` it is
    marked failed with the error message and a truncated traceback.

    Args:
        pool: Passed through to the ``scrape_jobs`` helpers.
        job: Mapping-like job as returned by ``claim_next_scrape_job``; must
            provide ``"id"`` and ``"job_type"``.
    """
    job_id = job["id"]
    # `.get(key, default)` only covers a *missing* key. A key that is present
    # but None would make len() raise before the try block below, crashing the
    # worker with the job claimed but never marked failed.
    facility_ids = job.get("facility_ids") or []
    print(f"🎯 Worker plukket jobb #{job_id} ({job['job_type']}) for {len(facility_ids)} anlegg")

    stop_event = asyncio.Event()
    heartbeat_task = asyncio.create_task(heartbeat_loop(pool, job_id, stop_event))
    try:
        result_summary = await run_scrape_job(job)
        await complete_scrape_job(pool, job_id, result_summary)
        print(f"✅ Jobb #{job_id} fullført")
    except Exception as exc:
        trace = traceback.format_exc(limit=5)
        print(f"🔥 Jobb #{job_id} feilet: {exc}\n{trace}")
        await fail_scrape_job(
            pool,
            job_id,
            str(exc),
            {"traceback": trace},
        )
    finally:
        # Stop the heartbeat and wait for it, so no heartbeat is sent for a
        # job that has already finished.
        stop_event.set()
        await heartbeat_task


async def main() -> None:
    """Run the scrape worker loop until the process is stopped.

    Creates the connection pool, calls ``ensure_scrape_jobs_table`` once, and
    then loops forever: claim the next job, process it, and sleep
    ``POLL_INTERVAL_SECONDS`` whenever no job could be claimed. The pool is
    closed when the loop is left for any reason.

    Errors raised while claiming a job or while recording a failure are not
    caught here and end the worker.
    """
    print(f"🚀 Starter scrape worker: {WORKER_NAME}")
    pool = await asyncpg.create_pool(DB_URL, min_size=1, max_size=5, command_timeout=60)
    try:
        async with pool.acquire() as conn:
            await ensure_scrape_jobs_table(conn)

        while True:
            job = await claim_next_scrape_job(pool, WORKER_NAME)
            if not job:
                await asyncio.sleep(POLL_INTERVAL_SECONDS)
                continue
            await _process_claimed_job(pool, job)
    finally:
        await pool.close()
if __name__ == "__main__":
    # Start the worker only when this file is executed as a script, not when
    # it is imported.
    asyncio.run(main())