78 lines
2.5 KiB
Python
78 lines
2.5 KiB
Python
|
|
import asyncio
|
||
|
|
import os
|
||
|
|
import traceback
|
||
|
|
|
||
|
|
import asyncpg
|
||
|
|
from dotenv import load_dotenv
|
||
|
|
|
||
|
|
from scrape_job_runner import run_scrape_job
|
||
|
|
from scrape_jobs import (
|
||
|
|
claim_next_scrape_job,
|
||
|
|
complete_scrape_job,
|
||
|
|
ensure_scrape_jobs_table,
|
||
|
|
fail_scrape_job,
|
||
|
|
heartbeat_scrape_job,
|
||
|
|
)
|
||
|
|
|
||
|
|
# Call python-dotenv before any of the settings below are read from the
# environment (presumably so values from a local .env file are picked up).
load_dotenv()

# Runtime configuration: every setting is read from the environment once at
# import time and falls back to the literal default shown here.
# NOTE(review): the DATABASE_URL fallback embeds a username and password in
# source control — confirm this is a dev-only credential.
DB_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://teeoff_admin:teeoff_secret_password@db:5432/teeoff",
)
# Name passed to claim_next_scrape_job; defaults to a PID-based name.
WORKER_NAME = os.environ.get("SCRAPE_WORKER_NAME", f"scrape-worker-{os.getpid()}")
# Seconds to sleep between claim attempts when no job was returned.
POLL_INTERVAL_SECONDS = int(os.environ.get("SCRAPE_WORKER_POLL_INTERVAL", "5"))
# Seconds between heartbeat updates while a job is being processed.
HEARTBEAT_INTERVAL_SECONDS = int(os.environ.get("SCRAPE_WORKER_HEARTBEAT_INTERVAL", "15"))
|
||
|
|
|
||
|
|
|
||
|
|
async def heartbeat_loop(pool, job_id: int, stop_event: asyncio.Event) -> None:
    """Periodically call ``heartbeat_scrape_job`` until ``stop_event`` is set.

    Args:
        pool: Object forwarded unchanged to ``heartbeat_scrape_job``.
        job_id: Identifier of the job whose heartbeat is refreshed.
        stop_event: Setting this event makes the loop exit; it is also what
            the loop waits on between heartbeats, so shutdown is prompt.

    A failing heartbeat call is printed as a warning and does not end the loop.
    """
    while True:
        if stop_event.is_set():
            return

        try:
            # Doubles as the sleep between heartbeats: either the event is set
            # (stop requested) or the interval elapses (time for a heartbeat).
            await asyncio.wait_for(stop_event.wait(), timeout=HEARTBEAT_INTERVAL_SECONDS)
        except asyncio.TimeoutError:
            pass
        else:
            # The wait finished without timing out; re-check the stop flag.
            continue

        try:
            await heartbeat_scrape_job(pool, job_id)
        except Exception as exc:
            print(f"⚠️ Klarte ikke å oppdatere heartbeat for jobb {job_id}: {exc}")
|
||
|
|
|
||
|
|
|
||
|
|
async def main() -> None:
    """Run the scrape worker: claim jobs, execute them and record the outcome.

    Creates an asyncpg connection pool, calls ``ensure_scrape_jobs_table`` once,
    then loops forever. Each iteration claims a job via
    ``claim_next_scrape_job`` (sleeping ``POLL_INTERVAL_SECONDS`` when none is
    returned), runs it with ``run_scrape_job`` while a background
    ``heartbeat_loop`` task is active, and reports the result through
    ``complete_scrape_job`` or ``fail_scrape_job``.

    The pool is closed when the loop is left for any reason. Exceptions raised
    outside the per-job ``try`` (for example while claiming a job) propagate to
    the caller.
    """
    print(f"🚀 Starter scrape worker: {WORKER_NAME}")
    pool = await asyncpg.create_pool(DB_URL, min_size=1, max_size=5, command_timeout=60)

    try:
        async with pool.acquire() as conn:
            await ensure_scrape_jobs_table(conn)

        while True:
            job = await claim_next_scrape_job(pool, WORKER_NAME)
            if not job:
                await asyncio.sleep(POLL_INTERVAL_SECONDS)
                continue

            job_id = job["id"]
            # `or []` also covers a key that is present but None; a bare
            # len(None) here would crash the worker after the job was claimed
            # and before the handler below could mark it as failed.
            facility_ids = job.get("facility_ids") or []
            print(f"🎯 Worker plukket jobb #{job_id} ({job['job_type']}) for {len(facility_ids)} anlegg")

            # Refresh the job's heartbeat in the background while it runs.
            stop_event = asyncio.Event()
            heartbeat_task = asyncio.create_task(heartbeat_loop(pool, job_id, stop_event))

            try:
                result_summary = await run_scrape_job(job)
                await complete_scrape_job(pool, job_id, result_summary)
                print(f"✅ Jobb #{job_id} fullført")
            except Exception as exc:
                # Store a shortened traceback with the failure record.
                trace = traceback.format_exc(limit=5)
                print(f"🔥 Jobb #{job_id} feilet: {exc}\n{trace}")
                await fail_scrape_job(
                    pool,
                    job_id,
                    str(exc),
                    {"traceback": trace},
                )
            finally:
                # Stop the heartbeat and wait for it before claiming the next job.
                stop_event.set()
                await heartbeat_task
    finally:
        await pool.close()
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Start the worker loop only when this file is executed as a script.
    asyncio.run(main())
|