2026-02-28 09:20:56 +01:00
import asyncio
import os
import asyncpg
import smtplib
2026-03-02 19:39:40 +01:00
import re
2026-03-05 05:18:03 +01:00
import argparse
2026-02-28 09:20:56 +01:00
from datetime import datetime
from email . mime . text import MIMEText
from email . mime . multipart import MIMEMultipart
from playwright . async_api import async_playwright
try :
from playwright_stealth import stealth_async as apply_stealth
except ImportError :
from playwright_stealth import stealth as apply_stealth
2026-03-05 05:18:03 +01:00
from google import genai
2026-02-28 09:20:56 +01:00
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment before
# any of the configuration below is read.
load_dotenv()

# PostgreSQL connection string. The DATABASE_URL environment variable wins;
# the literal is only used when that variable is not set.
# NOTE(review): the fallback embeds a username and password in source —
# confirm this is a development-only credential.
DB_URL = os.getenv("DATABASE_URL", "postgresql://teeoff_admin:teeoff_secret_password@db:5432/teeoff")

# ==========================================
# CONFIGURE GEMINI AI (NEW SDK)
# ==========================================
# The new package automatically picks up GEMINI_API_KEY from your .env file
client = genai.Client()
2026-03-05 09:25:15 +01:00
# Status tokens the model is allowed to answer with. Must stay in sync with the
# list spelled out in the prompt inside ask_llm_status().
_VALID_LLM_STATUSES = (
    "aapen",
    "stengt",
    "aapen_med_vintergreener",
    "aapner_snart",
    "stenger_snart",
    "under_utvikling",
    "nedlagt",
    "ukjent",
)

# Longest token first: "aapen" is a prefix of "aapen_med_vintergreener", so a
# containment scan in declaration order would always report plain "aapen".
_LLM_STATUSES_LONGEST_FIRST = tuple(sorted(_VALID_LLM_STATUSES, key=len, reverse=True))


def _match_llm_status(answer):
    """Map a raw model answer onto one of the valid status tokens, else "ukjent"."""
    cleaned = answer.lower().strip(" \t\r\n.,:;!\"'`*-")
    if cleaned in _VALID_LLM_STATUSES:
        return cleaned
    # The model added extra words: fall back to containment, longest token first.
    for status in _LLM_STATUSES_LONGEST_FIRST:
        if status in cleaned:
            return status
    return "ukjent"


async def ask_llm_status(text, course_name, is_single_course, ai_instruction=None, *, llm_client=None):
    """Send the page text to Gemini and return a single status token.

    Args:
        text: Visible text scraped from the club's page. Only the first 15000
            characters are sent; ``None`` is treated as empty text.
        course_name: Name of the course to look for (used only when the
            facility has more than one course).
        is_single_course: True when the facility has exactly one course, in
            which case the model is told to ignore course names.
        ai_instruction: Optional extra instruction that is inserted into the
            prompt verbatim.
        llm_client: Optional object exposing ``aio.models.generate_content``;
            defaults to the module-level ``client``.

    Returns:
        One of the tokens in ``_VALID_LLM_STATUSES``. "ukjent" is returned when
        the answer contains no valid token or when the request fails.
    """
    # 1. Instruction depends on how many courses the facility has
    if is_single_course:
        bane_instruks = "Finn den generelle banestatusen for dette golfanlegget. Se bort fra spesifikke banenavn, da anlegget kun har én bane."
    else:
        bane_instruks = f'Finn banestatusen SPESIFIKT for banen som heter/omtales som: "{course_name}".'

    # Optional admin-supplied hint, passed through unchanged
    ekstra_tekst = f"\nVIKTIG EKSTRA-INSTRUKS FRA ADMIN:\n{ai_instruction}\n" if ai_instruction else ""

    page_text = (text or "")[:15000]

    # 2. The prompt itself
    prompt = "\n".join([
        "",
        "Du er en ekspert på å lese norske golfklubbers nettsider for å finne banestatus.",
        bane_instruks,
        ekstra_tekst,
        "Svar KUN med nøyaktig ETT av disse ordene:",
        "- aapen (hvis banen er åpen / sommergreener)",
        "- stengt (hvis banen er lukket / stengt / frost / snø)",
        "- aapen_med_vintergreener (hvis det spilles på vintergreener)",
        "- aapner_snart (hvis den åpner om kort tid)",
        "- stenger_snart (hvis den stenger for sesongen om kort tid)",
        "- under_utvikling (hvis den er under utvikling)",
        "- nedlagt (hvis den er nedlagt)",
        "- ukjent (hvis du ikke finner noe info om banen i teksten)",
        "Tekst fra nettsiden:",
        page_text,
        "",
    ])

    try:
        llm = client if llm_client is None else llm_client
        response = await llm.aio.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt,
        )
        # response.text may be None when the model returned no text part
        svar = (response.text or "").strip().lower()
    except Exception as e:
        # Best-effort: any API failure degrades to "ukjent" instead of aborting the run
        print(f"❌ Gemini Feil: {e}")
        return "ukjent"

    # 3. Safety filter: only tokens listed in the prompt are ever returned
    return _match_llm_status(svar)
# ==========================================
# EKSISTERENDE LOGIKK FOR MANUELL SCRAPING
# ==========================================
2026-02-28 09:20:56 +01:00
# Everything except ASCII letters, digits and the Norwegian letters æ/ø/å.
_NON_ALNUM_RE = re.compile(r"[^a-zA-Z0-9æøåÆØÅ]")

# "is" (ice) is matched as a whole word only; as a plain substring it also
# fires on words such as "pris", "disse" or "service".
_ICE_WORD_RE = re.compile(r"\bis\b")

# Ordered rules: the first rule with any trigger contained in the text wins.
_STATUS_RULES = (
    ("stengt", ("stengt", "lukket", "frost", "snø", "closed", "stenger")),
    ("aapen_med_vintergreener", ("vintergreen", "vintergrønn", "vinter")),
    ("aapner_snart", ("snart", "åpner kl")),
    ("aapen", ("åpen", "åpent", "aapen", "open")),
)


def clean_text(text):
    """Return *text* lower-cased with every non-alphanumeric character removed."""
    return _NON_ALNUM_RE.sub("", text).lower()


def interpret_status(text, keyword=None):
    """Derive a course status from scraped text using keyword rules.

    Args:
        text: Scraped status text; ``None`` is treated as an empty string.
        keyword: Optional marker for one course. When given, only the (up to)
            150 characters following its first occurrence are inspected.
            Without a keyword, the last 200 characters of the text are used.

    Returns:
        "NOT_FOUND" when a keyword was given but does not occur in the text
        (compared after ``clean_text`` normalisation); otherwise one of
        "stengt", "aapen_med_vintergreener", "aapner_snart", "aapen" or
        "ukjent" when no rule matches.
    """
    t_raw = (text or "").lower()

    if keyword:
        # Punctuation/whitespace-insensitive presence check
        if clean_text(keyword) not in clean_text(t_raw):
            return "NOT_FOUND"
        parts = re.split(re.escape(keyword), t_raw, flags=re.IGNORECASE)
        # If the literal keyword is not found verbatim, the whole text is kept
        if len(parts) > 1:
            t_raw = parts[1][:150]
    else:
        t_raw = t_raw[-200:]

    # Ice closes the course, same priority as the other "stengt" triggers
    if _ICE_WORD_RE.search(t_raw):
        return "stengt"

    for status, triggers in _STATUS_RULES:
        if any(trigger in t_raw for trigger in triggers):
            return status
    return "ukjent"
2026-03-02 19:39:40 +01:00
def send_report(changes, warnings, successes):
    """E-mail a plain-text summary of a scraping run.

    Nothing is sent when all three lists are empty. SMTP settings are read
    from the environment variables SMTP_SERVER, SMTP_PORT, SMTP_USER,
    SMTP_PASS and EMAIL_TO. Failures are printed, never raised, so a mail
    problem cannot abort the caller.

    Args:
        changes: Lines describing status updates that were written.
        warnings: Lines describing problems encountered.
        successes: Lines describing checks that resulted in no change.
    """
    if not changes and not warnings and not successes:
        return

    subject = f"TeeOff Banestatus Rapport - {datetime.now().strftime('%d.%m.%Y')}"

    sections = ["BANESTATUS RAPPORT\n" + "=" * 30 + "\n\n"]
    if changes:
        sections.append("✅ OPPDATERINGER:\n" + "\n".join(changes) + "\n\n")
    if warnings:
        sections.append("⚠️ MERKNADER / ADVARSLER:\n" + "\n".join(warnings) + "\n\n")
    if successes:
        sections.append("🆗 VELLYKKEDE SJEKKER (INGEN ENDRING):\n" + "\n".join(successes) + "\n")
    body = "".join(sections)

    settings = {
        name: os.getenv(name)
        for name in ("SMTP_SERVER", "SMTP_PORT", "SMTP_USER", "SMTP_PASS", "EMAIL_TO")
    }
    # Report missing configuration by name instead of failing later with an
    # opaque int(None) TypeError.
    missing = [name for name, value in settings.items() if not value]
    if missing:
        print(f"❌ E-post feil: mangler miljøvariabler: {', '.join(missing)}")
        return

    msg = MIMEMultipart()
    msg['From'] = settings["SMTP_USER"]
    msg['To'] = settings["EMAIL_TO"]
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))

    try:
        # The timeout keeps an unreachable mail server from hanging the job.
        with smtplib.SMTP_SSL(settings["SMTP_SERVER"], int(settings["SMTP_PORT"]), timeout=30) as server:
            server.login(settings["SMTP_USER"], settings["SMTP_PASS"])
            server.send_message(msg)
        print("✅ Rapport sendt på e-post.")
    except Exception as e:
        print(f"❌ E-post feil: {e}")
2026-02-28 09:20:56 +01:00
2026-03-05 05:18:03 +01:00
# ==========================================
# HOVEDMOTOR
# ==========================================
async def _fetch_facilities(conn, facility_ids):
    """Load the facilities to scrape, optionally restricted to *facility_ids*."""
    if facility_ids:
        print(f"📌 Kjører skraping KUN for anlegg-ID(er): {facility_ids}")
        return await conn.fetch(
            "SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL AND id = ANY($1::int[])",
            facility_ids,
        )
    print("🌍 Kjører skraping for ALLE anlegg med scrape_status_url...")
    return await conn.fetch(
        "SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL"
    )


async def _extract_status_text(page, f, method, warnings):
    """Read the status text from an already-loaded page using *method*.

    Returns the text, or None after appending an explanation to *warnings*
    when the text could not be obtained.
    """
    if method == 'css_selector':
        element = page.locator(f['scrape_status_selector']).first
        if await element.count() == 0:
            warnings.append(f"❌ {f['name']}: Fant ikke CSS-elementet '{f['scrape_status_selector']}'")
            return None
        return await element.inner_text()

    if method == 'iframe_golfbox':
        frame = page.frame_locator('iframe[src*="golfbox"]')
        element = frame.locator(f['scrape_status_selector']).first
        if await element.count() == 0:
            warnings.append(f"❌ {f['name']}: Fant ikke elementet '{f['scrape_status_selector']}' i iframen")
            return None
        return await element.inner_text()

    if method == 'click_then_css':
        # Selector format: "<button selector>||<text selector>"
        parts = f['scrape_status_selector'].split('||')
        if len(parts) != 2:
            warnings.append(f"❌ {f['name']}: Ugyldig selector for click_then_css (mangler ||)")
            return None
        btn_selector, text_selector = parts
        btn = page.locator(btn_selector).first
        if await btn.count() == 0:
            warnings.append(f"❌ {f['name']}: Fant ikke knappen å klikke på: '{btn_selector}'")
            return None
        await btn.click()
        await asyncio.sleep(2)  # let the click-triggered content render
        element = page.locator(text_selector).first
        if await element.count() == 0:
            warnings.append(f"❌ {f['name']}: Fant ikke tekstboksen '{text_selector}' etter klikk")
            return None
        return await element.inner_text()

    if method == 'llm_parse':
        # Auto-clicker: open the first visible element whose text mentions
        # "banestatus" so that hidden status panels end up in the page text.
        print("🖱️ Leter etter 'banestatus'-knapper å klikke på...")
        knapper = await page.get_by_text(re.compile(r"banestatus", re.IGNORECASE)).all()
        for knapp in knapper:
            try:
                if await knapp.is_visible():
                    await knapp.click(timeout=3000)
                    print("🎯 Klikket på en banestatus-knapp! Venter 2 sekunder...")
                    await asyncio.sleep(2)
                    break
            except Exception:
                # Best-effort: a candidate that cannot be clicked is skipped
                continue
        # Copy all visible text from the whole page
        element = page.locator("body").first
        if await element.count() == 0:
            warnings.append(f"❌ {f['name']}: Klarte ikke å lese siden for AI-tolkning")
            return None
        raw_text = await element.inner_text()
        # Collapse all whitespace runs to shrink the text before sending it to Gemini
        return " ".join(raw_text.split())

    warnings.append(f"⚠️ {f['name']}: Ukjent skrapemetode i databasen: '{method}'")
    return None


async def run_daily_scraping(facility_ids=None):
    """Scrape course status for facilities and store any changes.

    For every facility with a ``scrape_status_url`` (or only those whose id is
    in *facility_ids*), the status page is loaded in headless Chromium, the
    status text is extracted according to the facility's ``scrape_method``,
    each course's status is derived (via Gemini for ``llm_parse``, otherwise
    via ``interpret_status``) and written to the ``courses`` table when it
    changed. A summary is passed to ``send_report`` at the end.

    Args:
        facility_ids: Optional list of integer facility ids. When falsy, all
            facilities with a scrape URL are processed.
    """
    print(f"🚀 Starter sjekk {datetime.now().strftime('%H:%M:%S')}...")
    changes, warnings, successes = [], [], []

    conn = await asyncpg.connect(DB_URL)
    try:
        facilities = await _fetch_facilities(conn, facility_ids)
        if not facilities:
            print("⚠️ Fant ingen anlegg å skrape.")
            return

        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                context = await browser.new_context()
                for f in facilities:
                    method = f.get('scrape_method') or 'css_selector'

                    # Kill switch: manually managed facilities are never scraped
                    if method == 'manual':
                        successes.append(f"⏸️ {f['name']}: Hoppet over (Manuell overstyring)")
                        print(f"⏸️ Hopper over skraping av {f['name']} (Satt til Manuell)")
                        continue

                    page = await context.new_page()
                    try:
                        # Stealth is best-effort. The imported fallback may not
                        # be a coroutine function, so only await an awaitable.
                        outcome = apply_stealth(page)
                        if hasattr(outcome, "__await__"):
                            await outcome
                    except Exception as e:
                        print(f"⚠️ Kunne ikke aktivere stealth for {f['name']}: {e}")

                    try:
                        print(f"🔍 Besøker {f['name']} (Metode: {method})...")
                        await page.goto(f['scrape_status_url'], timeout=60000, wait_until="domcontentloaded")
                        await asyncio.sleep(3)  # give JavaScript 3 seconds to build the page

                        full_text = await _extract_status_text(page, f, method, warnings)
                        if full_text is None:
                            continue

                        await conn.execute("UPDATE facilities SET status_updated_at = CURRENT_DATE WHERE id = $1", f['id'])
                        courses = await conn.fetch("SELECT id, name, status, scrape_keyword FROM courses WHERE facility_id = $1", f['id'])

                        # A facility with exactly one course gets a simpler prompt
                        is_single_course = len(courses) == 1

                        for c in courses:
                            # Status comes from the LLM or from the keyword rules
                            if method == 'llm_parse':
                                print(f"🤖 Spør Gemini om status for '{c['name']}' (Singelbane: {is_single_course})...")
                                new_status = await ask_llm_status(full_text, c['name'], is_single_course, f.get('ai_instruction'))
                            else:
                                new_status = interpret_status(full_text, c['scrape_keyword'])

                            if new_status == "NOT_FOUND":
                                warnings.append(f"❓ {f['name']} ({c['name']}): Fant ikke søkeordet '{c['scrape_keyword']}' i teksten.")
                                continue

                            old_status = c['status'] or "ukjent"
                            # "ukjent" never overwrites a stored status.
                            # NOTE(review): an "ukjent" result is listed under
                            # successes below — confirm that is intended.
                            if new_status != old_status and new_status != "ukjent":
                                await conn.execute("UPDATE courses SET status = $1 WHERE id = $2", new_status, c['id'])
                                changes.append(f"🔹 {f['name']} ({c['name']}): {old_status.upper()} ➔ {new_status.upper()}")
                                print(f"✅ Oppdatert status for {f['name']} - {c['name']}")
                            else:
                                successes.append(f"✅ {f['name']} ({c['name']}): {new_status.upper()}")
                                print(f"- {c['name']}: Ingen endring ({new_status.upper()})")
                    except Exception as e:
                        # One failing facility must not stop the run; keep only
                        # the first line of the (often multi-line) error text.
                        err_msg = str(e).split('\n')[0]
                        warnings.append(f"🔥 {f['name']}: Feil under skraping: {err_msg}")
                    finally:
                        await page.close()
            finally:
                await browser.close()
    finally:
        # Always release the DB connection, including when the browser fails to start
        await conn.close()

    send_report(changes, warnings, successes)
    print("🏁 Ferdig.")
def parse_facility_ids(raw):
    """Parse a comma-separated string of facility ids.

    Args:
        raw: Text such as "1,4,12", or None/empty when no filter was given.

    Returns:
        None when *raw* is None or empty, otherwise a non-empty list of ints.
        Blank entries between commas are ignored.

    Raises:
        ValueError: If an entry is not an integer, or if *raw* contains no
            ids at all (e.g. ","). An empty list would otherwise be treated
            as "no filter" and scrape every facility.
    """
    if not raw:
        return None
    ids = [int(piece) for piece in (part.strip() for part in raw.split(",")) if piece]
    if not ids:
        raise ValueError("no facility ids given")
    return ids


if __name__ == "__main__":
    # Accepts arguments from main.py (background task)
    parser = argparse.ArgumentParser(description="TeeOff Status Scraper")
    parser.add_argument("--ids", type=str, help="Kommaseparert liste med anleggs-IDer", default=None)
    args = parser.parse_args()

    try:
        facility_ids_list = parse_facility_ids(args.ids)
    except ValueError:
        print("❌ Feil format på --ids. Må være kommaseparerte tall, f.eks: 1,4,12")
        # SystemExit works without the `site`-provided exit() helper
        raise SystemExit(1) from None

    asyncio.run(run_daily_scraping(facility_ids_list))