2026-02-28 09:20:56 +01:00
import asyncio
import os
import asyncpg
import smtplib
2026-03-02 19:39:40 +01:00
import re
2026-02-28 09:20:56 +01:00
from datetime import datetime
from email . mime . text import MIMEText
from email . mime . multipart import MIMEMultipart
from playwright . async_api import async_playwright
try :
from playwright_stealth import stealth_async as apply_stealth
except ImportError :
from playwright_stealth import stealth as apply_stealth
from dotenv import load_dotenv
# Read a local .env file (if present) so the os.getenv() lookups in this
# module can see values defined there.
load_dotenv()

# PostgreSQL connection string handed to asyncpg.connect().
# It can be overridden through the SCRAPER_DB_URL environment variable; the
# default is the value that used to be hard-coded here, so existing
# deployments keep working unchanged.
# NOTE(review): the default still embeds a password -- move it into .env and
# drop the fallback once every deployment sets SCRAPER_DB_URL.
DB_URL = os.getenv(
    "SCRAPER_DB_URL",
    "postgresql://teeoff_admin:teeoff_secret_password@db:5432/teeoff",
)
# Everything clean_text() strips: any character that is not an ASCII letter,
# a digit or one of the Norwegian letters æ/ø/å.
_NON_ALNUM = r"[^a-zA-Z0-9æøåÆØÅ]"
_NON_ALNUM_RE = re.compile(_NON_ALNUM)

# Status vocabulary. The groups are tested in this order and the first group
# with a hit decides the result.
_CLOSED_WORDS = ("stengt", "lukket", "frost", "snø", "closed", "stenger")
# "is" (ice) is only accepted as a whole word; as a plain substring it also
# hits unrelated words such as "prisliste" or "disse".
_ICE_RE = re.compile(r"\bis\b")
_WINTER_WORDS = ("vintergreen", "vintergrønn", "vinter")
_SOON_WORDS = ("snart", "åpner kl")
_OPEN_WORDS = ("åpen", "åpent", "aapen", "open")


def clean_text(text):
    """Return *text* lower-cased with every non-alphanumeric character removed.

    Only ASCII letters, digits and æ/ø/å survive; whitespace and punctuation
    are dropped. ``None`` is treated as an empty string.
    """
    if text is None:
        return ""
    return _NON_ALNUM_RE.sub("", text).lower()


def interpret_status(text, keyword=None):
    """Classify scraped status text into one of the course status codes.

    Args:
        text: Text scraped from the status page (``None`` counts as empty).
        keyword: Optional course keyword. When given, only the text that
            follows its first occurrence is classified (at most 150
            characters, and never past the next occurrence of the keyword).

    Returns:
        ``"NOT_FOUND"`` when *keyword* is given but absent from *text*,
        otherwise ``"stengt"``, ``"aapen_med_vintergreener"``,
        ``"aapner_snart"``, ``"aapen"`` or ``"ukjent"``.
    """
    t_raw = (text or "").lower()

    if keyword:
        k_clean = clean_text(keyword)
        if k_clean not in clean_text(t_raw):
            return "NOT_FOUND"

        # A keyword consisting only of punctuation normalizes to "" and
        # cannot be located; the whole text is classified in that case.
        if k_clean:
            # Presence was established on normalized text, so the keyword is
            # located the same way: its characters may be separated by
            # anything clean_text() would have removed ("18-hull" finds
            # "18 hull").
            locator = re.compile(
                (_NON_ALNUM + "*").join(re.escape(ch) for ch in k_clean)
            )
            hit = locator.search(t_raw)
            if hit:
                tail = t_raw[hit.end():]
                # Stop at the next mention of the keyword, if there is one.
                repeat = locator.search(tail)
                if repeat:
                    tail = tail[:repeat.start()]
                t_raw = tail[:150]
            else:
                # Defensive fallback: classify the end of the text.
                t_raw = t_raw[-200:]

    if any(word in t_raw for word in _CLOSED_WORDS) or _ICE_RE.search(t_raw):
        return "stengt"
    if any(word in t_raw for word in _WINTER_WORDS):
        return "aapen_med_vintergreener"
    if any(word in t_raw for word in _SOON_WORDS):
        return "aapner_snart"
    if any(word in t_raw for word in _OPEN_WORDS):
        return "aapen"
    return "ukjent"
2026-03-02 19:39:40 +01:00
def _build_report_body(changes, warnings, successes):
    """Assemble the plain-text report body from the three message lists."""
    sections = ["BANESTATUS RAPPORT\n" + "=" * 30 + "\n\n"]
    if changes:
        sections.append("✅ OPPDATERINGER:\n" + "\n".join(changes) + "\n\n")
    if warnings:
        sections.append(
            "⚠️ MERKNADER / ADVARSLER:\n" + "\n".join(warnings) + "\n\n"
        )
    if successes:
        sections.append(
            "🆗 VELLYKKEDE SJEKKER (INGEN ENDRING):\n"
            + "\n".join(successes)
            + "\n"
        )
    return "".join(sections)


def send_report(changes, warnings, successes):
    """E-mail a summary of the scraping run.

    Nothing is sent when all three lists are empty. The SMTP settings are
    read from the environment variables SMTP_SERVER, SMTP_PORT, SMTP_USER,
    SMTP_PASS and EMAIL_TO. Sending is best effort: any failure is printed
    and swallowed so that it cannot abort the caller.

    Args:
        changes: Lines describing courses whose status was updated.
        warnings: Lines describing problems found while scraping.
        successes: Lines describing checks that produced no change.
    """
    if not (changes or warnings or successes):
        return

    # Name the missing settings explicitly instead of failing later with an
    # opaque int(None) / login error.
    settings = {
        key: os.getenv(key)
        for key in ("SMTP_SERVER", "SMTP_PORT", "SMTP_USER", "SMTP_PASS", "EMAIL_TO")
    }
    missing = [key for key, value in settings.items() if not value]
    if missing:
        print(f"❌ E-post feil: mangler miljøvariabler: {', '.join(missing)}")
        return

    subject = f"TeeOff Banestatus Rapport - {datetime.now().strftime('%d.%m.%Y')}"

    msg = MIMEMultipart()
    msg['From'] = settings["SMTP_USER"]
    msg['To'] = settings["EMAIL_TO"]
    msg['Subject'] = subject
    msg.attach(MIMEText(_build_report_body(changes, warnings, successes), 'plain'))

    try:
        port = int(settings["SMTP_PORT"])
        # The timeout keeps an unreachable mail server from hanging the job.
        with smtplib.SMTP_SSL(settings["SMTP_SERVER"], port, timeout=30) as server:
            server.login(settings["SMTP_USER"], settings["SMTP_PASS"])
            server.send_message(msg)
        print("✅ Rapport sendt på e-post.")
    except Exception as e:
        # Deliberately broad: reporting must never crash the scraping run.
        print(f"❌ E-post feil: {e}")
2026-02-28 09:20:56 +01:00
async def _read_status_text(page, facility):
    """Extract the status text for *facility* from the already loaded *page*.

    The facility's ``scrape_method`` (default ``'css_selector'``) selects how
    ``scrape_status_selector`` is used.

    Returns:
        A ``(text, warning)`` pair. On success ``warning`` is ``None``; when
        the text could not be read ``text`` is ``None`` and ``warning`` is the
        message to put in the report.
    """
    name = facility['name']
    selector = facility['scrape_status_selector']
    method = facility.get('scrape_method') or 'css_selector'

    if method == 'css_selector':
        element = page.locator(selector).first
        if await element.count() == 0:
            return None, f"❌ {name}: Fant ikke CSS-elementet '{selector}'"
        return await element.inner_text(), None

    if method == 'iframe_golfbox':
        frame = page.frame_locator('iframe[src*="golfbox"]')
        element = frame.locator(selector).first
        if await element.count() == 0:
            return None, f"❌ {name}: Fant ikke elementet '{selector}' i iframen"
        return await element.inner_text(), None

    if method == 'click_then_css':
        # Expected selector format: "button_selector||text_selector"
        parts = selector.split('||')
        if len(parts) != 2:
            return None, f"❌ {name}: Ugyldig selector for click_then_css (mangler ||)"
        btn_selector, text_selector = parts

        # 1. Find and click the button.
        btn = page.locator(btn_selector).first
        if await btn.count() == 0:
            return None, f"❌ {name}: Fant ikke knappen å klikke på: '{btn_selector}'"
        await btn.click()

        # 2. Wait two seconds so the (side panel) animation can finish.
        await asyncio.sleep(2)

        # 3. Read the text.
        element = page.locator(text_selector).first
        if await element.count() == 0:
            return None, f"❌ {name}: Fant ikke tekstboksen '{text_selector}' etter klikk"
        return await element.inner_text(), None

    return None, f"⚠️ {name}: Ukjent skrapemetode i databasen: '{method}'"


async def run_daily_scraping():
    """Scrape every facility's status page, update the database and report.

    For each facility with a ``scrape_status_url`` the page is opened in
    headless Chromium, the status text is extracted and each of the
    facility's courses is classified with ``interpret_status``. Changed
    statuses are written to ``courses``; changes, warnings and unchanged
    checks are collected and passed to ``send_report``.

    The database connection and the browser are closed even when an
    unexpected error escapes the per-facility handling.
    """
    print(f"🚀 Starter sjekk {datetime.now().strftime('%H:%M:%S')}...")
    changes, warnings, successes = [], [], []

    conn = await asyncpg.connect(DB_URL)
    try:
        facilities = await conn.fetch(
            "SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method "
            "FROM facilities WHERE scrape_status_url IS NOT NULL"
        )

        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                context = await browser.new_context()
                for facility in facilities:
                    page = await context.new_page()

                    # Stealth is best effort. Depending on which import
                    # succeeded, apply_stealth may not be a coroutine
                    # function, so only await what is awaitable.
                    try:
                        outcome = apply_stealth(page)
                        if asyncio.iscoroutine(outcome):
                            await outcome
                    except Exception as e:
                        print(f"⚠️ Stealth kunne ikke aktiveres for {facility['name']}: {e}")

                    try:
                        print(f"🔍 Besøker {facility['name']}...")
                        # Changed from networkidle to domcontentloaded to
                        # avoid the Arendal timeout.
                        await page.goto(
                            facility['scrape_status_url'],
                            timeout=60000,
                            wait_until="domcontentloaded",
                        )
                        # Give JavaScript three seconds to build the page.
                        await asyncio.sleep(3)

                        full_text, problem = await _read_status_text(page, facility)
                        if problem is not None:
                            warnings.append(problem)
                            continue

                        await conn.execute(
                            "UPDATE facilities SET status_updated_at = CURRENT_DATE WHERE id = $1",
                            facility['id'],
                        )
                        courses = await conn.fetch(
                            "SELECT id, name, status, scrape_keyword FROM courses WHERE facility_id = $1",
                            facility['id'],
                        )
                        for course in courses:
                            new_status = interpret_status(full_text, course['scrape_keyword'])
                            if new_status == "NOT_FOUND":
                                warnings.append(
                                    f"❓ {facility['name']} ({course['name']}): Fant ikke søkeordet "
                                    f"'{course['scrape_keyword']}' i teksten på siden."
                                )
                                continue

                            old_status = course['status'] or "ukjent"
                            # An uninterpretable result ("ukjent") never
                            # overwrites a stored status.
                            if new_status != old_status and new_status != "ukjent":
                                await conn.execute(
                                    "UPDATE courses SET status = $1 WHERE id = $2",
                                    new_status,
                                    course['id'],
                                )
                                changes.append(
                                    f"🔹 {facility['name']} ({course['name']}): "
                                    f"{old_status.upper()} ➔ {new_status.upper()}"
                                )
                                print(f"✅ Oppdatert status for {facility['name']} - {course['name']}")
                            else:
                                successes.append(
                                    f"✅ {facility['name']} ({course['name']}): {new_status.upper()}"
                                )
                                print(f"   - {course['name']}: Ingen endring ({new_status.upper()})")
                    except Exception as e:
                        # Keep only the first line of the error message so
                        # the e-mail is not flooded with a long traceback.
                        err_msg = str(e).split('\n')[0]
                        warnings.append(f"🔥 {facility['name']}: Feil under skraping: {err_msg}")
                    finally:
                        await page.close()
            finally:
                await browser.close()
    finally:
        await conn.close()

    send_report(changes, warnings, successes)
    print("🏁 Ferdig.")
# Script entry point: run one full scraping pass when the file is executed
# directly (not when it is imported).
if __name__ == "__main__":
    scraping_job = run_daily_scraping()
    asyncio.run(scraping_job)