Før endringer i hcp-kalkulator
This commit is contained in:
parent
d49aa7b211
commit
1ff8ee2c26
9 changed files with 106 additions and 22 deletions
|
|
@ -5810,6 +5810,10 @@ async def update_scrape_settings(facility_id: int, settings: ScrapeSettingsUpdat
|
|||
if not facility:
|
||||
raise HTTPException(status_code=404, detail="Anlegget finnes ikke.")
|
||||
|
||||
normalized_scrape_method = settings.scrape_method
|
||||
if isinstance(normalized_scrape_method, str) and not normalized_scrape_method.strip():
|
||||
normalized_scrape_method = "disabled"
|
||||
|
||||
# Oppdater verdiene i databasen inkludert AI instruks
|
||||
await conn.execute("""
|
||||
UPDATE facilities
|
||||
|
|
@ -5819,14 +5823,14 @@ async def update_scrape_settings(facility_id: int, settings: ScrapeSettingsUpdat
|
|||
ai_instruction = $4
|
||||
WHERE id = $5
|
||||
""",
|
||||
settings.scrape_method,
|
||||
normalized_scrape_method,
|
||||
settings.scrape_status_url,
|
||||
settings.scrape_status_selector,
|
||||
settings.ai_instruction,
|
||||
facility_id)
|
||||
|
||||
# Hvis metoden er manuell, tvinger vi gjennom de nye banestatusene direkte
|
||||
if settings.scrape_method == 'manual' and settings.courses:
|
||||
if normalized_scrape_method == 'manual' and settings.courses:
|
||||
for c in settings.courses:
|
||||
current_course = await conn.fetchrow(
|
||||
"SELECT id, facility_id, status FROM courses WHERE id = $1 AND facility_id = $2",
|
||||
|
|
|
|||
|
|
@ -19,7 +19,13 @@ from dotenv import load_dotenv
|
|||
from env_config import get_database_url
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
from scrape_utils import ProgressCallback, emit_progress, make_progress_event, parse_llm_json
|
||||
from scrape_utils import (
|
||||
ProgressCallback,
|
||||
emit_progress,
|
||||
exclude_discontinued_facilities_clause,
|
||||
make_progress_event,
|
||||
parse_llm_json,
|
||||
)
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
|
@ -194,6 +200,7 @@ async def run_golfpakker_scraper(facility_ids=None, progress_callback: ProgressC
|
|||
FROM facilities
|
||||
WHERE COALESCE(NULLIF(TRIM(golfpakker_url), ''), NULLIF(TRIM(website_url), '')) IS NOT NULL
|
||||
"""
|
||||
query += exclude_discontinued_facilities_clause("facilities")
|
||||
if facility_ids:
|
||||
query += f" AND id IN ({','.join(map(str, facility_ids))})"
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,13 @@ from playwright.async_api import async_playwright
|
|||
import google.generativeai as genai
|
||||
from dotenv import load_dotenv
|
||||
from env_config import get_database_url
|
||||
from scrape_utils import ProgressCallback, emit_progress, make_progress_event, parse_llm_json
|
||||
from scrape_utils import (
|
||||
ProgressCallback,
|
||||
emit_progress,
|
||||
exclude_discontinued_facilities_clause,
|
||||
make_progress_event,
|
||||
parse_llm_json,
|
||||
)
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
|
@ -110,7 +116,11 @@ async def run_greenfee_scraper(facility_ids=None, progress_callback: ProgressCal
|
|||
failed_count = 0
|
||||
|
||||
try:
|
||||
query = "SELECT id, name, greenfee_url FROM facilities WHERE greenfee_url IS NOT NULL AND greenfee_url != ''"
|
||||
query = (
|
||||
"SELECT id, name, greenfee_url FROM facilities "
|
||||
"WHERE greenfee_url IS NOT NULL AND greenfee_url != ''"
|
||||
f"{exclude_discontinued_facilities_clause('facilities')}"
|
||||
)
|
||||
if facility_ids:
|
||||
query += f" AND id IN ({','.join(map(str, facility_ids))})"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,13 @@ from playwright.async_api import async_playwright
|
|||
import google.generativeai as genai
|
||||
from dotenv import load_dotenv
|
||||
from env_config import get_database_url
|
||||
from scrape_utils import ProgressCallback, emit_progress, make_progress_event, parse_llm_json
|
||||
from scrape_utils import (
|
||||
ProgressCallback,
|
||||
emit_progress,
|
||||
exclude_discontinued_facilities_clause,
|
||||
make_progress_event,
|
||||
parse_llm_json,
|
||||
)
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
|
@ -105,7 +111,11 @@ async def run_scraper(facility_ids=None, progress_callback: ProgressCallback | N
|
|||
failed_count = 0
|
||||
|
||||
try:
|
||||
query = "SELECT id, name, medlemskap_url FROM facilities WHERE medlemskap_url IS NOT NULL AND medlemskap_url != ''"
|
||||
query = (
|
||||
"SELECT id, name, medlemskap_url FROM facilities "
|
||||
"WHERE medlemskap_url IS NOT NULL AND medlemskap_url != ''"
|
||||
f"{exclude_discontinued_facilities_clause('facilities')}"
|
||||
)
|
||||
if facility_ids:
|
||||
query += f" AND id IN ({','.join(map(str, facility_ids))})"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,11 +17,17 @@ from google import genai
|
|||
from dotenv import load_dotenv
|
||||
from course_status_history import ensure_course_status_history_table, log_course_status_change
|
||||
from env_config import get_database_url
|
||||
from scrape_utils import ProgressCallback, emit_progress, make_progress_event
|
||||
from scrape_utils import (
|
||||
ProgressCallback,
|
||||
emit_progress,
|
||||
exclude_discontinued_facilities_clause,
|
||||
make_progress_event,
|
||||
)
|
||||
|
||||
load_dotenv()
|
||||
|
||||
DB_URL = get_database_url()
|
||||
# Scrape-method values (compared after stripping whitespace) for which the
# status scraper skips the facility instead of scraping it: empty, explicitly
# disabled, or under manual override.
DISABLED_STATUS_METHODS = {"", "disabled", "manual"}
|
||||
|
||||
# ==========================================
|
||||
# KONFIGURERER GEMINI AI (NY SDK)
|
||||
|
|
@ -154,17 +160,18 @@ async def run_daily_scraping(facility_ids=None, progress_callback: ProgressCallb
|
|||
print(f"🚀 Starter sjekk {datetime.now().strftime('%H:%M:%S')}...")
|
||||
conn = await asyncpg.connect(DB_URL)
|
||||
await ensure_course_status_history_table(conn)
|
||||
facility_filter = exclude_discontinued_facilities_clause("facilities")
|
||||
|
||||
if facility_ids:
|
||||
print(f"📌 Kjører skraping KUN for anlegg-ID(er): {facility_ids}")
|
||||
facilities = await conn.fetch(
|
||||
"SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL AND id = ANY($1::int[])",
|
||||
f"SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL AND id = ANY($1::int[]){facility_filter}",
|
||||
facility_ids
|
||||
)
|
||||
else:
|
||||
print("🌍 Kjører skraping for ALLE anlegg med scrape_status_url...")
|
||||
facilities = await conn.fetch(
|
||||
"SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL"
|
||||
f"SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL{facility_filter}"
|
||||
)
|
||||
|
||||
if not facilities:
|
||||
|
|
@ -207,7 +214,9 @@ async def run_daily_scraping(facility_ids=None, progress_callback: ProgressCallb
|
|||
context = await browser.new_context()
|
||||
|
||||
for index, f in enumerate(facilities, start=1):
|
||||
method = f.get('scrape_method') or 'css_selector'
|
||||
raw_method = (f.get('scrape_method') or "").strip()
|
||||
method = raw_method or 'css_selector'
|
||||
method_label = "disabled" if raw_method in {"", "disabled"} else method
|
||||
facility_id = f['id']
|
||||
facility_name = f['name']
|
||||
|
||||
|
|
@ -219,15 +228,21 @@ async def run_daily_scraping(facility_ids=None, progress_callback: ProgressCallb
|
|||
facility_id=facility_id,
|
||||
facility_name=facility_name,
|
||||
outcome="info",
|
||||
message=f"Starter sjekk med metode {method}.",
|
||||
message=f"Starter sjekk med metode {method_label}.",
|
||||
processed=index - 1,
|
||||
total=total_facilities,
|
||||
),
|
||||
)
|
||||
|
||||
if method == 'manual':
|
||||
successes.append(f"⏸️ {f['name']}: Hoppet over (Manuell overstyring)")
|
||||
print(f" ⏸️ Hopper over skraping av {f['name']} (Satt til Manuell)")
|
||||
if raw_method in DISABLED_STATUS_METHODS:
|
||||
skip_reason = "Manuell overstyring" if raw_method == "manual" else "Skraping avslått"
|
||||
progress_message = (
|
||||
"Hoppet over fordi anlegget er satt til manuell overstyring."
|
||||
if raw_method == "manual"
|
||||
else "Hoppet over fordi banestatusskraping er avslått for anlegget."
|
||||
)
|
||||
successes.append(f"⏸️ {f['name']}: Hoppet over ({skip_reason})")
|
||||
print(f" ⏸️ Hopper over skraping av {f['name']} ({skip_reason})")
|
||||
skipped_facilities += 1
|
||||
await emit_progress(
|
||||
progress_callback,
|
||||
|
|
@ -241,7 +256,7 @@ async def run_daily_scraping(facility_ids=None, progress_callback: ProgressCallb
|
|||
facility_id=facility_id,
|
||||
facility_name=facility_name,
|
||||
outcome="warning",
|
||||
message="Hoppet over fordi anlegget er satt til manuell overstyring.",
|
||||
message=progress_message,
|
||||
processed=index,
|
||||
total=total_facilities,
|
||||
),
|
||||
|
|
|
|||
|
|
@ -4,6 +4,24 @@ from typing import Any, Awaitable, Callable
|
|||
ProgressCallback = Callable[[dict[str, Any]], Awaitable[None]]
|
||||
|
||||
|
||||
def exclude_discontinued_facilities_clause(facility_table: str = "facilities") -> str:
    """Build an SQL ``AND (...)`` fragment that filters out discontinued facilities.

    The fragment keeps a facility row when it either has no rows in
    ``courses`` at all, or has at least one course whose status is not
    ``'nedlagt'`` (a NULL status is treated as ``'ukjent'`` and is therefore
    kept). It starts with whitespace followed by ``AND`` so it can be appended
    directly to an existing ``WHERE`` condition.

    Args:
        facility_table: Name or alias of the facilities table in the outer
            query; its ``id`` column is correlated with
            ``courses.facility_id``. Dotted names such as ``schema.table``
            are accepted.

    Returns:
        The SQL fragment as a string.

    Raises:
        ValueError: If ``facility_table`` is not a plain (optionally dotted)
            identifier.
    """
    # The name is interpolated into the SQL text (identifiers cannot be bound
    # as query parameters), so refuse anything that is not a bare identifier.
    if not isinstance(facility_table, str) or not all(
        part.isidentifier() for part in facility_table.split(".")
    ):
        raise ValueError(f"Invalid facility table name: {facility_table!r}")

    return f"""
        AND (
            NOT EXISTS (
                SELECT 1
                FROM courses course_filter
                WHERE course_filter.facility_id = {facility_table}.id
            )
            OR EXISTS (
                SELECT 1
                FROM courses course_filter
                WHERE course_filter.facility_id = {facility_table}.id
                  AND COALESCE(course_filter.status, 'ukjent') <> 'nedlagt'
            )
        )
    """
|
||||
|
||||
|
||||
async def emit_progress(progress_callback: ProgressCallback | None, **payload: Any) -> None:
|
||||
if progress_callback is None:
|
||||
return
|
||||
|
|
|
|||
|
|
@ -16,7 +16,13 @@ from playwright.async_api import async_playwright
|
|||
import google.generativeai as genai
|
||||
from dotenv import load_dotenv
|
||||
from env_config import get_database_url
|
||||
from scrape_utils import ProgressCallback, emit_progress, make_progress_event, parse_llm_json
|
||||
from scrape_utils import (
|
||||
ProgressCallback,
|
||||
emit_progress,
|
||||
exclude_discontinued_facilities_clause,
|
||||
make_progress_event,
|
||||
parse_llm_json,
|
||||
)
|
||||
from vtg_courses import filter_upcoming_courses
|
||||
|
||||
load_dotenv()
|
||||
|
|
@ -110,7 +116,11 @@ async def run_vtg_scraper(facility_ids=None, progress_callback: ProgressCallback
|
|||
failed_count = 0
|
||||
|
||||
try:
|
||||
query = "SELECT id, name, vtg_lenke FROM facilities WHERE vtg_lenke IS NOT NULL AND vtg_lenke != ''"
|
||||
query = (
|
||||
"SELECT id, name, vtg_lenke FROM facilities "
|
||||
"WHERE vtg_lenke IS NOT NULL AND vtg_lenke != ''"
|
||||
f"{exclude_discontinued_facilities_clause('facilities')}"
|
||||
)
|
||||
if facility_ids:
|
||||
query += f" AND id IN ({','.join(map(str, facility_ids))})"
|
||||
|
||||
|
|
|
|||
|
|
@ -197,6 +197,11 @@ const EMPTY_MANUAL_OVERRIDE_FORM: ManualOverrideForm = {
|
|||
vtg_datoer: [],
|
||||
};
|
||||
|
||||
/**
 * Map a facility's stored scrape method onto the value used by the edit form.
 *
 * - `null` / `undefined` fall back to `'css_selector'`.
 * - A string that is empty or whitespace-only becomes `'disabled'`.
 * - Any other value is returned unchanged.
 */
function normalizeScrapeMethod(scrapeMethod: string | null | undefined) {
  if (scrapeMethod === null || scrapeMethod === undefined) {
    return 'css_selector';
  }

  const isBlank = typeof scrapeMethod === 'string' && scrapeMethod.trim().length === 0;
  return isBlank ? 'disabled' : scrapeMethod;
}
|
||||
|
||||
const toInputString = (value: unknown) => {
|
||||
if (value === null || value === undefined) return '';
|
||||
return String(value);
|
||||
|
|
@ -709,7 +714,7 @@ export default function AdminDashboard() {
|
|||
setEditForm({
|
||||
scrape_status_url: facility.scrape_status_url || '',
|
||||
scrape_status_selector: facility.scrape_status_selector || '',
|
||||
scrape_method: facility.scrape_method || 'css_selector',
|
||||
scrape_method: normalizeScrapeMethod(facility.scrape_method),
|
||||
ai_instruction: facility.ai_instruction || '',
|
||||
courses: facility.course_statuses ? facility.course_statuses.map((c: any) => ({id: c.id, name: c.name, status: c.status})) : []
|
||||
});
|
||||
|
|
@ -950,6 +955,7 @@ export default function AdminDashboard() {
|
|||
<option value="llm_parse">✨ Gemini AI (LLM)</option>
|
||||
<option value="iframe_golfbox">Golfbox iframe</option>
|
||||
<option value="click_then_css">Auto-klikk + CSS</option>
|
||||
<option value="disabled">Ingen (Avslått)</option>
|
||||
<option value="manual">🚨 Manuell (Ikke skrap)</option>
|
||||
</select>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -11,9 +11,13 @@ interface Facility {
|
|||
scrape_status_selector?: string;
|
||||
}
|
||||
|
||||
/**
 * Pick the initial value for the scrape-method select.
 *
 * A missing method (`null` / `undefined`) defaults to `'css_selector'`; an
 * empty or whitespace-only string is shown as `'disabled'`; anything else is
 * used as-is.
 */
function getInitialMethod(scrapeMethod?: string | null) {
  const fallback = scrapeMethod ?? 'css_selector';
  const isBlank = typeof scrapeMethod === 'string' && !scrapeMethod.trim();
  return isBlank ? 'disabled' : fallback;
}
|
||||
|
||||
export default function ScrapeMethodSelect({ facility }: { facility: Facility }) {
|
||||
// Setter standardverdi til 'css_selector' hvis den er tom i databasen
|
||||
const [method, setMethod] = useState(facility.scrape_method || 'css_selector');
|
||||
const [method, setMethod] = useState(getInitialMethod(facility.scrape_method));
|
||||
const [isLoading, setIsLoading] = useState(false);
|
||||
const [statusColor, setStatusColor] = useState('bg-transparent'); // For å gi visuell feedback
|
||||
|
||||
|
|
@ -66,7 +70,7 @@ export default function ScrapeMethodSelect({ facility }: { facility: Facility })
|
|||
<option value="llm_parse">✨ Gemini AI (LLM)</option>
|
||||
<option value="iframe_golfbox">Golfbox iframe</option>
|
||||
<option value="click_then_css">Auto-klikk + CSS</option>
|
||||
<option value="">Ingen (Avslått)</option>
|
||||
<option value="disabled">Ingen (Avslått)</option>
|
||||
</select>
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue