Før endringer i hcp-kalkulator
This commit is contained in:
parent
d49aa7b211
commit
1ff8ee2c26
9 changed files with 106 additions and 22 deletions
|
|
@ -5810,6 +5810,10 @@ async def update_scrape_settings(facility_id: int, settings: ScrapeSettingsUpdat
|
||||||
if not facility:
|
if not facility:
|
||||||
raise HTTPException(status_code=404, detail="Anlegget finnes ikke.")
|
raise HTTPException(status_code=404, detail="Anlegget finnes ikke.")
|
||||||
|
|
||||||
|
normalized_scrape_method = settings.scrape_method
|
||||||
|
if isinstance(normalized_scrape_method, str) and not normalized_scrape_method.strip():
|
||||||
|
normalized_scrape_method = "disabled"
|
||||||
|
|
||||||
# Oppdater verdiene i databasen inkludert AI instruks
|
# Oppdater verdiene i databasen inkludert AI instruks
|
||||||
await conn.execute("""
|
await conn.execute("""
|
||||||
UPDATE facilities
|
UPDATE facilities
|
||||||
|
|
@ -5819,14 +5823,14 @@ async def update_scrape_settings(facility_id: int, settings: ScrapeSettingsUpdat
|
||||||
ai_instruction = $4
|
ai_instruction = $4
|
||||||
WHERE id = $5
|
WHERE id = $5
|
||||||
""",
|
""",
|
||||||
settings.scrape_method,
|
normalized_scrape_method,
|
||||||
settings.scrape_status_url,
|
settings.scrape_status_url,
|
||||||
settings.scrape_status_selector,
|
settings.scrape_status_selector,
|
||||||
settings.ai_instruction,
|
settings.ai_instruction,
|
||||||
facility_id)
|
facility_id)
|
||||||
|
|
||||||
# Hvis metoden er manuell, tvinger vi gjennom de nye banestatusene direkte
|
# Hvis metoden er manuell, tvinger vi gjennom de nye banestatusene direkte
|
||||||
if settings.scrape_method == 'manual' and settings.courses:
|
if normalized_scrape_method == 'manual' and settings.courses:
|
||||||
for c in settings.courses:
|
for c in settings.courses:
|
||||||
current_course = await conn.fetchrow(
|
current_course = await conn.fetchrow(
|
||||||
"SELECT id, facility_id, status FROM courses WHERE id = $1 AND facility_id = $2",
|
"SELECT id, facility_id, status FROM courses WHERE id = $1 AND facility_id = $2",
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,13 @@ from dotenv import load_dotenv
|
||||||
from env_config import get_database_url
|
from env_config import get_database_url
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
|
|
||||||
from scrape_utils import ProgressCallback, emit_progress, make_progress_event, parse_llm_json
|
from scrape_utils import (
|
||||||
|
ProgressCallback,
|
||||||
|
emit_progress,
|
||||||
|
exclude_discontinued_facilities_clause,
|
||||||
|
make_progress_event,
|
||||||
|
parse_llm_json,
|
||||||
|
)
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
|
@ -194,6 +200,7 @@ async def run_golfpakker_scraper(facility_ids=None, progress_callback: ProgressC
|
||||||
FROM facilities
|
FROM facilities
|
||||||
WHERE COALESCE(NULLIF(TRIM(golfpakker_url), ''), NULLIF(TRIM(website_url), '')) IS NOT NULL
|
WHERE COALESCE(NULLIF(TRIM(golfpakker_url), ''), NULLIF(TRIM(website_url), '')) IS NOT NULL
|
||||||
"""
|
"""
|
||||||
|
query += exclude_discontinued_facilities_clause("facilities")
|
||||||
if facility_ids:
|
if facility_ids:
|
||||||
query += f" AND id IN ({','.join(map(str, facility_ids))})"
|
query += f" AND id IN ({','.join(map(str, facility_ids))})"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,13 @@ from playwright.async_api import async_playwright
|
||||||
import google.generativeai as genai
|
import google.generativeai as genai
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from env_config import get_database_url
|
from env_config import get_database_url
|
||||||
from scrape_utils import ProgressCallback, emit_progress, make_progress_event, parse_llm_json
|
from scrape_utils import (
|
||||||
|
ProgressCallback,
|
||||||
|
emit_progress,
|
||||||
|
exclude_discontinued_facilities_clause,
|
||||||
|
make_progress_event,
|
||||||
|
parse_llm_json,
|
||||||
|
)
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
|
@ -110,7 +116,11 @@ async def run_greenfee_scraper(facility_ids=None, progress_callback: ProgressCal
|
||||||
failed_count = 0
|
failed_count = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
query = "SELECT id, name, greenfee_url FROM facilities WHERE greenfee_url IS NOT NULL AND greenfee_url != ''"
|
query = (
|
||||||
|
"SELECT id, name, greenfee_url FROM facilities "
|
||||||
|
"WHERE greenfee_url IS NOT NULL AND greenfee_url != ''"
|
||||||
|
f"{exclude_discontinued_facilities_clause('facilities')}"
|
||||||
|
)
|
||||||
if facility_ids:
|
if facility_ids:
|
||||||
query += f" AND id IN ({','.join(map(str, facility_ids))})"
|
query += f" AND id IN ({','.join(map(str, facility_ids))})"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,13 @@ from playwright.async_api import async_playwright
|
||||||
import google.generativeai as genai
|
import google.generativeai as genai
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from env_config import get_database_url
|
from env_config import get_database_url
|
||||||
from scrape_utils import ProgressCallback, emit_progress, make_progress_event, parse_llm_json
|
from scrape_utils import (
|
||||||
|
ProgressCallback,
|
||||||
|
emit_progress,
|
||||||
|
exclude_discontinued_facilities_clause,
|
||||||
|
make_progress_event,
|
||||||
|
parse_llm_json,
|
||||||
|
)
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
|
@ -105,7 +111,11 @@ async def run_scraper(facility_ids=None, progress_callback: ProgressCallback | N
|
||||||
failed_count = 0
|
failed_count = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
query = "SELECT id, name, medlemskap_url FROM facilities WHERE medlemskap_url IS NOT NULL AND medlemskap_url != ''"
|
query = (
|
||||||
|
"SELECT id, name, medlemskap_url FROM facilities "
|
||||||
|
"WHERE medlemskap_url IS NOT NULL AND medlemskap_url != ''"
|
||||||
|
f"{exclude_discontinued_facilities_clause('facilities')}"
|
||||||
|
)
|
||||||
if facility_ids:
|
if facility_ids:
|
||||||
query += f" AND id IN ({','.join(map(str, facility_ids))})"
|
query += f" AND id IN ({','.join(map(str, facility_ids))})"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,11 +17,17 @@ from google import genai
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from course_status_history import ensure_course_status_history_table, log_course_status_change
|
from course_status_history import ensure_course_status_history_table, log_course_status_change
|
||||||
from env_config import get_database_url
|
from env_config import get_database_url
|
||||||
from scrape_utils import ProgressCallback, emit_progress, make_progress_event
|
from scrape_utils import (
|
||||||
|
ProgressCallback,
|
||||||
|
emit_progress,
|
||||||
|
exclude_discontinued_facilities_clause,
|
||||||
|
make_progress_event,
|
||||||
|
)
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
DB_URL = get_database_url()
|
DB_URL = get_database_url()
|
||||||
|
DISABLED_STATUS_METHODS = {"", "disabled", "manual"}
|
||||||
|
|
||||||
# ==========================================
|
# ==========================================
|
||||||
# KONFIGURERER GEMINI AI (NY SDK)
|
# KONFIGURERER GEMINI AI (NY SDK)
|
||||||
|
|
@ -154,17 +160,18 @@ async def run_daily_scraping(facility_ids=None, progress_callback: ProgressCallb
|
||||||
print(f"🚀 Starter sjekk {datetime.now().strftime('%H:%M:%S')}...")
|
print(f"🚀 Starter sjekk {datetime.now().strftime('%H:%M:%S')}...")
|
||||||
conn = await asyncpg.connect(DB_URL)
|
conn = await asyncpg.connect(DB_URL)
|
||||||
await ensure_course_status_history_table(conn)
|
await ensure_course_status_history_table(conn)
|
||||||
|
facility_filter = exclude_discontinued_facilities_clause("facilities")
|
||||||
|
|
||||||
if facility_ids:
|
if facility_ids:
|
||||||
print(f"📌 Kjører skraping KUN for anlegg-ID(er): {facility_ids}")
|
print(f"📌 Kjører skraping KUN for anlegg-ID(er): {facility_ids}")
|
||||||
facilities = await conn.fetch(
|
facilities = await conn.fetch(
|
||||||
"SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL AND id = ANY($1::int[])",
|
f"SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL AND id = ANY($1::int[]){facility_filter}",
|
||||||
facility_ids
|
facility_ids
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
print("🌍 Kjører skraping for ALLE anlegg med scrape_status_url...")
|
print("🌍 Kjører skraping for ALLE anlegg med scrape_status_url...")
|
||||||
facilities = await conn.fetch(
|
facilities = await conn.fetch(
|
||||||
"SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL"
|
f"SELECT id, name, scrape_status_url, scrape_status_selector, scrape_method, ai_instruction FROM facilities WHERE scrape_status_url IS NOT NULL{facility_filter}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if not facilities:
|
if not facilities:
|
||||||
|
|
@ -207,7 +214,9 @@ async def run_daily_scraping(facility_ids=None, progress_callback: ProgressCallb
|
||||||
context = await browser.new_context()
|
context = await browser.new_context()
|
||||||
|
|
||||||
for index, f in enumerate(facilities, start=1):
|
for index, f in enumerate(facilities, start=1):
|
||||||
method = f.get('scrape_method') or 'css_selector'
|
raw_method = (f.get('scrape_method') or "").strip()
|
||||||
|
method = raw_method or 'css_selector'
|
||||||
|
method_label = "disabled" if raw_method in {"", "disabled"} else method
|
||||||
facility_id = f['id']
|
facility_id = f['id']
|
||||||
facility_name = f['name']
|
facility_name = f['name']
|
||||||
|
|
||||||
|
|
@ -219,15 +228,21 @@ async def run_daily_scraping(facility_ids=None, progress_callback: ProgressCallb
|
||||||
facility_id=facility_id,
|
facility_id=facility_id,
|
||||||
facility_name=facility_name,
|
facility_name=facility_name,
|
||||||
outcome="info",
|
outcome="info",
|
||||||
message=f"Starter sjekk med metode {method}.",
|
message=f"Starter sjekk med metode {method_label}.",
|
||||||
processed=index - 1,
|
processed=index - 1,
|
||||||
total=total_facilities,
|
total=total_facilities,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
if method == 'manual':
|
if raw_method in DISABLED_STATUS_METHODS:
|
||||||
successes.append(f"⏸️ {f['name']}: Hoppet over (Manuell overstyring)")
|
skip_reason = "Manuell overstyring" if raw_method == "manual" else "Skraping avslått"
|
||||||
print(f" ⏸️ Hopper over skraping av {f['name']} (Satt til Manuell)")
|
progress_message = (
|
||||||
|
"Hoppet over fordi anlegget er satt til manuell overstyring."
|
||||||
|
if raw_method == "manual"
|
||||||
|
else "Hoppet over fordi banestatusskraping er avslått for anlegget."
|
||||||
|
)
|
||||||
|
successes.append(f"⏸️ {f['name']}: Hoppet over ({skip_reason})")
|
||||||
|
print(f" ⏸️ Hopper over skraping av {f['name']} ({skip_reason})")
|
||||||
skipped_facilities += 1
|
skipped_facilities += 1
|
||||||
await emit_progress(
|
await emit_progress(
|
||||||
progress_callback,
|
progress_callback,
|
||||||
|
|
@ -241,7 +256,7 @@ async def run_daily_scraping(facility_ids=None, progress_callback: ProgressCallb
|
||||||
facility_id=facility_id,
|
facility_id=facility_id,
|
||||||
facility_name=facility_name,
|
facility_name=facility_name,
|
||||||
outcome="warning",
|
outcome="warning",
|
||||||
message="Hoppet over fordi anlegget er satt til manuell overstyring.",
|
message=progress_message,
|
||||||
processed=index,
|
processed=index,
|
||||||
total=total_facilities,
|
total=total_facilities,
|
||||||
),
|
),
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,24 @@ from typing import Any, Awaitable, Callable
|
||||||
ProgressCallback = Callable[[dict[str, Any]], Awaitable[None]]
|
ProgressCallback = Callable[[dict[str, Any]], Awaitable[None]]
|
||||||
|
|
||||||
|
|
||||||
|
def exclude_discontinued_facilities_clause(facility_table: str = "facilities") -> str:
    """Build an SQL ``AND (...)`` fragment that drops discontinued facilities.

    A facility passes the filter when it either has no rows in ``courses``
    at all, or has at least one course whose status is not ``'nedlagt'``
    (a NULL status is treated as ``'ukjent'`` and therefore passes).

    The fragment begins and ends with whitespace so it can be appended
    directly after an existing ``WHERE ...`` condition.

    Args:
        facility_table: Table name or alias of the facilities relation in the
            outer query. It is interpolated into the SQL text, so it must be
            a plain identifier, optionally dotted (``schema.table``).

    Returns:
        The SQL fragment as a string.

    Raises:
        ValueError: If ``facility_table`` is not a plain (dotted) identifier.
    """
    # The name cannot be passed as a bind parameter, so reject anything that
    # is not a bare identifier before it is spliced into the SQL text.
    name_parts = facility_table.split(".")
    if not all(part.isidentifier() for part in name_parts):
        raise ValueError(f"Invalid SQL identifier for facility table: {facility_table!r}")

    return f"""
        AND (
            NOT EXISTS (
                SELECT 1
                FROM courses course_filter
                WHERE course_filter.facility_id = {facility_table}.id
            )
            OR EXISTS (
                SELECT 1
                FROM courses course_filter
                WHERE course_filter.facility_id = {facility_table}.id
                AND COALESCE(course_filter.status, 'ukjent') <> 'nedlagt'
            )
        )
    """
|
||||||
|
|
||||||
|
|
||||||
async def emit_progress(progress_callback: ProgressCallback | None, **payload: Any) -> None:
|
async def emit_progress(progress_callback: ProgressCallback | None, **payload: Any) -> None:
|
||||||
if progress_callback is None:
|
if progress_callback is None:
|
||||||
return
|
return
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,13 @@ from playwright.async_api import async_playwright
|
||||||
import google.generativeai as genai
|
import google.generativeai as genai
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from env_config import get_database_url
|
from env_config import get_database_url
|
||||||
from scrape_utils import ProgressCallback, emit_progress, make_progress_event, parse_llm_json
|
from scrape_utils import (
|
||||||
|
ProgressCallback,
|
||||||
|
emit_progress,
|
||||||
|
exclude_discontinued_facilities_clause,
|
||||||
|
make_progress_event,
|
||||||
|
parse_llm_json,
|
||||||
|
)
|
||||||
from vtg_courses import filter_upcoming_courses
|
from vtg_courses import filter_upcoming_courses
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
@ -110,7 +116,11 @@ async def run_vtg_scraper(facility_ids=None, progress_callback: ProgressCallback
|
||||||
failed_count = 0
|
failed_count = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
query = "SELECT id, name, vtg_lenke FROM facilities WHERE vtg_lenke IS NOT NULL AND vtg_lenke != ''"
|
query = (
|
||||||
|
"SELECT id, name, vtg_lenke FROM facilities "
|
||||||
|
"WHERE vtg_lenke IS NOT NULL AND vtg_lenke != ''"
|
||||||
|
f"{exclude_discontinued_facilities_clause('facilities')}"
|
||||||
|
)
|
||||||
if facility_ids:
|
if facility_ids:
|
||||||
query += f" AND id IN ({','.join(map(str, facility_ids))})"
|
query += f" AND id IN ({','.join(map(str, facility_ids))})"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -197,6 +197,11 @@ const EMPTY_MANUAL_OVERRIDE_FORM: ManualOverrideForm = {
|
||||||
vtg_datoer: [],
|
vtg_datoer: [],
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Map a facility's stored scrape method to the value used by the edit form.
 *
 * - `null` / `undefined` fall back to `'css_selector'`.
 * - A blank or whitespace-only string becomes `'disabled'`.
 * - Any other string is returned with surrounding whitespace removed, so a
 *   padded value such as `' manual '` still matches its `<option value>`.
 */
function normalizeScrapeMethod(scrapeMethod: string | null | undefined): string {
  // Non-string input can only be null/undefined per the signature.
  if (typeof scrapeMethod !== 'string') return scrapeMethod ?? 'css_selector';

  const trimmed = scrapeMethod.trim();
  return trimmed === '' ? 'disabled' : trimmed;
}
|
||||||
|
|
||||||
const toInputString = (value: unknown) => {
|
const toInputString = (value: unknown) => {
|
||||||
if (value === null || value === undefined) return '';
|
if (value === null || value === undefined) return '';
|
||||||
return String(value);
|
return String(value);
|
||||||
|
|
@ -709,7 +714,7 @@ export default function AdminDashboard() {
|
||||||
setEditForm({
|
setEditForm({
|
||||||
scrape_status_url: facility.scrape_status_url || '',
|
scrape_status_url: facility.scrape_status_url || '',
|
||||||
scrape_status_selector: facility.scrape_status_selector || '',
|
scrape_status_selector: facility.scrape_status_selector || '',
|
||||||
scrape_method: facility.scrape_method || 'css_selector',
|
scrape_method: normalizeScrapeMethod(facility.scrape_method),
|
||||||
ai_instruction: facility.ai_instruction || '',
|
ai_instruction: facility.ai_instruction || '',
|
||||||
courses: facility.course_statuses ? facility.course_statuses.map((c: any) => ({id: c.id, name: c.name, status: c.status})) : []
|
courses: facility.course_statuses ? facility.course_statuses.map((c: any) => ({id: c.id, name: c.name, status: c.status})) : []
|
||||||
});
|
});
|
||||||
|
|
@ -950,6 +955,7 @@ export default function AdminDashboard() {
|
||||||
<option value="llm_parse">✨ Gemini AI (LLM)</option>
|
<option value="llm_parse">✨ Gemini AI (LLM)</option>
|
||||||
<option value="iframe_golfbox">Golfbox iframe</option>
|
<option value="iframe_golfbox">Golfbox iframe</option>
|
||||||
<option value="click_then_css">Auto-klikk + CSS</option>
|
<option value="click_then_css">Auto-klikk + CSS</option>
|
||||||
|
<option value="disabled">Ingen (Avslått)</option>
|
||||||
<option value="manual">🚨 Manuell (Ikke skrap)</option>
|
<option value="manual">🚨 Manuell (Ikke skrap)</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -11,9 +11,13 @@ interface Facility {
|
||||||
scrape_status_selector?: string;
|
scrape_status_selector?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Pick the initial value for the scrape-method dropdown.
 *
 * - `null` / `undefined` fall back to `'css_selector'`.
 * - A blank or whitespace-only string becomes `'disabled'`.
 * - Any other string is returned with surrounding whitespace removed, so a
 *   padded value still matches one of the `<option value>` entries.
 */
function getInitialMethod(scrapeMethod?: string | null): string {
  // Non-string input can only be null/undefined per the signature.
  if (typeof scrapeMethod !== 'string') return scrapeMethod ?? 'css_selector';

  const trimmed = scrapeMethod.trim();
  return trimmed === '' ? 'disabled' : trimmed;
}
|
||||||
|
|
||||||
export default function ScrapeMethodSelect({ facility }: { facility: Facility }) {
|
export default function ScrapeMethodSelect({ facility }: { facility: Facility }) {
|
||||||
// Setter standardverdi til 'css_selector' hvis den er tom i databasen
|
const [method, setMethod] = useState(getInitialMethod(facility.scrape_method));
|
||||||
const [method, setMethod] = useState(facility.scrape_method || 'css_selector');
|
|
||||||
const [isLoading, setIsLoading] = useState(false);
|
const [isLoading, setIsLoading] = useState(false);
|
||||||
const [statusColor, setStatusColor] = useState('bg-transparent'); // For å gi visuell feedback
|
const [statusColor, setStatusColor] = useState('bg-transparent'); // For å gi visuell feedback
|
||||||
|
|
||||||
|
|
@ -66,7 +70,7 @@ export default function ScrapeMethodSelect({ facility }: { facility: Facility })
|
||||||
<option value="llm_parse">✨ Gemini AI (LLM)</option>
|
<option value="llm_parse">✨ Gemini AI (LLM)</option>
|
||||||
<option value="iframe_golfbox">Golfbox iframe</option>
|
<option value="iframe_golfbox">Golfbox iframe</option>
|
||||||
<option value="click_then_css">Auto-klikk + CSS</option>
|
<option value="click_then_css">Auto-klikk + CSS</option>
|
||||||
<option value="">Ingen (Avslått)</option>
|
<option value="disabled">Ingen (Avslått)</option>
|
||||||
</select>
|
</select>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue