Nye-TeeOff/backend/vtg_courses.py

202 lines
6.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
from datetime import date, datetime
from typing import Any
# Month names and their abbreviations (Norwegian spellings, e.g. "mai",
# "oktober", "desember") mapped to month numbers. Keys are lowercase, so text
# must be lowercased before lookup.
MONTH_MAP: dict[str, int] = {
    "januar": 1, "jan": 1,
    "februar": 2, "feb": 2,
    "mars": 3, "mar": 3,
    "april": 4, "apr": 4,
    "mai": 5,
    "juni": 6, "jun": 6,
    "juli": 7, "jul": 7,
    "august": 8, "aug": 8,
    "september": 9, "sep": 9, "sept": 9,
    "oktober": 10, "okt": 10,
    "november": 11, "nov": 11,
    "desember": 12, "des": 12,
}
def normalize_whitespace(value: str) -> str:
    """Collapse each run of whitespace to one space and trim both ends.

    Falsy input (``None``, ``""``, ``0`` ...) yields an empty string; any
    other value is converted with ``str()`` first.
    """
    text = str(value or "")
    # split() with no separator cuts on runs of whitespace and drops
    # leading/trailing runs, so re-joining with single spaces normalizes.
    return " ".join(text.split())
def _to_date(year: int, month: int, day: int) -> date | None:
try:
return date(year, month, day)
except ValueError:
return None
def _infer_year(month: int, day: int, explicit_year: int | None, today: date) -> int:
if explicit_year:
return explicit_year
candidate = _to_date(today.year, month, day)
if candidate and candidate < today.replace(day=max(1, min(today.day, 28))):
if (today - candidate).days > 7:
return today.year + 1
return today.year
def _parse_numeric_date(raw: str) -> date | None:
match = re.search(r"\b(\d{1,2})[./](\d{1,2})[./](\d{2,4})\b", raw)
if not match:
return None
day = int(match.group(1))
month = int(match.group(2))
year = int(match.group(3))
if year < 100:
year += 2000
return _to_date(year, month, day)
def _parse_textual_dates(raw: str, today: date) -> list[date]:
    """Collect every ``<day> <month name> [20xx]`` date mentioned in ``raw``.

    Month names come from ``MONTH_MAP`` and are matched case-insensitively.
    When no year follows the month, ``_infer_year`` picks one relative to
    ``today``. Combinations that do not form a valid date are skipped.
    Results are in order of appearance.
    """
    # Longest names first so that e.g. "sept" is tried before "sep".
    month_names = "|".join(sorted(MONTH_MAP.keys(), key=len, reverse=True))
    day_month_re = re.compile(
        r"\b(\d{1,2})\.?\s*(" + month_names + r")\b(?:\s+(20\d{2}))?",
        re.IGNORECASE,
    )

    found: list[date] = []
    for hit in day_month_re.finditer(raw):
        day_text, month_text, year_text = hit.groups()
        month = MONTH_MAP.get(month_text.lower())
        if not month:
            continue
        day = int(day_text)
        stated_year = int(year_text) if year_text else None
        parsed = _to_date(_infer_year(month, day, stated_year, today), month, day)
        if parsed:
            found.append(parsed)
    return found
def parse_course_date_range(raw: str, today: date | None = None) -> tuple[date | None, date | None]:
    """Parse a free-text course date label into a ``(start, end)`` pair.

    Strategies are tried in order and the first one that applies wins:

    1. the whole text as an ISO date/datetime (start == end);
    2. numeric ``D.M.Y`` / ``D/M/Y`` dates (the first two found, or a single one);
    3. a same-month day range such as ``"12-14 mai"`` or
       ``"12. til 14. mai 2025"``;
    4. standalone ``"<day> <month name>"`` mentions (the first two, or one).

    Args:
        raw: Label text. Whitespace is collapsed and the text lowercased
            before parsing.
        today: Reference date used to infer a missing year; defaults to
            ``date.today()``.

    Returns:
        ``(start, end)``. Either element is ``None`` when it could not be
        parsed; both are ``None`` when nothing date-like was found.
    """
    reference_today = today or date.today()
    normalized = normalize_whitespace(raw).lower()
    if not normalized:
        return None, None

    try:
        iso_day = datetime.fromisoformat(normalized).date()
    except ValueError:
        pass
    else:
        return iso_day, iso_day

    numeric_dates = re.findall(r"\b\d{1,2}[./]\d{1,2}[./]\d{2,4}\b", normalized)
    if len(numeric_dates) >= 2:
        start = _parse_numeric_date(numeric_dates[0])
        end = _parse_numeric_date(numeric_dates[1])
        return start, end or start
    if len(numeric_dates) == 1:
        single = _parse_numeric_date(numeric_dates[0])
        return single, single

    # Longest names first so that e.g. "sept" is tried before "sep".
    month_names = "|".join(sorted(MONTH_MAP.keys(), key=len, reverse=True))
    # The separator (hyphen, en dash U+2013, em dash U+2014 or the word "til")
    # is mandatory. If it were allowed to match the empty string, a plain
    # two-digit day such as "12 mai" would be split into the bogus range
    # "1" .. "2" instead of falling through to the single-date parser below.
    range_match = re.search(
        r"\b(\d{1,2})\s*\.?\s*(?:[-\u2013\u2014]|til)\s*(\d{1,2})\.?\s*("
        + month_names
        + r")\b(?:\s+(20\d{2}))?",
        normalized,
        re.IGNORECASE,
    )
    if range_match:
        start_day = int(range_match.group(1))
        end_day = int(range_match.group(2))
        month = MONTH_MAP.get(range_match.group(3).lower())
        explicit_year = int(range_match.group(4)) if range_match.group(4) else None
        if month:
            # The year is inferred from the END of the range so that a course
            # already in progress is not pushed into next year.
            year = _infer_year(month, end_day, explicit_year, reference_today)
            start = _to_date(year, month, start_day)
            end = _to_date(year, month, end_day)
            return start, end or start

    textual_dates = _parse_textual_dates(normalized, reference_today)
    if len(textual_dates) >= 2:
        return textual_dates[0], textual_dates[1]
    if len(textual_dates) == 1:
        return textual_dates[0], textual_dates[0]
    return None, None
def normalize_vtg_course_rows(rows: Any) -> list[dict[str, Any]]:
    """Clean raw course rows and return them sorted by date.

    Anything that is not a list yields ``[]``. Non-dict entries and entries
    without a label (``"dato"``, falling back to ``"display_label"``) are
    dropped. Each kept row becomes a dict with:

    - ``"dato"``: the whitespace-normalized label;
    - ``"status"``: the normalized status, defaulting to ``"Ledig"``;
    - ``"start_date"`` / ``"end_date"``: ISO date strings or ``None``. They
      are taken from the row's own ``start_date`` / ``end_date`` when at
      least one of them parses as ISO, otherwise parsed from the label;
    - ``"sort_order"``: the row's index in the input list.

    Rows are ordered by start date (else end date; undated rows last), then
    by original position, then by label.
    """
    if not isinstance(rows, list):
        return []

    def iso_day(value: Any) -> date | None:
        # A missing/falsy value and an unparseable one both mean "no date".
        if not value:
            return None
        try:
            return datetime.fromisoformat(str(value)).date()
        except ValueError:
            return None

    def sort_key(entry: dict[str, Any]) -> tuple[str, int, str]:
        # ISO strings sort chronologically; the sentinel pushes undated rows last.
        return (
            entry.get("start_date") or entry.get("end_date") or "9999-12-31",
            int(entry.get("sort_order") or 0),
            entry.get("dato") or "",
        )

    cleaned: list[dict[str, Any]] = []
    for position, item in enumerate(rows):
        if not isinstance(item, dict):
            continue

        label = normalize_whitespace(str(item.get("dato") or item.get("display_label") or ""))
        if not label:
            continue

        status = normalize_whitespace(str(item.get("status") or "Ledig")) or "Ledig"

        first_day = iso_day(item.get("start_date"))
        last_day = iso_day(item.get("end_date"))
        if first_day is None and last_day is None:
            # No usable explicit dates: fall back to parsing the label text.
            first_day, last_day = parse_course_date_range(label)

        cleaned.append(
            {
                "dato": label,
                "status": status,
                "start_date": first_day.isoformat() if first_day else None,
                "end_date": last_day.isoformat() if last_day else None,
                "sort_order": position,
            }
        )

    return sorted(cleaned, key=sort_key)
def is_upcoming_course(row: dict[str, Any], today: date | None = None) -> bool:
reference_today = today or date.today()
end_value = row.get("end_date") or row.get("start_date")
if not end_value:
return True
try:
end_date = datetime.fromisoformat(str(end_value)).date()
except ValueError:
return True
return end_date >= reference_today
def filter_upcoming_courses(rows: Any) -> list[dict[str, Any]]:
    """Normalize ``rows`` and keep only the courses that have not ended.

    Rows are first cleaned and sorted by ``normalize_vtg_course_rows``; each
    result is then checked with ``is_upcoming_course`` against today's date.
    """
    return list(filter(is_upcoming_course, normalize_vtg_course_rows(rows)))