# Source: Nye-TeeOff/backend/vtg_courses.py (241 lines, 7.2 KiB, Python)

import re
from datetime import date, datetime
from typing import Any
# Lowercase Norwegian month names and their abbreviations, mapped to the
# calendar month number (1-12). Lookups are done on lowercased text.
MONTH_MAP: dict[str, int] = {
    "januar": 1, "jan": 1,
    "februar": 2, "feb": 2,
    "mars": 3, "mar": 3,
    "april": 4, "apr": 4,
    "mai": 5,
    "juni": 6, "jun": 6,
    "juli": 7, "jul": 7,
    "august": 8, "aug": 8,
    "september": 9, "sep": 9, "sept": 9,
    "oktober": 10, "okt": 10,
    "november": 11, "nov": 11,
    "desember": 12, "des": 12,
}
def normalize_whitespace(value: str) -> str:
    """Collapse every run of whitespace to one space and trim both ends.

    Falsy input (``None``, ``""``, ``0``) is treated as the empty string;
    anything else is converted with ``str()`` first.
    """
    text = str(value or "")
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
def _to_date(year: int, month: int, day: int) -> date | None:
try:
return date(year, month, day)
except ValueError:
return None
def _infer_year(month: int, day: int, explicit_year: int | None, today: date) -> int:
    """Choose the year for a day/month pair that may lack an explicit year.

    A truthy ``explicit_year`` always wins. Otherwise the year of ``today``
    is used, unless that day/month in the current year lies more than seven
    days before ``today`` -- then the following year is returned. If the
    day/month is not a valid date in the current year, the current year is
    returned unchanged.
    """
    if not explicit_year:
        this_year = _to_date(today.year, month, day)
        # Dates up to a week in the past still count as "this year".
        if this_year and (today - this_year).days > 7:
            return today.year + 1
        return today.year
    return explicit_year
def _parse_numeric_date(raw: str) -> date | None:
    """Parse the first ``D.M.YY`` / ``D.M.YYYY`` date found in ``raw``.

    Day, month and year may be separated by ``.`` or ``/``. Two-digit years
    are interpreted as 20YY. The year must have exactly two or four digits:
    a three-digit year such as ``1.5.202`` is rejected instead of being read
    as the year 202.

    Returns:
        The parsed date, or ``None`` when no date-like token is found or the
        numbers do not form a valid calendar date.
    """
    # ``\d{4}`` is tried before ``\d{2}`` so four-digit years are preferred;
    # the trailing ``\b`` rejects 3-digit and 5+-digit years.
    match = re.search(r"\b(\d{1,2})[./](\d{1,2})[./](\d{4}|\d{2})\b", raw)
    if match is None:
        return None
    day, month, year = (int(part) for part in match.groups())
    if year < 100:
        year += 2000
    return _to_date(year, month, day)
def _parse_textual_dates(raw: str, today: date) -> list[date]:
    """Find every "<day> <month name> [20YY]" occurrence in ``raw``.

    Month names come from ``MONTH_MAP`` and are matched case-insensitively;
    the day may be followed by an optional dot. When no 20YY year follows
    the month, the year is chosen by ``_infer_year`` relative to ``today``.
    Matches that do not form a valid calendar date are skipped.

    Returns:
        The valid dates, in the order they appear in ``raw``.
    """
    # Longest names first so "september" is tried before "sep"/"sept".
    month_names = sorted(MONTH_MAP.keys(), key=len, reverse=True)
    finder = re.compile(
        r"\b(\d{1,2})\.?\s*(" + "|".join(month_names) + r")\b(?:\s+(20\d{2}))?",
        re.IGNORECASE,
    )

    found: list[date] = []
    for hit in finder.finditer(raw):
        day_text, month_text, year_text = hit.groups()
        month = MONTH_MAP.get(month_text.lower())
        if not month:
            continue
        day = int(day_text)
        stated_year = int(year_text) if year_text else None
        resolved = _to_date(_infer_year(month, day, stated_year, today), month, day)
        if resolved:
            found.append(resolved)
    return found
def parse_course_date_range(raw: str, today: date | None = None) -> tuple[date | None, date | None]:
    """Extract a ``(start, end)`` date pair from a free-text course label.

    The text is whitespace-normalized and lowercased, then the following
    strategies are tried in order; the first one that applies wins:

    1. The whole text is an ISO date/datetime -> that single day.
    2. Numeric dates such as ``12.05.2026`` or ``12/5/26`` -> the first two
       found form the range; a single one is used for both ends.
    3. A day range within one month, e.g. ``12-14 juni`` or
       ``12. til 14. juni 2026``. The separator must be a hyphen, an en
       dash, an em dash or the word ``til``.
    4. Textual dates such as ``12. juni`` -> the first two found form the
       range; a single one is used for both ends.

    Args:
        raw: The label to parse.
        today: Reference date used to infer a missing year; defaults to
            ``date.today()``.

    Returns:
        ``(start, end)``; ``(None, None)`` when nothing could be parsed.
    """
    reference_today = today or date.today()
    normalized = normalize_whitespace(raw).lower()
    if not normalized:
        return None, None

    # 1) Whole string is an ISO date/datetime.
    try:
        iso_candidate = datetime.fromisoformat(normalized).date()
    except ValueError:
        iso_candidate = None
    if iso_candidate:
        return iso_candidate, iso_candidate

    # 2) Numeric dates (D.M.Y or D/M/Y).
    numeric_dates = re.findall(r"\b\d{1,2}[./]\d{1,2}[./]\d{2,4}\b", normalized)
    if len(numeric_dates) >= 2:
        start = _parse_numeric_date(numeric_dates[0])
        end = _parse_numeric_date(numeric_dates[1])
        return start, end or start
    if len(numeric_dates) == 1:
        single = _parse_numeric_date(numeric_dates[0])
        return single, single

    # 3) "<day> <sep> <day> <month> [year]".
    # The separator is mandatory. An empty alternative here would let a
    # plain two-digit day ("15. mai") match as the range 1-5, so the dashes
    # are spelled as explicit escapes: \u2013 = en dash, \u2014 = em dash.
    month_alternation = "|".join(sorted(MONTH_MAP.keys(), key=len, reverse=True))
    range_match = re.search(
        r"\b(\d{1,2})\s*\.?\s*(?:-|\u2013|\u2014|til)\s*(\d{1,2})\.?\s*("
        + month_alternation
        + r")\b(?:\s+(20\d{2}))?",
        normalized,
        re.IGNORECASE,
    )
    if range_match:
        start_day = int(range_match.group(1))
        end_day = int(range_match.group(2))
        month = MONTH_MAP.get(range_match.group(3).lower())
        explicit_year = int(range_match.group(4)) if range_match.group(4) else None
        if month:
            # The year is inferred from the END day so that a range which
            # has started but not finished stays in the current year.
            year = _infer_year(month, end_day, explicit_year, reference_today)
            start = _to_date(year, month, start_day)
            end = _to_date(year, month, end_day)
            return start, end or start

    # 4) One or more "<day> <month> [year]" occurrences.
    textual_dates = _parse_textual_dates(normalized, reference_today)
    if len(textual_dates) >= 2:
        return textual_dates[0], textual_dates[1]
    if len(textual_dates) == 1:
        return textual_dates[0], textual_dates[0]
    return None, None
def normalize_vtg_course_rows(rows: Any) -> list[dict[str, Any]]:
    """Clean up raw course rows and return them sorted by date.

    Non-list input yields ``[]``. Entries that are not dicts, or that have
    neither a ``"dato"`` nor a ``"display_label"`` text, are dropped. For
    each remaining row:

    * ``dato``: the whitespace-normalized label.
    * ``status``: the normalized status, defaulting to ``"Ledig"``.
    * ``start_date`` / ``end_date``: ISO strings (or ``None``). Explicit
      ISO values on the row are used when at least one parses; otherwise
      both are derived from the label via ``parse_course_date_range``.
    * ``sort_order``: the row's index in the input list.

    Rows are ordered by start date (falling back to end date; undated rows
    last), then by input position, then by label.
    """
    if not isinstance(rows, list):
        return []

    def _iso_or_none(value: Any) -> date | None:
        # Missing or malformed explicit dates are treated as "not given".
        if not value:
            return None
        try:
            return datetime.fromisoformat(str(value)).date()
        except ValueError:
            return None

    def _ordering(item: dict[str, Any]) -> tuple[str, int, str]:
        # ISO strings sort chronologically; the sentinel pushes undated
        # rows to the end.
        return (
            item["start_date"] or item["end_date"] or "9999-12-31",
            int(item["sort_order"] or 0),
            item["dato"] or "",
        )

    cleaned: list[dict[str, Any]] = []
    for position, entry in enumerate(rows):
        if not isinstance(entry, dict):
            continue
        label = normalize_whitespace(str(entry.get("dato") or entry.get("display_label") or ""))
        if not label:
            continue
        status = normalize_whitespace(str(entry.get("status") or "Ledig")) or "Ledig"

        first_day = _iso_or_none(entry.get("start_date"))
        last_day = _iso_or_none(entry.get("end_date"))
        if first_day is None and last_day is None:
            first_day, last_day = parse_course_date_range(label)

        cleaned.append(
            {
                "dato": label,
                "status": status,
                "start_date": first_day.isoformat() if first_day else None,
                "end_date": last_day.isoformat() if last_day else None,
                "sort_order": position,
            }
        )

    return sorted(cleaned, key=_ordering)
def is_upcoming_course(row: dict[str, Any], today: date | None = None) -> bool:
reference_today = today or date.today()
end_value = row.get("end_date") or row.get("start_date")
if not end_value:
return True
try:
end_date = datetime.fromisoformat(str(end_value)).date()
except ValueError:
return True
return end_date >= reference_today
def filter_upcoming_courses(rows: Any) -> list[dict[str, Any]]:
    """Normalize ``rows`` and keep only the courses that have not ended.

    Rows are first passed through ``normalize_vtg_course_rows`` (so the
    result is sorted and cleaned), then filtered with
    ``is_upcoming_course`` relative to the current date.
    """
    return list(filter(is_upcoming_course, normalize_vtg_course_rows(rows)))
# 2026-04-28 13:53:00 +02:00
def get_invalid_vtg_course_labels(rows: Any) -> list[str]:
    """Return the labels of rows for which no date can be determined.

    Non-list input yields ``[]``; entries that are not dicts or that have
    no ``"dato"`` / ``"display_label"`` text are ignored. A row is
    considered valid when its explicit ``"start_date"`` or ``"end_date"``
    parses as an ISO date, or when ``parse_course_date_range`` can extract
    at least one date from the label. The whitespace-normalized labels of
    all other rows are returned in input order.
    """
    if not isinstance(rows, list):
        return []

    def _iso_or_none(value: Any) -> date | None:
        # Missing or malformed explicit dates count as "not given".
        if not value:
            return None
        try:
            return datetime.fromisoformat(str(value)).date()
        except ValueError:
            return None

    rejected: list[str] = []
    for entry in rows:
        if not isinstance(entry, dict):
            continue
        label = normalize_whitespace(str(entry.get("dato") or entry.get("display_label") or ""))
        if not label:
            continue

        first_day = _iso_or_none(entry.get("start_date"))
        last_day = _iso_or_none(entry.get("end_date"))
        if first_day is None and last_day is None:
            # No usable explicit dates: fall back to parsing the label.
            first_day, last_day = parse_course_date_range(label)
        if first_day is None and last_day is None:
            rejected.append(label)
    return rejected