2026-04-25 07:45:34 +02:00
|
|
|
|
import re
|
|
|
|
|
|
from datetime import date, datetime
|
|
|
|
|
|
from typing import Any
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Norwegian month names and their common abbreviations, mapped to the
# calendar month number (1-12). All keys are lowercase. Each tuple below
# lists the full name first, followed by its abbreviations, and that order is
# the insertion order of the resulting dict.
MONTH_MAP: dict[str, int] = {
    alias: number
    for number, aliases in enumerate(
        (
            ("januar", "jan"),
            ("februar", "feb"),
            ("mars", "mar"),
            ("april", "apr"),
            ("mai",),
            ("juni", "jun"),
            ("juli", "jul"),
            ("august", "aug"),
            ("september", "sep", "sept"),
            ("oktober", "okt"),
            ("november", "nov"),
            ("desember", "des"),
        ),
        start=1,
    )
    for alias in aliases
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normalize_whitespace(value: str) -> str:
    """Collapse every run of whitespace to one space and trim both ends.

    Falsy input (``None``, ``""``, ``0``) yields an empty string; any other
    value is converted with ``str()`` before being cleaned.
    """
    text = str(value) if value else ""
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _to_date(year: int, month: int, day: int) -> date | None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
return date(year, month, day)
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _infer_year(month: int, day: int, explicit_year: int | None, today: date) -> int:
    """Choose the year for a day/month pair that may lack an explicit year.

    Args:
        month: Calendar month number of the date being resolved.
        day: Day of month of the date being resolved.
        explicit_year: Year stated in the source text, if any. A truthy value
            is returned unchanged.
        today: Reference date used to decide between this year and next.

    Returns:
        ``explicit_year`` when given. Otherwise ``today.year`` if the date
        falls in the future or at most seven days in the past, and
        ``today.year + 1`` if it lies more than seven days back. When the
        day does not exist in the current year, next year is returned if the
        date is valid there; if it is valid in neither, ``today.year`` is
        returned and the caller is left to reject the impossible date.
    """
    if explicit_year:
        return explicit_year

    this_year = today.year
    candidate = _to_date(this_year, month, day)
    if candidate is None:
        # The day cannot exist this year (in practice 29 February outside a
        # leap year). Roll forward when next year can hold it rather than
        # returning a year in which the date can never be built.
        if _to_date(this_year + 1, month, day) is not None:
            return this_year + 1
        return this_year

    # Dates up to a week old are still treated as belonging to this year;
    # anything older is taken to mean next year's occurrence.
    if (today - candidate).days > 7:
        return this_year + 1
    return this_year
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_numeric_date(raw: str) -> date | None:
    """Extract the first ``day.month.year`` style date found in ``raw``.

    The parts may be separated by ``.`` or ``/``. A year below 100 is placed
    in the 2000s. Returns ``None`` when nothing matches or when the matched
    numbers do not form a valid calendar date.
    """
    found = re.search(r"\b(\d{1,2})[./](\d{1,2})[./](\d{2,4})\b", raw)
    if found is None:
        return None

    day, month, year = (int(part) for part in found.groups())
    full_year = year + 2000 if year < 100 else year
    return _to_date(full_year, month, day)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_textual_dates(raw: str, today: date) -> list[date]:
    """Collect every "day month-name [year]" date that appears in ``raw``.

    Month names are looked up in ``MONTH_MAP`` case-insensitively. An
    optional four-digit year starting with ``20`` may follow the month; when
    it is absent the year is chosen by ``_infer_year`` relative to ``today``.
    Matches that do not form a valid calendar date are skipped.

    Returns:
        The parsed dates in the order they occur in the text.
    """
    # Longest alternatives first, so a full month name is tried before any of
    # its abbreviations.
    month_alternatives = "|".join(sorted(MONTH_MAP.keys(), key=len, reverse=True))
    day_month_year = re.compile(
        r"\b(\d{1,2})\.?\s*(" + month_alternatives + r")\b(?:\s+(20\d{2}))?",
        re.IGNORECASE,
    )

    found: list[date] = []
    for hit in day_month_year.finditer(raw):
        day_text, month_text, year_text = hit.groups()
        month = MONTH_MAP.get(month_text.lower())
        if not month:
            continue
        day = int(day_text)
        stated_year = int(year_text) if year_text else None
        resolved = _to_date(_infer_year(month, day, stated_year, today), month, day)
        if resolved:
            found.append(resolved)
    return found
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_course_date_range(raw: str, today: date | None = None) -> tuple[date | None, date | None]:
    """Parse a free-text course date label into a ``(start, end)`` pair.

    The label is whitespace-normalized and lowercased, then the following
    interpretations are tried in order; the first one that applies decides
    the result:

    1. The whole label is an ISO date/datetime: start and end are that day.
    2. Numeric ``d.m.y`` / ``d/m/y`` dates: the first two found are start and
       end; a single one is used for both.
    3. A day range within one month, e.g. "3.-5. mai" or "3 til 5 mai 2026".
    4. Individual "day month-name" dates: the first two found are start and
       end; a single one is used for both.

    Args:
        raw: The label text.
        today: Reference date for year inference; defaults to the current
            date.

    Returns:
        ``(start, end)``; ``(None, None)`` when the label is empty or nothing
        could be parsed. Individual members may be ``None`` when a matched
        date is not a valid calendar date.
    """
    reference_today = today or date.today()
    text = normalize_whitespace(raw).lower()
    if not text:
        return None, None

    # 1. Entire label is an ISO date or datetime.
    try:
        iso_day = datetime.fromisoformat(text).date()
    except ValueError:
        pass
    else:
        return iso_day, iso_day

    # 2. Fully numeric dates.
    numeric_hits = re.findall(r"\b\d{1,2}[./]\d{1,2}[./]\d{2,4}\b", text)
    if numeric_hits:
        first = _parse_numeric_date(numeric_hits[0])
        if len(numeric_hits) == 1:
            return first, first
        second = _parse_numeric_date(numeric_hits[1])
        return first, second or first

    # 3. "<day> - <day> <month> [year]" within a single month.
    month_alternatives = "|".join(sorted(MONTH_MAP.keys(), key=len, reverse=True))
    span = re.search(
        r"\b(\d{1,2})\s*\.?\s*(?:-|–|—|til)\s*(\d{1,2})\.?\s*(" + month_alternatives + r")\b(?:\s+(20\d{2}))?",
        text,
        re.IGNORECASE,
    )
    if span:
        first_day_text, last_day_text, month_text, year_text = span.groups()
        month = MONTH_MAP.get(month_text.lower())
        if month:
            first_day = int(first_day_text)
            last_day = int(last_day_text)
            stated_year = int(year_text) if year_text else None
            # The year is inferred from the last day of the range.
            year = _infer_year(month, last_day, stated_year, reference_today)
            start = _to_date(year, month, first_day)
            end = _to_date(year, month, last_day)
            return start, end or start

    # 4. Separate "day month-name" dates.
    named = _parse_textual_dates(text, reference_today)
    if not named:
        return None, None
    start = named[0]
    end = named[1] if len(named) > 1 else start
    return start, end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normalize_vtg_course_rows(rows: Any) -> list[dict[str, Any]]:
    """Clean and sort raw course rows into a uniform list of dicts.

    Anything that is not a list yields ``[]``. Entries that are not dicts, or
    that have no label under ``"dato"`` / ``"display_label"``, are skipped.

    For each kept row the dates come from its ``"start_date"`` /
    ``"end_date"`` values when at least one of them parses as ISO; otherwise
    both are parsed from the label with ``parse_course_date_range``.

    Returns:
        Dicts with the keys ``"dato"`` (cleaned label), ``"status"``
        (defaults to ``"Ledig"``), ``"start_date"`` and ``"end_date"`` (ISO
        strings or ``None``) and ``"sort_order"`` (index in the input list).
        The list is ordered by start date (falling back to end date), with
        undated rows last and ties kept in input order.
    """
    if not isinstance(rows, list):
        return []

    def iso_or_none(value: Any) -> date | None:
        # Falsy or unparseable values both count as "no explicit date".
        if not value:
            return None
        try:
            return datetime.fromisoformat(str(value)).date()
        except ValueError:
            return None

    cleaned: list[dict[str, Any]] = []
    for position, entry in enumerate(rows):
        if not isinstance(entry, dict):
            continue

        label = normalize_whitespace(str(entry.get("dato") or entry.get("display_label") or ""))
        if not label:
            continue

        status = normalize_whitespace(str(entry.get("status") or "Ledig")) or "Ledig"

        start = iso_or_none(entry.get("start_date"))
        end = iso_or_none(entry.get("end_date"))
        if start is None and end is None:
            start, end = parse_course_date_range(label)

        cleaned.append(
            {
                "dato": label,
                "status": status,
                "start_date": start.isoformat() if start else None,
                "end_date": end.isoformat() if end else None,
                "sort_order": position,
            }
        )

    def sort_key(item: dict[str, Any]) -> tuple[str, int, str]:
        # ISO strings sort chronologically; the far-future placeholder pushes
        # rows without any date to the end.
        when = item.get("start_date") or item.get("end_date") or "9999-12-31"
        return when, int(item.get("sort_order") or 0), item.get("dato") or ""

    cleaned.sort(key=sort_key)
    return cleaned
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_upcoming_course(row: dict[str, Any], today: date | None = None) -> bool:
|
|
|
|
|
|
reference_today = today or date.today()
|
|
|
|
|
|
end_value = row.get("end_date") or row.get("start_date")
|
|
|
|
|
|
if not end_value:
|
|
|
|
|
|
return True
|
|
|
|
|
|
try:
|
|
|
|
|
|
end_date = datetime.fromisoformat(str(end_value)).date()
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
return True
|
|
|
|
|
|
return end_date >= reference_today
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def filter_upcoming_courses(rows: Any) -> list[dict[str, Any]]:
    """Normalize ``rows`` and keep only the courses that are still upcoming.

    Rows are first passed through ``normalize_vtg_course_rows``; each result
    is then kept if ``is_upcoming_course`` accepts it, judged against the
    current date.
    """
    return list(filter(is_upcoming_course, normalize_vtg_course_rows(rows)))
|
2026-04-28 13:53:00 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_invalid_vtg_course_labels(rows: Any) -> list[str]:
    """List the labels of course rows for which no date can be determined.

    Anything that is not a list yields ``[]``. Entries that are not dicts, or
    that have no label under ``"dato"`` / ``"display_label"``, are ignored.
    A row is reported when neither its ``"start_date"`` / ``"end_date"``
    values parse as ISO nor its label can be parsed by
    ``parse_course_date_range``.

    Returns:
        The whitespace-normalized labels of the offending rows, in input
        order.
    """
    if not isinstance(rows, list):
        return []

    def iso_or_none(value: Any) -> date | None:
        # Falsy or unparseable values both count as "no explicit date".
        if not value:
            return None
        try:
            return datetime.fromisoformat(str(value)).date()
        except ValueError:
            return None

    unparseable: list[str] = []
    for entry in rows:
        if not isinstance(entry, dict):
            continue

        label = normalize_whitespace(str(entry.get("dato") or entry.get("display_label") or ""))
        if not label:
            continue

        start = iso_or_none(entry.get("start_date"))
        end = iso_or_none(entry.get("end_date"))
        if start or end:
            # An explicit date is enough; the label need not be parseable.
            continue

        start, end = parse_course_date_range(label)
        if not start and not end:
            unparseable.append(label)

    return unparseable
|