import re from datetime import date, datetime from typing import Any MONTH_MAP: dict[str, int] = { "januar": 1, "jan": 1, "februar": 2, "feb": 2, "mars": 3, "mar": 3, "april": 4, "apr": 4, "mai": 5, "juni": 6, "jun": 6, "juli": 7, "jul": 7, "august": 8, "aug": 8, "september": 9, "sep": 9, "sept": 9, "oktober": 10, "okt": 10, "november": 11, "nov": 11, "desember": 12, "des": 12, } def normalize_whitespace(value: str) -> str: return re.sub(r"\s+", " ", str(value or "")).strip() def _to_date(year: int, month: int, day: int) -> date | None: try: return date(year, month, day) except ValueError: return None def _infer_year(month: int, day: int, explicit_year: int | None, today: date) -> int: if explicit_year: return explicit_year candidate = _to_date(today.year, month, day) if candidate and candidate < today.replace(day=max(1, min(today.day, 28))): if (today - candidate).days > 7: return today.year + 1 return today.year def _parse_numeric_date(raw: str) -> date | None: match = re.search(r"\b(\d{1,2})[./](\d{1,2})[./](\d{2,4})\b", raw) if not match: return None day = int(match.group(1)) month = int(match.group(2)) year = int(match.group(3)) if year < 100: year += 2000 return _to_date(year, month, day) def _parse_textual_dates(raw: str, today: date) -> list[date]: results: list[date] = [] pattern = re.compile( r"\b(\d{1,2})\.?\s*(" + "|".join(sorted(MONTH_MAP.keys(), key=len, reverse=True)) + r")\b(?:\s+(20\d{2}))?", re.IGNORECASE, ) for match in pattern.finditer(raw): day = int(match.group(1)) month = MONTH_MAP.get(match.group(2).lower()) if not month: continue explicit_year = int(match.group(3)) if match.group(3) else None year = _infer_year(month, day, explicit_year, today) candidate = _to_date(year, month, day) if candidate: results.append(candidate) return results def parse_course_date_range(raw: str, today: date | None = None) -> tuple[date | None, date | None]: reference_today = today or date.today() normalized = normalize_whitespace(raw).lower() if not normalized: return None, None iso_candidate = None try: iso_candidate = datetime.fromisoformat(normalized).date() except ValueError: iso_candidate = None if iso_candidate: return iso_candidate, iso_candidate numeric_dates = re.findall(r"\b\d{1,2}[./]\d{1,2}[./]\d{2,4}\b", normalized) if len(numeric_dates) >= 2: start = _parse_numeric_date(numeric_dates[0]) end = _parse_numeric_date(numeric_dates[1]) return start, end or start if len(numeric_dates) == 1: single = _parse_numeric_date(numeric_dates[0]) return single, single range_match = re.search( r"\b(\d{1,2})\s*\.?\s*(?:-|–|—|til)\s*(\d{1,2})\.?\s*(" + "|".join(sorted(MONTH_MAP.keys(), key=len, reverse=True)) + r")\b(?:\s+(20\d{2}))?", normalized, re.IGNORECASE, ) if range_match: start_day = int(range_match.group(1)) end_day = int(range_match.group(2)) month = MONTH_MAP.get(range_match.group(3).lower()) explicit_year = int(range_match.group(4)) if range_match.group(4) else None if month: year = _infer_year(month, end_day, explicit_year, reference_today) start = _to_date(year, month, start_day) end = _to_date(year, month, end_day) return start, end or start textual_dates = _parse_textual_dates(normalized, reference_today) if len(textual_dates) >= 2: return textual_dates[0], textual_dates[1] if len(textual_dates) == 1: return textual_dates[0], textual_dates[0] return None, None def normalize_vtg_course_rows(rows: Any) -> list[dict[str, Any]]: if not isinstance(rows, list): return [] normalized_rows: list[dict[str, Any]] = [] for index, row in enumerate(rows): if not isinstance(row, dict): continue display_label = normalize_whitespace(str(row.get("dato") or row.get("display_label") or "")) if not display_label: continue status = normalize_whitespace(str(row.get("status") or "Ledig")) or "Ledig" explicit_start = row.get("start_date") explicit_end = row.get("end_date") if explicit_start: try: start_date = datetime.fromisoformat(str(explicit_start)).date() except ValueError: start_date = None else: start_date = None if explicit_end: try: end_date = datetime.fromisoformat(str(explicit_end)).date() except ValueError: end_date = None else: end_date = None if not start_date and not end_date: start_date, end_date = parse_course_date_range(display_label) normalized_rows.append( { "dato": display_label, "status": status, "start_date": start_date.isoformat() if start_date else None, "end_date": end_date.isoformat() if end_date else None, "sort_order": index, } ) normalized_rows.sort( key=lambda row: ( row.get("start_date") or row.get("end_date") or "9999-12-31", int(row.get("sort_order") or 0), row.get("dato") or "", ) ) return normalized_rows def is_upcoming_course(row: dict[str, Any], today: date | None = None) -> bool: reference_today = today or date.today() end_value = row.get("end_date") or row.get("start_date") if not end_value: return True try: end_date = datetime.fromisoformat(str(end_value)).date() except ValueError: return True return end_date >= reference_today def filter_upcoming_courses(rows: Any) -> list[dict[str, Any]]: normalized_rows = normalize_vtg_course_rows(rows) return [row for row in normalized_rows if is_upcoming_course(row)] def get_invalid_vtg_course_labels(rows: Any) -> list[str]: if not isinstance(rows, list): return [] invalid_labels: list[str] = [] for row in rows: if not isinstance(row, dict): continue display_label = normalize_whitespace(str(row.get("dato") or row.get("display_label") or "")) if not display_label: continue explicit_start = row.get("start_date") explicit_end = row.get("end_date") start_date = None end_date = None if explicit_start: try: start_date = datetime.fromisoformat(str(explicit_start)).date() except ValueError: start_date = None if explicit_end: try: end_date = datetime.fromisoformat(str(explicit_end)).date() except ValueError: end_date = None if not start_date and not end_date: start_date, end_date = parse_course_date_range(display_label) if not start_date and not end_date: invalid_labels.append(display_label) return invalid_labels