Nye-TeeOff/backend/import_wp.py

198 lines
10 KiB
Python
Raw Normal View History

import asyncio, asyncpg, urllib.request, json, re, os, requests
2026-02-26 09:20:51 +01:00
# --- CONFIGURATION ---
# PostgreSQL connection string. It can be overridden through the
# TEEOFF_IMPORT_DB_URL environment variable so that credentials do not have to
# live in source control; the literal below is kept as the default so existing
# deployments keep working unchanged.
DB_URL = os.environ.get(
    "TEEOFF_IMPORT_DB_URL",
    "postgresql://teeoff_admin:teeoff_secret_password@db:5432/teeoff",
)
# WordPress REST listing of the "golfbaner" post type (100 posts per page,
# with embedded resources such as the featured media).
WP_API_URL = "https://teeoff.no/wp-json/wp/v2/golfbaner?per_page=100&_embed"
# WordPress REST endpoint used to resolve a media ID to its file URL.
MEDIA_ENDPOINT = "https://teeoff.no/wp-json/wp/v2/media"
# Local directory downloaded media files are written to; created on import.
MEDIA_DIR = "./public/media"
os.makedirs(MEDIA_DIR, exist_ok=True)
# Cache so the same media ID is never looked up more than once per run.
media_cache = {}
def get_url_from_id(media_id):
    """Look up a WordPress media ID and return the actual file URL.

    Lookups that receive an HTTP 200 answer are stored in the module-level
    ``media_cache`` so each ID is requested at most once per run.

    Args:
        media_id: WordPress media ID. Only positive ``int`` values are
            accepted; ``bool`` is rejected even though it subclasses ``int``.

    Returns:
        The ``source_url`` reported by the media endpoint, or ``None`` when
        the ID is invalid, the request fails, the response is not HTTP 200,
        or the payload carries no ``source_url``.
    """
    # bool is a subclass of int, so True/False would otherwise be queried as
    # "/media/True"; zero and negative numbers are never valid IDs either.
    if isinstance(media_id, bool) or not isinstance(media_id, int) or media_id <= 0:
        return None
    if media_id in media_cache:
        return media_cache[media_id]
    try:
        print(f" 🔍 Slår opp Media-ID: {media_id}...")
        resp = requests.get(f"{MEDIA_ENDPOINT}/{media_id}", timeout=10)
        if resp.status_code == 200:
            payload = resp.json()
            # Guard against a non-object JSON body instead of relying on a
            # blanket exception handler.
            url = payload.get('source_url') if isinstance(payload, dict) else None
            media_cache[media_id] = url
            return url
        # Previously a non-200 answer was dropped without any trace.
        print(f" ⚠️ Media-ID {media_id} ga HTTP {resp.status_code}")
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f" ⚠️ Kunne ikke finne URL for Media-ID {media_id}: {e}")
    return None
def download_media(url, slug, prefix):
    """Download a teeoff.no media file into ``MEDIA_DIR`` and return its local path.

    Args:
        url: Source URL of the media file. Non-string or empty values are
            ignored.
        slug: Identifier used in the local file name.
        prefix: File-name prefix (the callers in this file pass ``"main"``,
            ``"logo"`` and ``"slide"``).

    Returns:
        ``"/media/<prefix>_<slug>.<ext>"`` when the file already exists
        locally or was downloaded successfully; the (repaired) URL itself
        when it does not contain ``teeoff.no``; ``None`` when the input is
        unusable or the download fails.
    """
    from urllib.parse import urlsplit

    if not isinstance(url, str) or not url:
        return None
    # Repair "triple-slash" URLs and similar WordPress glitches.
    clean_url = url.replace("https:///", "https://").replace("http:///", "http://")
    if "teeoff.no" not in clean_url:
        return clean_url
    try:
        # Take the extension from the URL *path* only, so a dotted query
        # string ("?v=1.2.3") or an extension-less path cannot produce a bogus
        # extension or one that contains a path separator.
        ext = os.path.splitext(urlsplit(clean_url).path)[1].lstrip('.').lower()
        if not (3 <= len(ext) <= 4 and ext.isalnum()):
            ext = "jpg"
        filename = f"{prefix}_{slug}.{ext}"
        filepath = os.path.join(MEDIA_DIR, filename)
        if os.path.exists(filepath):
            return f"/media/{filename}"
        response = requests.get(clean_url, timeout=15)
        if response.status_code == 200:
            # Write to a temporary name and rename afterwards: an interrupted
            # write must not leave a truncated file that later runs would
            # treat as an already-downloaded image.
            tmp_path = f"{filepath}.part"
            with open(tmp_path, 'wb') as f:
                f.write(response.content)
            os.replace(tmp_path, filepath)
            return f"/media/{filename}"
        print(f" ⚠️ Feil ved nedlasting: HTTP {response.status_code} for {clean_url}")
    except (requests.RequestException, OSError, ValueError) as e:
        # ValueError covers URLs that urlsplit refuses to parse.
        print(f" ⚠️ Feil ved nedlasting: {e}")
    return None
2026-02-26 09:20:51 +01:00
def decode_html(text):
    """Decode HTML character references in *text* and trim surrounding whitespace.

    Every named and numeric reference is decoded (for example ``&#8211;`` or
    ``&oslash;``), not just ``&#038;``, ``&amp;`` and ``&nbsp;``.

    Args:
        text: Value to clean. It is converted with ``str()``; any falsy value
            (``None``, ``""``, ``0``, ``False``) yields ``""``.

    Returns:
        The decoded, stripped string.
    """
    import html

    if not text:
        return ""
    # "&nbsp;" keeps mapping to a plain space (html.unescape alone would give
    # U+00A0); everything else is left to the standard decoder.
    return html.unescape(str(text).replace('&nbsp;', ' ')).strip()
def parse_int(val):
    """Return the first run of digits found in *val* as an ``int``.

    Args:
        val: Any value; it is converted with ``str()`` before searching.

    Returns:
        The integer formed by the first consecutive digits, or ``None`` when
        *val* is ``None``, an empty string, or contains no digits. Signs and
        decimal parts are ignored (``"-5"`` gives ``5``, ``59.5`` gives ``59``).
    """
    if val is None or val == '':
        return None
    # A regex search cannot fail on a string, so the former bare ``except``
    # (which also swallowed KeyboardInterrupt/SystemExit) is not needed.
    match = re.search(r'\d+', str(val))
    return int(match.group()) if match else None
def extract_url(val):
    """Pull a URL out of a field value.

    A string is returned unchanged, a dict yields its ``'url'`` entry (or
    ``None`` when the key is absent), and any other type gives ``None``.
    """
    if isinstance(val, str):
        return val
    return val.get('url') if isinstance(val, dict) else None
2026-02-26 09:20:51 +01:00
def _parse_float(val):
    """Convert *val* to ``float``; return ``None`` when it is missing or not numeric."""
    if val is None or val == '':
        return None
    try:
        return float(val)
    except (TypeError, ValueError):
        return None


def _golfbox_booking_url(booking_id):
    """Build the GolfBox booking redirect URL for a resource GUID, or ``None`` if unset."""
    if not booking_id:
        return None
    guid = str(booking_id).replace('{', '').replace('}', '').strip()
    # The GUID is wrapped in braces inside the already percent-encoded rUrl
    # parameter, so the braces must be encoded as %7B / %7D. The previous
    # format string produced "%{GUID}", which is not a valid percent-escape.
    return (
        "http://www.golfbox.no/site/system/redirect.asp?locale=nb_NO"
        "&rUrl=%2Fsite%2Fressources%2Fbooking%2Fgrid.asp%3FRessource_GUID%3D"
        f"%7B{guid}%7D"
    )


def _fetch_all_posts():
    """Download every page of the WordPress listing and return all posts as one list."""
    from urllib.error import HTTPError

    posts = []
    page = 1
    while True:
        req = urllib.request.Request(
            f"{WP_API_URL}&page={page}", headers={'User-Agent': 'TeeOff-V8.9.2'}
        )
        try:
            # A timeout keeps a stalled connection from hanging the import forever.
            with urllib.request.urlopen(req, timeout=30) as response:
                total_pages = parse_int(response.headers.get('X-WP-TotalPages'))
                data = json.loads(response.read().decode())
        except HTTPError as e:
            # WordPress answers HTTP 400 when asked for a page past the last
            # one; that is the normal end of pagination. Anything else is a
            # real failure and must not be mistaken for "no more data".
            if e.code == 400 and page > 1:
                break
            raise
        if not data:
            break
        if not isinstance(data, list):
            raise ValueError(f"Unexpected WordPress response on page {page}: {type(data).__name__}")
        posts.extend(data)
        # Stop as soon as the advertised last page has been read, avoiding
        # the extra request that would only return the HTTP 400 above.
        if total_pages is not None and page >= total_pages:
            break
        page += 1
    return posts


def _collect_media(post, acf, slug):
    """Download featured image, logo and slider images; return ``(main, logo, gallery)``."""
    # --- 1. MAIN IMAGE ---
    # Every level is type-checked: an empty "wp:featuredmedia" list used to
    # raise IndexError.
    embedded = post.get('_embedded')
    featured = embedded.get('wp:featuredmedia') if isinstance(embedded, dict) else None
    first = featured[0] if isinstance(featured, list) and featured else None
    featured_img = first.get('source_url') if isinstance(first, dict) else None
    local_main_img = download_media(featured_img, slug, "main")

    # --- 2. LOGO ---
    logo_field = acf.get('logo')
    logo_url = extract_url(logo_field)
    if not logo_url and isinstance(logo_field, int):
        logo_url = get_url_from_id(logo_field)
    local_logo = download_media(logo_url, slug, "logo")

    # --- 3. GALLERY (SLIDER) ---
    slides = acf.get('slides') or []
    local_gallery = []
    if isinstance(slides, list):
        for idx, s in enumerate(slides):
            if isinstance(s, int):
                # The slide is given as a media ID that must be resolved first.
                url = get_url_from_id(s)
            elif isinstance(s, dict):
                url = s.get('url')
            elif isinstance(s, str):
                url = s
            else:
                url = None
            if url:
                res = download_media(url, f"{slug}_{idx}", "slide")
                if res:
                    local_gallery.append(res)
    return local_main_img, local_logo, local_gallery


async def _insert_facility(conn, name, slug, acf, local_main_img, local_logo, local_gallery):
    """Insert one row into ``facilities`` and return its id."""
    # Coordinates: a missing or non-numeric value becomes NULL instead of
    # 0.0 (or a crash on float('')).
    banekart = acf.get('banekart')
    if not isinstance(banekart, dict):
        banekart = {}
    lat = _parse_float(banekart.get('lat'))
    lng = _parse_float(banekart.get('lng'))

    # $17 is bound as text for TO_DATE; normalise falsy values to NULL and
    # anything else to a string.
    status_date = acf.get('dato_for_oppdatert_status')
    status_date = str(status_date) if status_date else None

    amenities = {
        "drivingrange": decode_html(acf.get("drivingrange")),
        "treningsgreen": decode_html(acf.get("treningsgreen")),
        "proshop": decode_html(acf.get("proshop")),
        "kafe": decode_html(acf.get("kafe")),
        "bilutleie": decode_html(acf.get("bilutleie")),
        "kolleutleie": decode_html(acf.get("kolleutleie")),
        "pro": decode_html(acf.get("pro")),
        "simulator": decode_html(acf.get("golfsimulator")),
        "antall_hull": decode_html(acf.get("antall_hull")),
    }

    # RETURNING id replaces the former follow-up SELECT by slug.
    return await conn.fetchval(
        '''
        INSERT INTO facilities (
            name, slug, description, established_year, season, address, city, county,
            lat, lng, email, phone, website_url, image_url, amenities, greenfee,
            status_updated_at, logo_url, video_url, guest_requirements,
            faqs, shotzoom, gallery, ngf_number, golfbox_club_id, golfbox_booking_url,
            facebook_url, instagram_url, baneguide_url, flyfoto_url, golfbox_tournament_url,
            footnote, social_links, webcam_url, weather_url, architect,
            navn_standard_medlemskap, standard_medlemskap, standard_medlemskap_kommentarer,
            navn_rimeligste_alternativ, rimeligste_alternativ, rimeligste_alternativ_kommentarer,
            medlemskap_url
        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15::jsonb,
                  $16::jsonb, TO_DATE(NULLIF($17, ''), 'YYYYMMDD'),
                  $18, $19, $20, $21::jsonb, $22::jsonb, $23::jsonb,
                  $24, $25, $26, $27, $28, $29, $30, $31, $32, $33::jsonb, $34, $35, $36,
                  $37, $38, $39, $40, $41, $42, $43)
        RETURNING id
        ''',
        # $1-$5
        name, slug, decode_html(acf.get('beskrivelse')), parse_int(acf.get('byggear')), acf.get('sesong'),
        # $6-$8
        acf.get('gateadresse'), acf.get('postnummer_og_poststed'), acf.get('fylke'),
        # $9-$10
        lat, lng,
        # $11-$13
        acf.get('e-post'), acf.get('telefon'), extract_url(acf.get('hjemmeside')),
        # $14
        local_main_img,
        # $15-$16
        json.dumps(amenities),
        json.dumps(acf.get('greenfee_-_voksne') or []),
        # $17-$18
        status_date, local_logo,
        # $19-$20 (video_url is always NULL in this import)
        None, decode_html(acf.get('krav_til_gjestespillere')),
        # $21-$23 (faqs is always an empty list in this import)
        json.dumps([]), json.dumps(acf.get('shotzoom') or []), json.dumps(local_gallery),
        # $24-$25
        parse_int(acf.get('klubbnummer_norges_golfforbund')), parse_int(acf.get('klubbnummer_golfbox')),
        # $26-$28
        _golfbox_booking_url(acf.get('golfbox_booking_id')),
        extract_url(acf.get('facebook_url')), extract_url(acf.get('instagram_url')),
        # $29-$31
        extract_url(acf.get('baneguide')), extract_url(acf.get('flyfoto')), extract_url(acf.get('golfbox')),
        # $32-$33
        decode_html(acf.get('fotnote')), json.dumps(acf.get('sosiale_lenker') or []),
        # $34-$36
        decode_html(acf.get('webkamera')), extract_url(acf.get('varmelding_yr')), decode_html(acf.get('arkitekt')),
        # $37-$38
        decode_html(acf.get('navn_standard_medlemskap')), parse_int(acf.get('standard_medlemskap')),
        # $39-$40
        decode_html(acf.get('standard_medlemskap_kommentarer')), decode_html(acf.get('navn_rimeligste_alternativ')),
        # $41-$42
        parse_int(acf.get('rimeligste_alternativ')), decode_html(acf.get('rimeligste_alternativ_kommentarer')),
        # $43
        extract_url(acf.get('medlemskap_url')),
    )


async def _import_courses(conn, fac_id, acf):
    """Insert the main and optional second course with their holes; return the main course length."""
    main_len = 0
    # '' selects the main-course fields, '_bane_to' the second-course fields.
    for suffix in ('', '_bane_to'):
        is_main = suffix == ''
        c_name = (
            acf.get('navn_pa_hovedbane' if is_main else 'navn_pa_sekundar_bane')
            or ('Hovedbane' if is_main else 'Bane 2')
        )
        status = acf.get('banestatus' if is_main else 'banestatus_sekundar_bane')
        # Skip the second course when it is flagged as non-existent or has no
        # par value for its first hole.
        if not is_main and (
            status == 'finnes_ingen_bane_to' or not parse_int(acf.get('hull_1_par_bane_to'))
        ):
            continue
        course_id = await conn.fetchval(
            'INSERT INTO courses (facility_id, name, status, par, is_main_course, tee_boxes, architect) VALUES ($1, $2, $3, $4, $5, $6, $7) RETURNING id',
            fac_id, c_name, status,
            parse_int(acf.get('totalt_par' if is_main else 'totalt_par_bane_to')),
            is_main,
            json.dumps({"herrer": acf.get(f"utslag_herrer{suffix}"), "damer": acf.get(f"utslag_damer{suffix}")}),
            decode_html(acf.get('arkitekt')),
        )
        curr_len = 0
        for h_num in range(1, 19):
            par = parse_int(acf.get(f'hull_{h_num}_par{suffix}'))
            if not par:
                # Holes without a par value are not stored.
                continue
            hcp_index = parse_int(acf.get(f'hull_{h_num}_index{suffix}'))
            lens = {
                k: parse_int(acf.get(f'{k}_hull_{h_num}{suffix}'))
                for k in ['lengst', 'lang', 'mellomlang', 'mellomkort', 'kort', 'kortest']
            }
            # The course length is the sum of the 'lengst' value of each hole.
            curr_len += (lens['lengst'] or 0)
            await conn.execute(
                'INSERT INTO holes (course_id, hole_number, par, hcp_index, lengths) VALUES ($1, $2, $3, $4, $5::jsonb)',
                course_id, h_num, par, hcp_index, json.dumps(lens),
            )
        await conn.execute("UPDATE courses SET length_meters = $1 WHERE id = $2", curr_len, course_id)
        if is_main:
            main_len = curr_len
    return main_len


async def _import_facility(conn, post):
    """Import one WordPress post: download its media, then write facility, courses and holes."""
    acf = post.get('acf')
    # WordPress can deliver a non-dict (e.g. an empty list) when a post has no
    # custom fields; treat that as "no fields" instead of crashing on .get().
    if not isinstance(acf, dict):
        acf = {}
    slug = post['slug']
    title = post.get('title')
    name = decode_html(title.get('rendered', '') if isinstance(title, dict) else '')
    print(f"📦 Mapper {name}...")

    # Downloads happen before the transaction so it only covers the DB writes.
    local_main_img, local_logo, local_gallery = _collect_media(post, acf, slug)

    # One transaction per facility: a failure cannot leave a facility behind
    # with only part of its courses or holes.
    async with conn.transaction():
        fac_id = await _insert_facility(
            conn, name, slug, acf, local_main_img, local_logo, local_gallery
        )
        fac_main_len = await _import_courses(conn, fac_id, acf)
        await conn.execute(
            "UPDATE facilities SET length_meters = $1 WHERE id = $2", fac_main_len, fac_id
        )


async def run_master_import():
    """Replace all facilities, courses and holes with a fresh import from WordPress.

    The complete post list is fetched first. Only when that succeeded and
    returned at least one post are the ``facilities``, ``courses`` and
    ``holes`` tables truncated and refilled, so a network failure or an empty
    answer can no longer wipe the database. The database connection is closed
    even when the import fails.

    Raises:
        urllib.error.URLError: When fetching the WordPress listing fails
            (``HTTPError`` is a subclass).
        ValueError: When a listing page is not valid JSON or not a JSON array.
        Exception: Database errors raised by asyncpg propagate unchanged.
    """
    print("🚀 Starter MASTER IMPORT v8.9.2 (Media ID Resolver)...")

    # NOTE: the HTTP helpers are blocking; this is acceptable here because the
    # script runs this coroutine as its only task.
    posts = _fetch_all_posts()
    if not posts:
        print("⚠️ Ingen golfbaner mottatt fra WordPress - databasen er ikke endret.")
        return

    conn = await asyncpg.connect(DB_URL)
    try:
        await conn.execute("TRUNCATE facilities, courses, holes RESTART IDENTITY CASCADE;")
        for post in posts:
            await _import_facility(conn, post)
    finally:
        await conn.close()
    print("✅ GALLERI-BILDER RESOLVED OG IMPORT FERDIG!")
2026-02-26 09:20:51 +01:00
# Script entry point: run the import only when this file is executed directly.
if __name__ == "__main__":
    asyncio.run(run_master_import())