158 lines
9.6 KiB
Python
158 lines
9.6 KiB
Python
import asyncio, asyncpg, urllib.request, json, re, os, requests
|
|
from env_config import get_database_url
|
|
|
|
# --- CONFIGURATION ---
# Value handed to asyncpg.connect(); produced by the project's env_config helper.
DB_URL = get_database_url()
# WordPress REST listing of the "golfbaner" post type (100 posts per page,
# embedded resources included). A "&page=N" suffix is appended per request.
WP_API_URL = "https://teeoff.no/wp-json/wp/v2/golfbaner?per_page=100&_embed"
# REST endpoint that is queried as "<MEDIA_ENDPOINT>/<media_id>" to look up
# the source_url of a media item.
MEDIA_ENDPOINT = "https://teeoff.no/wp-json/wp/v2/media"
# Local directory that downloaded media files are written to.
MEDIA_DIR = "./public/media"

# Module-level side effect: make sure the media directory exists on import.
os.makedirs(MEDIA_DIR, exist_ok=True)
# In-process cache: media ID -> resolved source URL (filled by get_url_from_id).
media_cache = {}
|
|
|
|
def get_url_from_id(media_id):
    """Resolve a WordPress media ID to the ``source_url`` of that media item.

    The ID is looked up in ``media_cache`` first. On a cache miss,
    ``<MEDIA_ENDPOINT>/<media_id>`` is requested and the ``source_url`` of a
    200 response is cached and returned.

    Args:
        media_id: Media ID. Anything that is not a non-zero ``int`` (``None``,
            strings, floats, booleans, ``0``) is rejected.

    Returns:
        The media URL, or ``None`` when the ID is invalid, the request fails,
        the status is not 200, or the payload cannot be read.
    """
    # bool is a subclass of int, but True/False are never valid media IDs.
    if isinstance(media_id, bool) or not isinstance(media_id, int) or not media_id:
        return None
    if media_id in media_cache:
        return media_cache[media_id]
    try:
        resp = requests.get(f"{MEDIA_ENDPOINT}/{media_id}", timeout=10)
        if resp.status_code != 200:
            return None
        url = resp.json().get('source_url')
    except (requests.RequestException, ValueError, AttributeError):
        # Best-effort lookup: a network error, invalid JSON (ValueError) or a
        # JSON payload that is not an object (AttributeError) all mean
        # "no URL". Unlike a bare ``except`` this lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
    media_cache[media_id] = url
    return url
|
|
|
|
def download_media(url, slug, prefix):
    """Mirror a teeoff.no media file into ``MEDIA_DIR``.

    Args:
        url: Remote media URL. Non-string or empty values yield ``None``.
        slug: Identifier used in the local file name.
        prefix: File name prefix (callers in this file pass ``"main"``,
            ``"logo"`` and ``"slide"``).

    Returns:
        ``"/media/<prefix>_<slug>.<ext>"`` when the file already exists locally
        or was downloaded successfully; the cleaned URL itself when it does not
        contain ``teeoff.no``; ``None`` when the input is unusable or the
        download fails.
    """
    from urllib.parse import urlsplit

    if not isinstance(url, str) or not url:
        return None
    # Repair URLs that carry an accidental third slash after the scheme.
    clean_url = url.replace("https:///", "https://").replace("http:///", "http://")
    if "teeoff.no" not in clean_url:
        return clean_url

    # Take the extension from the URL *path* only. Splitting the whole URL on
    # "." can return pieces of the host name or path (e.g. "no/x"), which
    # would put a "/" into the local file name.
    try:
        url_path = urlsplit(clean_url).path
    except ValueError:
        url_path = ""
    ext = os.path.splitext(url_path)[1].lstrip('.').lower()
    if not (3 <= len(ext) <= 4 and ext.isalnum()):
        ext = "jpg"

    filename = f"{prefix}_{slug}.{ext}"
    filepath = os.path.join(MEDIA_DIR, filename)
    if os.path.exists(filepath):
        return f"/media/{filename}"

    try:
        response = requests.get(clean_url, timeout=15)
        if response.status_code != 200:
            return None
        # Write to a temporary name first so an interrupted write never leaves
        # a truncated file that later runs would treat as already downloaded.
        tmp_path = f"{filepath}.part"
        with open(tmp_path, 'wb') as f:
            f.write(response.content)
        os.replace(tmp_path, filepath)
    except (requests.RequestException, OSError):
        # Best-effort download: network and file-system errors mean "no file".
        return None
    return f"/media/{filename}"
|
|
|
|
def decode_html(text):
    """Return *text* as a trimmed string with HTML entities decoded.

    Args:
        text: Any value; falsy values (``None``, ``""``, ``0``, ``False``)
            produce an empty string, everything else is passed through ``str``.

    Returns:
        The decoded text. Named and numeric character references
        (``&amp;``, ``&#038;``, ``&nbsp;``, ...) are replaced by their
        characters, non-breaking spaces become ordinary spaces, and leading
        and trailing whitespace is removed.
    """
    import html

    if not text:
        return ""
    # html.unescape turns "&nbsp;" into U+00A0; normalise that to a plain space.
    return html.unescape(str(text)).replace('\xa0', ' ').strip()
|
|
|
|
def parse_int(val):
    """Extract the first run of digits from *val* as an ``int``.

    Args:
        val: Any value; it is converted with ``str`` before searching.

    Returns:
        The integer formed by the first consecutive digits found, or ``None``
        when *val* is ``None``, an empty string, or contains no digits.
        Signs and decimals are not interpreted: ``"-5"`` gives ``5`` and
        ``12.5`` gives ``12``.
    """
    if val is None or val == '':
        return None
    match = re.search(r'\d+', str(val))
    if match is None:
        return None
    try:
        return int(match.group())
    except ValueError:
        # int() can refuse absurdly long digit strings; treat as "no number".
        return None
|
|
|
|
def extract_url(val):
    """Pull a URL out of a value that is either a string or a dict.

    Args:
        val: A string (returned unchanged), a dict (its ``'url'`` entry is
            returned, ``None`` when the key is absent), or anything else.

    Returns:
        The URL as described above, or ``None`` for any other input type.
    """
    if isinstance(val, str):
        return val
    return val.get('url') if isinstance(val, dict) else None
|
|
|
|
# Upsert of one facility row, keyed on slug.
# Note: $16 (status_updated_at) is wrapped in TO_DATE so that an empty string
# becomes NULL instead of crashing the statement.
# NOTE(review): for an existing slug only the columns listed under
# DO UPDATE SET are refreshed; confirm the remaining columns are meant to
# keep their stored values.
_FACILITY_UPSERT_SQL = """
    INSERT INTO facilities (
        name, slug, description, address, city, county, established_year, season,
        email, phone, website_url, image_url, logo_url, video_url,
        amenities, status_updated_at, gallery, banetype,
        ngf_number, golfbox_club_id, golfbox_booking_url,
        facebook_url, instagram_url, baneguide_url, flyfoto_url,
        golfbox_tournament_url, footnote, social_links, webcam_url,
        weather_url, architect,
        navn_standard_medlemskap, standard_medlemskap, standard_medlemskap_kommentarer,
        navn_rimeligste_alternativ, rimeligste_alternativ, rimeligste_alternativ_kommentarer,
        medlemskap_url
    ) VALUES (
        $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15::jsonb,
        TO_DATE(NULLIF($16, ''), 'YYYYMMDD'),
        $17::jsonb, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28::jsonb,
        $29, $30, $31, $32, $33, $34, $35, $36, $37, $38
    )
    ON CONFLICT (slug) DO UPDATE SET
        name = EXCLUDED.name,
        description = EXCLUDED.description,
        address = EXCLUDED.address,
        city = EXCLUDED.city,
        phone = EXCLUDED.phone,
        email = EXCLUDED.email,
        website_url = EXCLUDED.website_url,
        image_url = EXCLUDED.image_url,
        logo_url = EXCLUDED.logo_url,
        amenities = EXCLUDED.amenities,
        gallery = EXCLUDED.gallery,
        status_updated_at = EXCLUDED.status_updated_at,
        banetype = EXCLUDED.banetype,
        architect = EXCLUDED.architect
"""


def _resolve_media_url(ref):
    """Turn a media reference (integer ID, dict with 'url', or string) into a URL."""
    return get_url_from_id(ref) if isinstance(ref, int) else extract_url(ref)


def _fetch_posts_page(page):
    """Fetch one page of posts from WP_API_URL; return None when there is nothing more to import."""
    from http.client import HTTPException
    from urllib.error import HTTPError

    req = urllib.request.Request(f"{WP_API_URL}&page={page}", headers={'User-Agent': 'TeeOff-V9.2'})
    try:
        # A timeout keeps a stalled connection from hanging the import forever.
        with urllib.request.urlopen(req, timeout=60) as response:
            data = json.loads(response.read().decode())
    except HTTPError as err:
        # The WordPress REST API answers HTTP 400 once `page` is past the last
        # page; that is the normal end of pagination. Anything else is reported.
        if not (err.code == 400 and page > 1):
            print(f"⚠️ Henting av side {page} feilet: {err}")
        return None
    except (OSError, HTTPException, ValueError) as err:
        # Network failure or a body that is not valid UTF-8 JSON.
        print(f"⚠️ Henting av side {page} feilet: {err}")
        return None
    if not isinstance(data, list):
        print(f"⚠️ Henting av side {page} feilet: uventet svar")
        return None
    return data


async def _upsert_facility(conn, post, acf):
    """Download the media of one post, upsert its facilities row and return the row id."""
    slug = post['slug']
    name = decode_html(post.get('title', {}).get('rendered', ''))
    print(f"📦 Mapper {name}...")

    # Media & identifiers. `or [{}]` also covers an empty featured-media list,
    # which would otherwise raise IndexError on [0].
    featured = post.get('_embedded', {}).get('wp:featuredmedia') or [{}]
    local_main_img = download_media(featured[0].get('source_url'), slug, "main")
    local_logo = download_media(_resolve_media_url(acf.get('logo')), slug, "logo")

    # Gallery: keep only the slides that resolved to a usable URL.
    slides = acf.get('slides') or []
    downloaded = (
        download_media(_resolve_media_url(s), f"{slug}_{i}", "slide")
        for i, s in enumerate(slides)
    )
    local_gallery = [u for u in downloaded if u]

    # Golfbox: the GUID is wrapped in URL-encoded braces (%7B ... %7D). A bare
    # "%{" would be a malformed percent-escape inside the rUrl parameter.
    booking_id = acf.get('golfbox_booking_id')
    gb_booking_url = None
    if booking_id:
        guid = str(booking_id).strip().replace('{', '').replace('}', '').strip()
        gb_booking_url = (
            "http://www.golfbox.no/site/system/redirect.asp?locale=nb_NO"
            "&rUrl=%2Fsite%2Fressources%2Fbooking%2Fgrid.asp%3FRessource_GUID%3D"
            f"%7B{guid}%7D"
        )

    amenities = {
        "drivingrange": decode_html(acf.get("drivingrange")),
        "treningsgreen": decode_html(acf.get("treningsgreen")),
        "proshop": decode_html(acf.get("proshop")),
        "kafe": decode_html(acf.get("kafe")),
        "bilutleie": decode_html(acf.get("bilutleie")),
        "kolleutleie": decode_html(acf.get("kolleutleie")),
        "pro": decode_html(acf.get("pro")),
        "simulator": decode_html(acf.get("golfsimulator")),
        "antall_hull": decode_html(acf.get("antall_hull")),
    }

    # $16 is compared with '' in SQL, so it has to arrive as text (or NULL).
    status_date = acf.get('dato_for_oppdatert_status')
    status_date = str(status_date).strip() if status_date else None

    await conn.execute(
        _FACILITY_UPSERT_SQL,
        name,                                                        # $1
        slug,                                                        # $2
        decode_html(acf.get('beskrivelse')),                         # $3
        acf.get('gateadresse'),                                      # $4
        acf.get('postnummer_og_poststed'),                           # $5
        acf.get('fylke'),                                            # $6
        parse_int(acf.get('byggear')),                               # $7
        acf.get('sesong'),                                           # $8
        acf.get('e-post'),                                           # $9
        acf.get('telefon'),                                          # $10
        extract_url(acf.get('hjemmeside')),                          # $11
        local_main_img,                                              # $12
        local_logo,                                                  # $13
        None,                                                        # $14 video_url
        json.dumps(amenities),                                       # $15
        status_date,                                                 # $16
        json.dumps(local_gallery),                                   # $17
        decode_html(acf.get('banetype')),                            # $18
        parse_int(acf.get('klubbnummer_norges_golfforbund')),        # $19
        parse_int(acf.get('klubbnummer_golfbox')),                   # $20
        gb_booking_url,                                              # $21
        extract_url(acf.get('facebook_url')),                        # $22
        extract_url(acf.get('instagram_url')),                       # $23
        extract_url(acf.get('baneguide')),                           # $24
        extract_url(acf.get('flyfoto')),                             # $25
        extract_url(acf.get('golfbox')),                             # $26
        decode_html(acf.get('fotnote')),                             # $27
        json.dumps(acf.get('sosiale_lenker') or []),                 # $28
        decode_html(acf.get('webkamera')),                           # $29
        extract_url(acf.get('varmelding_yr')),                       # $30
        decode_html(acf.get('arkitekt')),                            # $31
        decode_html(acf.get('navn_standard_medlemskap')),            # $32
        parse_int(acf.get('standard_medlemskap')),                   # $33
        decode_html(acf.get('standard_medlemskap_kommentarer')),     # $34
        decode_html(acf.get('navn_rimeligste_alternativ')),          # $35
        parse_int(acf.get('rimeligste_alternativ')),                 # $36
        decode_html(acf.get('rimeligste_alternativ_kommentarer')),   # $37
        extract_url(acf.get('medlemskap_url')),                      # $38
    )
    return await conn.fetchval("SELECT id FROM facilities WHERE slug = $1", slug)


async def _import_courses(conn, fac_id, acf):
    """Insert the main and (if present) second course with their holes, then store the lengths."""
    fac_main_len = 0
    for suffix in ['', '_bane_to']:
        is_main = suffix == ''
        c_name = acf.get('navn_pa_hovedbane' if is_main else 'navn_pa_sekundar_bane') or ('Hovedbanen' if is_main else 'Bane 2')
        status = acf.get('banestatus' if is_main else 'banestatus_sekundar_bane')
        # Skip the second course when it is flagged as absent or has no par
        # value for hole 1.
        if not is_main and (status == 'finnes_ingen_bane_to' or not parse_int(acf.get('hull_1_par_bane_to'))):
            continue

        course_id = await conn.fetchval(
            'INSERT INTO courses (facility_id, name, status, par, physical_hole_count, is_main_course, tee_boxes, architect) VALUES ($1, $2, $3, $4, $5, $6, $7, $8) RETURNING id',
            fac_id,
            c_name,
            status,
            parse_int(acf.get('totalt_par' if is_main else 'totalt_par_bane_to')),
            parse_int(acf.get('antall_hull')) if is_main else None,
            is_main,
            json.dumps({"herrer": acf.get(f"utslag_herrer{suffix}"), "damer": acf.get(f"utslag_damer{suffix}")}),
            decode_html(acf.get('arkitekt')),
        )

        curr_len = 0
        for h_num in range(1, 19):
            p = parse_int(acf.get(f'hull_{h_num}_par{suffix}'))
            if not p:
                # Holes without a par value are not stored.
                continue
            idx = parse_int(acf.get(f'hull_{h_num}_index{suffix}'))
            lens = {k: parse_int(acf.get(f'{k}_hull_{h_num}{suffix}')) for k in ['lengst', 'lang', 'mellomlang', 'mellomkort', 'kort', 'kortest']}
            # The course length is the sum of the 'lengst' value of each hole.
            curr_len += (lens['lengst'] or 0)
            await conn.execute('INSERT INTO holes (course_id, hole_number, par, hcp_index, lengths) VALUES ($1, $2, $3, $4, $5::jsonb)', course_id, h_num, p, idx, json.dumps(lens))

        await conn.execute("UPDATE courses SET length_meters = $1 WHERE id = $2", curr_len, course_id)
        if is_main:
            fac_main_len = curr_len

    # The facility carries the length of its main course (0 when none was stored).
    await conn.execute("UPDATE facilities SET length_meters = $1 WHERE id = $2", fac_main_len, fac_id)


async def run_master_import():
    """Import all golf facilities from the WordPress API into the database.

    Empties ``courses`` and ``holes``, then walks ``WP_API_URL`` page by page.
    Every post is upserted into ``facilities`` (keyed on slug), its media is
    mirrored through ``download_media``, and its courses and holes are
    re-inserted. Paging stops at the first empty, failed or past-the-end page.

    The database connection is always closed, even when a post fails.

    NOTE(review): the TRUNCATE is not wrapped in a transaction here, so an
    error in the middle of the run leaves ``courses``/``holes`` partially
    filled until the next successful import.
    """
    print("🚀 Starter MASTER IMPORT v9.2 (Robust datakonvertering & Banetype)...")
    conn = await asyncpg.connect(DB_URL)
    try:
        # Only courses and holes (helper tables) are emptied.
        await conn.execute("TRUNCATE courses, holes RESTART IDENTITY CASCADE;")

        page = 1
        while True:
            data = _fetch_posts_page(page)
            if not data:
                break
            for post in data:
                acf = post.get('acf')
                if not isinstance(acf, dict):
                    # Guard against a non-object `acf` payload (e.g. [] or
                    # false), on which every .get() lookup would fail.
                    acf = {}
                fac_id = await _upsert_facility(conn, post, acf)
                await _import_courses(conn, fac_id, acf)
            page += 1
    finally:
        await conn.close()
    print("✅ IMPORT FERDIG!")
|
|
|
|
# Script entry point: run the import only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(run_master_import())
|