"""
Visit each smile.one merchant URL with Playwright, read form fields via order_automation._FORM_FIELDS_JS
(skips header/nav/footer so promo E-mail is not captured), write manual_account_fields_scraped.json.

  python tools/snapshot_account_fields.py              # resume partial file
  python tools/snapshot_account_fields.py --force        # second pass: re-scrape every merchant slug from scratch

After internet drop: run again without --force to continue.
"""

from __future__ import annotations

import argparse
import json
import sys
import time
from pathlib import Path

# Make the repo root importable so `order_automation` resolves when this file
# is run as a script from tools/.
ROOT = Path(__file__).resolve().parent.parent
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

DATA = ROOT / "smile_one_data.json"  # input: merchant list ({"merchants": [...]})
OUT = ROOT / "manual_account_fields_scraped.json"  # output + resume state

# Substrings that mark a scraped form-field key as noise (site search boxes,
# coupon/newsletter inputs, tracking params, anti-bot and CSRF tokens).
# Matched case-insensitively against the lowercased key in _keep().
_SKIP_KEY_FRAGMENTS = (
    "search",
    "coupon",
    "newsletter",
    "utm_",
    "g-recaptcha",
    "honeypot",
    "csrf",
    "_token",
)


def _slug(url: str) -> str | None:
    from urllib.parse import urlparse

    p = urlparse((url or "").strip())
    parts = [x for x in p.path.strip("/").split("/") if x]
    for i, seg in enumerate(parts):
        if seg == "merchant" and i + 1 < len(parts):
            if parts[i + 1] == "game" and i + 2 < len(parts):
                return parts[i + 2].lower()
            return parts[i + 1].lower()
        if seg == "pay" and i + 1 < len(parts):
            return parts[i + 1].lower()
    return None


def _keep(f: dict) -> bool:
    """Return True when the field dict has a non-empty key that is not noise.

    A field is dropped when its key is blank/missing or when the lowercased
    key contains any fragment from ``_SKIP_KEY_FRAGMENTS``.
    """
    key = (f.get("key") or "").strip().lower()
    if not key:
        return False
    return not any(fragment in key for fragment in _SKIP_KEY_FRAGMENTS)


def _trim_fields(fields: list[dict], max_fields: int = 8) -> list[dict]:
    """Normalize scraped field dicts into a compact schema, capped in size.

    Noise fields (per ``_keep``) are skipped; label/placeholder text is
    truncated; at most ``max_fields`` entries are returned, in input order.
    """
    kept: list[dict] = []
    for field in fields:
        if len(kept) >= max_fields:
            break
        if not _keep(field):
            continue
        entry = {
            "key": field["key"],
            "label": (field.get("label") or field["key"])[:200],
            "type": field.get("type") or "text",
            "placeholder": (field.get("placeholder") or "")[:300],
            "required": bool(field.get("required")),
        }
        # Select-style fields carry an options list; omit the key otherwise.
        if field.get("options"):
            entry["options"] = field["options"]
        kept.append(entry)
    return kept


def _save(
    by_slug: dict[str, list[dict]],
    errors: dict[str, str],
    meta: dict,
) -> None:
    """Persist current progress to ``OUT`` as pretty-printed UTF-8 JSON.

    Called after every merchant so an interrupted run loses at most one
    merchant's worth of work.
    """
    document = {
        "by_slug": by_slug,
        "errors": errors,
        "_meta": meta,
    }
    serialized = json.dumps(document, ensure_ascii=False, indent=2)
    OUT.write_text(serialized, encoding="utf-8")


def _load_existing() -> tuple[dict[str, list[dict]], dict[str, str], dict | None]:
    """Load a previous snapshot from ``OUT`` for resuming.

    Returns ``(by_slug, errors, meta)``; any missing, unparsable, or
    unexpectedly-shaped file yields ``({}, {}, None)`` so the caller can
    simply start over (best-effort by design).
    """
    if not OUT.is_file():
        return {}, {}, None
    try:
        # The .get() calls stay inside the try so a JSON document that is
        # not an object (e.g. a list) also falls back to the empty result.
        parsed = json.loads(OUT.read_text(encoding="utf-8"))
        return (
            parsed.get("by_slug") or {},
            parsed.get("errors") or {},
            parsed.get("_meta"),
        )
    except Exception:
        return {}, {}, None


def main() -> None:
    """Scrape visible account-form fields for every merchant and persist them.

    Progress (and any per-merchant errors) is written to OUT after every
    merchant, so an interrupted run can be resumed by re-running without
    --force; already-scraped slugs are then skipped.
    """
    ap = argparse.ArgumentParser(description="Snapshot smile.one account form fields per merchant.")
    ap.add_argument(
        "--force",
        action="store_true",
        help="Ignore existing manual_account_fields_scraped.json and scrape from scratch.",
    )
    args = ap.parse_args()
    resume = not args.force

    # Imported lazily, after the sys.path bootstrap above; per the module
    # docstring, order_automation drives Playwright, so this import is heavy.
    from order_automation import run_checkout_requirements

    # Use the saved login session only when the auth file exists.
    auth = (ROOT / "smile_auth.json").is_file()
    use_sess = auth
    print(f"Using saved session: {use_sess} (smile_auth.json {'found' if auth else 'missing'})")

    by_slug: dict[str, list[dict]] = {}
    errors: dict[str, str] = {}
    prev_meta: dict | None = None

    if resume:
        by_slug, errors, prev_meta = _load_existing()
        print(f"Resume: loaded {len(by_slug)} slugs from {OUT.name} (errors recorded: {len(errors)})")
    else:
        print("Starting fresh (--force).")

    # One meta dict for the whole run; it is re-saved with every snapshot and
    # records whether this run resumed a previous (partial) one.
    meta: dict = {
        "scraped_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "session": use_sess,
        "resume": resume,
        "previous_run": prev_meta.get("scraped_at") if isinstance(prev_meta, dict) else None,
    }

    merchants = json.loads(DATA.read_text(encoding="utf-8")).get("merchants") or []

    for i, m in enumerate(merchants):
        url = (m.get("url") or "").strip()
        s = _slug(url)
        if not s:
            # No recognizable slug: record under the raw URL (or index) and move on.
            errors[url or f"#{i}"] = "no_slug"
            _save(by_slug, errors, meta)
            continue

        # Resume mode: skip slugs that already have saved (non-empty) fields.
        if resume and s in by_slug and by_slug[s]:
            print(f"[{i+1}/{len(merchants)}] {s} — skip (already saved)", flush=True)
            continue

        print(f"[{i+1}/{len(merchants)}] {s} …", flush=True)
        try:
            res = run_checkout_requirements(
                url, first_screen_only=True, use_saved_session=use_sess
            )
            if not res.get("ok"):
                # Truncate arbitrary failure payloads so OUT stays readable.
                errors[s] = res.get("message") or str(res)[:200]
                _save(by_slug, errors, meta)
                time.sleep(1.2)
                continue
            raw = res.get("fields") or []
            trimmed = _trim_fields(raw)
            if not trimmed:
                errors[s] = "no_visible_fields"
                _save(by_slug, errors, meta)
                time.sleep(1.2)
                continue
            by_slug[s] = trimmed
            # Success supersedes any error recorded for this slug on an earlier pass.
            if s in errors:
                del errors[s]
        except Exception as e:
            errors[s] = str(e)[:200]
        # Checkpoint after every merchant; the sleep throttles page loads
        # (presumably to stay polite to the site — confirm rate limits).
        _save(by_slug, errors, meta)
        time.sleep(1.2)

    meta["completed"] = True
    _save(by_slug, errors, meta)
    print(f"Done — wrote {OUT} — ok: {len(by_slug)}  errors: {len(errors)}")


if __name__ == "__main__":
    main()
