"""
Map SMM services to platform tabs using strict word/URL patterns.
- Prefer the service name (title) first — panels often miscategorize rows.
- If the title has no platform cue, use a specific category (not generic SMM labels).
- If multiple brands appear in the chosen text, the left-most mention wins.
"""

from __future__ import annotations

import re
from typing import Any

# Platform tab labels in their canonical order. This order is used three ways in this
# module: it is the order tabs are emitted by tabs_for_catalog, it is the order in which
# patterns are tried by _first_platform_in_text (so it also decides ties between
# platforms matching at the same position), and its last entry is the fallback label.
TAB_ORDER: list[str] = [
    "TikTok",
    "Instagram",
    "YouTube",
    "Facebook",
    "Telegram",
    "Twitter",
    "LinkedIn",
    "Snapchat",
    "Pinterest",
    "Reddit",
    "Spotify",
    "Discord",
    "Twitch",
    "SoundCloud",
    "Threads",
    "Tumblr",
    "VK",
    "Likee",
    "Kwai",
    "Other",  # fallback returned by detect_platform; never pattern-matched
]

# One regex per platform, keyed by the labels in TAB_ORDER. Patterns must stay conservative:
# - brand names are matched on word boundaries;
# - short URL hosts (t.me, fb.com, fb.me, pin.it, vk.com, snap.chat, threads.net/.com) are
#   guarded with (?<![a-z0-9-]) / (?![a-z0-9-]) so they cannot fire inside a longer host
#   such as "about.me/" or "spin.it";
# - bare 2-letter platform codes are avoided ("fb", "ig", "vk" need context); the one
#   exception is "yt", which is accepted as a standalone word.
# All patterns are case-insensitive.
_PATTERNS: dict[str, re.Pattern[str]] = {
    "TikTok": re.compile(
        r"\b(tiktok|tik[\s\-]?tok)\b|抖音",
        re.I,
    ),
    "Instagram": re.compile(
        # "Instagram Threads" names the Threads product. Both patterns would match at the
        # same start index and Instagram would win the tie, so the lookahead leaves that
        # phrase to the Threads pattern.
        r"\binstagram\b(?!\s+threads\b)|\binsta\.gram\b|"
        r"(?<![a-z])instagr\.am(?![a-z])|instagram\.com",
        re.I,
    ),
    "YouTube": re.compile(
        r"\byoutube\b|youtu\.be|youtube\.com|(?<![a-z])ytimg\.com(?![a-z])|"
        r"\byt\b|"
        # Also covers glued forms such as "yt_views" / "ytsubs" that \byt\b misses.
        r"\byt[\s\-_/]*(?:views?|subs?|subscribers?|likes?|shorts?|comments?|watch|stream)\b",
        re.I,
    ),
    "Facebook": re.compile(
        r"\bfacebook\b|facebook\.com|(?<![a-z0-9-])fb\.com(?![a-z0-9-])|"
        r"m\.facebook|l\.facebook|(?<![a-z0-9-])fb\.me(?![a-z0-9-])",
        re.I,
    ),
    "Telegram": re.compile(
        r"\btelegram\b|(?<![a-z0-9-])t\.me/|telegram\.me/|telegram\.org",
        re.I,
    ),
    "Twitter": re.compile(
        r"\btwitter\b|twitter\.com|"
        # x.com only as a standalone host (start of text or after whitespace/comma/semicolon).
        r"(?:^|[\s,;])(?:https?://)?(?:www\.)?x\.com(?:/|[\s?#]|$)|"
        r"\bx\s*/\s*twitter\b|\btwitter\s*/\s*x\b|"
        # A lone "x" counts only when followed by a metric word.
        r"\bx\s+(?:followers?|likes?|views?|retweets?|reposts?|comments?|impressions?|spaces?|subs?)\b|"
        r"\b(?:tweet|retweet)\b",
        re.I,
    ),
    "LinkedIn": re.compile(r"\blinkedin\b|linkedin\.com", re.I),
    "Snapchat": re.compile(
        r"\bsnapchat\b|(?<![a-z0-9-])snap\.chat(?![a-z0-9-])|snapchat\.com",
        re.I,
    ),
    "Pinterest": re.compile(
        r"\bpinterest\b|(?<![a-z0-9-])pin\.it(?![a-z0-9-])|pinterest\.com",
        re.I,
    ),
    "Reddit": re.compile(r"\breddit\b|reddit\.com", re.I),
    "Spotify": re.compile(r"\bspotify\b|spotify\.com", re.I),
    "Discord": re.compile(r"\bdiscord\b|discord\.gg|discord\.com", re.I),
    "Twitch": re.compile(r"\btwitch\b|twitch\.tv", re.I),
    "SoundCloud": re.compile(r"\bsoundcloud\b|soundcloud\.com", re.I),
    # Avoid matching the English word "threads" in unrelated titles — require Meta Threads URLs/branding.
    "Threads": re.compile(
        r"(?<![a-z0-9-])threads\.net(?![a-z0-9-])|(?<![a-z0-9-])threads\.com/|"
        r"meta\s+threads\b|\bthreads\s*app\b|"
        r"\bthreads\s+(?:followers?|likes?|views?|comments?|reposts?)\b|"
        r"\binstagram\s+threads\b|\big\s+threads\b",
        re.I,
    ),
    "Tumblr": re.compile(r"\btumblr\b|tumblr\.com", re.I),
    "VK": re.compile(
        r"(?<![a-z0-9-])vk\.com(?![a-z0-9-])|vkontakte|\bvk\s+video\b",
        re.I,
    ),
    "Likee": re.compile(r"\blikee\b|likee\.video", re.I),
    "Kwai": re.compile(r"\bkwai\b|kwai\.com", re.I),
}

# Category labels that are too vague to say anything about the platform (e.g. "Other",
# "Cheap", "Followers"). The pattern is anchored at both ends: the whole category must be
# exactly ONE of these words, optionally wrapped in whitespace or dashes, so a multi-word
# category such as "Best Services" is not classified as generic. Matching is
# case-insensitive. detect_platform always tries the service name first; a generic
# category is merely skipped in its category-only step.
_GENERIC_CATEGORY = re.compile(
    r"^[\s\-–—]*("  # optional leading whitespace / hyphen / en dash / em dash
    r"default|other|others?|misc|mixed|various|"
    r"all|general|services?|smm|panel|reseller|boost|"
    r"cheap|cheapest|best|top|premium|fast|instant|express|"
    r"new|hot|sale|promo|special|package|combo|bundle|"
    r"likes?|views?|followers?|subscribers?|comments?|members?|"
    r"worldwide|global|usa|uk|asia"
    r")[\s\-–—]*$",  # optional trailing whitespace / dashes, then end of string
    re.I,
)


def _is_generic_category(cat: str) -> bool:
    """
    Tell whether a category label is too vague to identify a platform.

    A label is generic when, after stripping whitespace, it is shorter than two
    characters (this includes empty and None) or it matches _GENERIC_CATEGORY.
    """
    label = (cat or "").strip()
    return len(label) < 2 or _GENERIC_CATEGORY.match(label) is not None


def _first_platform_in_text(text: str) -> str | None:
    """
    Return the platform whose pattern matches earliest (left-most) in ``text``.

    Every label in TAB_ORDER except "Other" that has an entry in _PATTERNS is
    searched once. The label with the smallest match start index wins; when two
    labels match at the same index, the one listed first in TAB_ORDER wins.

    Returns None when ``text`` is empty, whitespace-only or None, and when no
    pattern matches.
    """
    if not (text or "").strip():
        return None
    best_pos: int | None = None
    best_label: str | None = None
    # Labels are visited in TAB_ORDER, so replacing the current best only on a
    # strictly smaller start index keeps the earlier-listed label on a tie.
    for label in TAB_ORDER:
        if label == "Other":
            continue
        pat = _PATTERNS.get(label)
        if pat is None:
            continue
        m = pat.search(text)
        if m is None:
            continue
        pos = m.start()
        if best_pos is None or pos < best_pos:
            best_pos, best_label = pos, label
            if pos == 0:
                # Nothing can start before index 0 and later labels lose ties.
                break
    return best_label


def detect_platform(service: dict[str, Any]) -> str:
    """
    Pick the platform tab for one service row.

    Reads the "category" and "name" keys (missing/None/empty are treated as
    empty text) and scans candidate texts in priority order, returning the
    first platform found:

    1. the service name — it reflects the actual product, while the category
       is often a stale or wrong bucket;
    2. the category, unless it is a generic label;
    3. "<category> <name>" combined, or the category alone when there is no
       name.

    Returns "Other" when nothing matches.
    """
    category = (service.get("category") or "").strip()
    title = (service.get("name") or "").strip()

    def _texts_by_priority():
        # Lazy on purpose: later candidates are only built/checked if needed.
        if title:
            yield title
        if category and not _is_generic_category(category):
            yield category
        if category:
            yield f"{category} {title}" if title else category

    for candidate in _texts_by_priority():
        platform = _first_platform_in_text(candidate)
        if platform:
            return platform
    return "Other"


def annotate_services_platforms(services: list[dict[str, Any]]) -> None:
    """Store each service's detected tab under its "platform_tab" key, in place."""
    for entry in services:
        tab = detect_platform(entry)
        entry["platform_tab"] = tab


def tabs_for_catalog(services: list[dict[str, Any]]) -> list[str]:
    """
    List the tabs to show for a catalog: "All" first, then every platform that
    at least one service maps to, in TAB_ORDER order.

    Platforms are detected afresh via detect_platform; any existing
    "platform_tab" value on the services is not consulted.
    """
    detected = {detect_platform(entry) for entry in services}
    return ["All"] + [tab for tab in TAB_ORDER if tab in detected]