"""
Smile.One Web Scraper
=====================
ဝဘ်ဆိုက်မှ ဂိမ်းများ၊ ဝန်ဆောင်မှုများနှင့် လျှော့ဈေးများကို ရယူပါသည်။
"""

import argparse
import csv
import json
import os
import time
from typing import Optional
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


class SmileOneScraper:
    """Scraper for the Smile.One game top-up storefront.

    Collects merchant/game listings, "popular" section headings, and
    per-merchant detail pages, pacing every request with a configurable
    delay so the server is not hammered.
    """

    BASE_URL = "https://www.smile.one"

    def __init__(self, delay: float = 1.0):
        """
        Args:
            delay: Seconds to wait before each request (simple rate limit).
        """
        self.session = requests.Session()
        # Browser-like headers: some sites/CDNs reject the default
        # python-requests User-Agent outright.
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
        })
        self.delay = delay

    def _get_page(self, url: str) -> Optional["BeautifulSoup"]:
        """Fetch *url* with a plain GET and return the parsed soup.

        Returns:
            A BeautifulSoup document, or None if the request failed.
        """
        try:
            time.sleep(self.delay)  # throttle every request, including the first
            response = self.session.get(url, timeout=15)
            response.raise_for_status()
            return BeautifulSoup(response.text, "lxml")
        except requests.RequestException as e:
            print(f"Error fetching {url}: {e}")
            return None

    def scrape_merchants(self, url: Optional[str] = None) -> list[dict]:
        """Collect the games/services ("merchants") linked from *url*.

        Only links whose href contains "/merchant/" or
        "/entertainment/pay/" are considered. For each unique merchant
        URL the longest (most descriptive) name seen is kept.

        Args:
            url: Listing page to scrape; defaults to the /br/ storefront.

        Returns:
            A list of dicts with "name", "url" and "type" keys.
        """
        url = url or f"{self.BASE_URL}/br/"
        soup = self._get_page(url)
        if not soup:
            return []

        seen_urls: dict[str, str] = {}
        for link in soup.find_all("a", href=True):
            href = link.get("href", "")
            if "/merchant/" not in href and "/entertainment/pay/" not in href:
                continue

            # Absolutize the link and drop any query string.
            full_url = urljoin(self.BASE_URL, href).split("?")[0]
            name = self._extract_name(link) or self._name_from_url(full_url)
            if len(name) < 3:
                # Icon-only / junk links produce no usable name.
                continue

            # Keep the longest name observed for each merchant URL.
            if full_url not in seen_urls or len(name) > len(seen_urls[full_url]):
                seen_urls[full_url] = name

        return [
            {"name": name, "url": url_key, "type": "merchant"}
            for url_key, name in seen_urls.items()
        ]

    def _extract_name(self, link) -> str:
        """Extract a product name from the link text or a nearby heading.

        Handles the Portuguese UI captions of the /br/ storefront
        ("Adicionar aos favoritos", "Remover", "Comprar agora").
        """
        text = link.get_text(strip=True)
        if not text:
            return ""
        text_lower = text.lower()
        if "adicionar aos favoritos" in text_lower or "remover" in text_lower:
            # Favourite-toggle links: the product name is whatever remains
            # after stripping the button captions.
            name = text.replace("Adicionar aos favoritos", "").replace("Remover", "").strip()
            return name if len(name) > 2 else ""
        if "comprar agora" in text_lower:
            # "Buy now" buttons carry no name; walk up to 5 ancestors and
            # scan preceding headings for something usable.
            parent = link.find_parent()
            for _ in range(5):
                if not parent:
                    break
                for prev in parent.find_all_previous(["h1", "h2", "h3", "h4"]):
                    t = prev.get_text(strip=True)
                    if t and len(t) > 2 and "adicionar" not in t.lower() and "ver" not in t.lower():
                        return t
                parent = parent.find_parent()
        return text if len(text) > 2 else ""

    def _name_from_url(self, url: str) -> str:
        """Derive a human-readable name from the last meaningful URL segment."""
        parts = url.rstrip("/").split("/")
        for p in reversed(parts):
            # Skip generic path segments that are not product names.
            if p and p not in ("merchant", "game", "pay", "entertainment", "br"):
                return p.replace("-", " ").replace("_", " ").title()
        return "Product"

    def scrape_popular_games(self, url: Optional[str] = None) -> list[dict]:
        """Collect names from the "Populares" (popular) section headings.

        Args:
            url: Page to scrape; defaults to the /br/ storefront.

        Returns:
            A list of dicts with a single "name" key per heading.
        """
        url = url or f"{self.BASE_URL}/br/"
        soup = self._get_page(url)
        if not soup:
            return []

        popular = []
        # Section headers carry the game names; skip UI captions
        # ("Adicionar...", "Ver...").
        for heading in soup.find_all(["h2", "h3", "h4"]):
            text = heading.get_text(strip=True)
            if text and not text.startswith("Adicionar") and not text.startswith("Ver"):
                popular.append({"name": text})

        return popular

    def scrape_game_details(self, merchant_url: str) -> Optional[dict]:
        """Scrape details from a single merchant/game page.

        Returns:
            A dict with "url", "title", "discount", "prices" and
            "description" keys, or None if the page could not be fetched.
        """
        soup = self._get_page(merchant_url)
        if not soup:
            return None

        details = {
            "url": merchant_url,
            "title": None,
            "discount": None,
            "prices": [],
            "description": None
        }

        # Title: prefer the page's <h1>, fall back to <title>.
        title = soup.find("h1") or soup.find("title")
        if title:
            details["title"] = title.get_text(strip=True)

        # Discount/offer text: first short text node mentioning a
        # percentage, "OFF" or "Desconto".
        for elem in soup.find_all(string=lambda t: t and ("%" in str(t) or "OFF" in str(t).upper() or "Desconto" in str(t))):
            discount_text = elem.strip() if isinstance(elem, str) else elem.get_text(strip=True)
            if discount_text and len(discount_text) < 50:
                details["discount"] = discount_text
                break

        return details

    def scrape_all(self, base_url: Optional[str] = None) -> dict:
        """Scrape merchants and popular items from *base_url* in one pass.

        Args:
            base_url: Page to scrape; defaults to the /br/ storefront.

        Returns:
            A dict with "source", "merchants" and "popular" keys.
        """
        base_url = base_url or f"{self.BASE_URL}/br/"
        result = {
            "source": base_url,
            "merchants": self.scrape_merchants(base_url),
            "popular": self.scrape_popular_games(base_url),
        }
        return result


def main():
    """CLI entry point: scrape the site and save JSON (and optionally CSV)."""
    parser = argparse.ArgumentParser(
        description="Smile.One Web Scraper - Games & Services"
    )
    parser.add_argument(
        "-o", "--output",
        default="smile_one_data.json",
        help="Output file path (JSON)"
    )
    parser.add_argument(
        "-u", "--url",
        default="https://www.smile.one/br/",
        help="Scrape လုပ်မည့် URL"
    )
    parser.add_argument(
        "-d", "--delay",
        type=float,
        default=1.0,
        help="Request များကြား စောင့်ချိန် (စက္ကန့်)"
    )
    parser.add_argument(
        "--csv",
        action="store_true",
        help="CSV format ဖြင့် သိမ်းဆည်းမည်"
    )

    args = parser.parse_args()

    print("Smile.One Scraper starting...")
    scraper = SmileOneScraper(delay=args.delay)

    data = scraper.scrape_all(args.url)

    # Safety net: scrape_merchants already de-duplicates by URL, but keep
    # the saved output unique even if that implementation detail changes.
    seen = set()
    unique_merchants = []
    for m in data["merchants"]:
        if m["url"] not in seen:
            seen.add(m["url"])
            unique_merchants.append(m)
    data["merchants"] = unique_merchants

    # Save JSON (ensure_ascii=False keeps non-ASCII names readable).
    output_file = args.output
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    print(f"Saved JSON: {output_file}")

    # Optional CSV export via the stdlib csv module (no pandas needed).
    if args.csv and data["merchants"]:
        # splitext handles output names that don't end in ".json";
        # str.replace(".json", ".csv") would leave the name unchanged in
        # that case and silently overwrite the JSON file with CSV data.
        csv_file = os.path.splitext(output_file)[0] + ".csv"
        # utf-8-sig so spreadsheet apps auto-detect the encoding.
        with open(csv_file, "w", encoding="utf-8-sig", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=["name", "url", "type"])
            writer.writeheader()
            writer.writerows(data["merchants"])
        print(f"Saved CSV: {csv_file}")

    print(f"Done: {len(data['merchants'])} merchants, {len(data['popular'])} popular items")


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
