#!/usr/bin/env python3
"""
Crunch Mixpanel ad traffic data into report-ready JSON + inject into report.html.
Usage: python3 crunch.py [raw-json-path]
Default: raw/ad-traffic-2026-03-25.json
"""

import json
import sys
import os
from datetime import datetime, timezone

# Display label -> slug tables used to generate METRIC_MAP below.
_AD_PLATFORMS = (("X.com", "x"), ("Google", "google"), ("Meta", "meta"))
_AD_METRICS = (("Visits", "visits"), ("Registrations", "regs"), ("Subscriptions", "subs"))

# Series label -> (platform slug, metric slug).
# Labels look like "A. Visits - Ads (X.com)": they are lettered A..I in
# platform-major order, three metrics per platform.
METRIC_MAP = {
    f"{chr(ord('A') + plat_idx * len(_AD_METRICS) + metric_idx)}. {metric_label} - Ads ({plat_label})": (
        plat_slug,
        metric_slug,
    )
    for plat_idx, (plat_label, plat_slug) in enumerate(_AD_PLATFORMS)
    for metric_idx, (metric_label, metric_slug) in enumerate(_AD_METRICS)
}

def extract_url(article_data):
    """Return the first key of *article_data* that is not "$overall".

    Callers treat that key as the article URL (presumably the only other
    key in each entry - verify against the raw export). Returns None when
    no such key exists.
    """
    non_overall_keys = (name for name in article_data if name != "$overall")
    return next(non_overall_keys, None)

# Platform and metric slugs, in the order their columns appear in each record.
_PLATFORM_KEYS = ("x", "google", "meta")
_FIELD_KEYS = ("visits", "regs", "subs")

def _collect_articles(series_by_metric):
    """Merge the per-metric series into one record per title, sorted by total_regs."""
    articles = {}  # title -> article record
    for metric_key, (platform, field_type) in METRIC_MAP.items():
        for title, article_data in series_by_metric.get(metric_key, {}).items():
            if title == "$overall":
                continue

            record = articles.get(title)
            if record is None:
                record = {"title": title, "url": None}
                # Zero-fill every "<field>_<platform>" column so later sums
                # never hit a missing key.
                record.update(
                    (f"{field}_{plat}", 0)
                    for field in _FIELD_KEYS
                    for plat in _PLATFORM_KEYS
                )
                articles[title] = record

            # Keep the first URL seen for this title.
            if record["url"] is None:
                record["url"] = extract_url(article_data)

            # `or 0` turns a JSON null into 0; None would break the sums below.
            count = article_data.get("$overall", {}).get("all", 0) or 0
            record[f"{field_type}_{platform}"] = count

    article_list = list(articles.values())
    for record in article_list:
        for field in _FIELD_KEYS:
            record[f"total_{field}"] = sum(
                record[f"{field}_{plat}"] for plat in _PLATFORM_KEYS
            )

    # Stable sort: articles with equal registrations keep first-seen order.
    article_list.sort(key=lambda rec: rec["total_regs"], reverse=True)
    return article_list

def _summarize_platforms(article_list):
    """Build per-platform totals, article counts and percentage rates."""
    platforms = {}
    for plat in _PLATFORM_KEYS:
        v_key, r_key, s_key = (f"{field}_{plat}" for field in _FIELD_KEYS)
        total_visits = sum(a[v_key] for a in article_list)
        total_regs = sum(a[r_key] for a in article_list)
        total_subs = sum(a[s_key] for a in article_list)
        platforms[plat] = {
            # Articles with any activity at all on this platform.
            "articles": sum(
                1 for a in article_list
                if a[v_key] > 0 or a[r_key] > 0 or a[s_key] > 0
            ),
            "visits": total_visits,
            "regs": total_regs,
            "subs": total_subs,
            # Rates are percentages rounded to 2 places; 0 when the denominator is 0.
            "conversion_rate": round(total_regs / total_visits * 100, 2) if total_visits > 0 else 0,
            "sub_rate": round(total_subs / total_regs * 100, 2) if total_regs > 0 else 0,
        }
    return platforms

def _find_opportunities(article_list):
    """List articles with registrations on some platforms but none on others."""
    opportunities = []
    for a in article_list:
        if a["total_regs"] < 1:
            continue
        plat_regs = {plat: a[f"regs_{plat}"] for plat in _PLATFORM_KEYS}
        active = {p: v for p, v in plat_regs.items() if v > 0}
        missing = [p for p, v in plat_regs.items() if v == 0]
        if not active or not missing:
            continue
        # Ties go to the earliest platform in _PLATFORM_KEYS order.
        primary = max(active, key=active.get)
        opportunities.append({
            "title": a["title"],
            "url": a["url"],
            "primary_platform": primary,
            "primary_regs": active[primary],
            "primary_subs": a[f"subs_{primary}"],
            "missing_platforms": missing,
            "total_regs": a["total_regs"],
            "total_subs": a["total_subs"],
            "total_visits": a["total_visits"],
            # Best platform's registrations, weighted by how many platforms are untapped.
            "opportunity_score": active[primary] * len(missing),
        })
    opportunities.sort(key=lambda opp: opp["opportunity_score"], reverse=True)
    return opportunities

def crunch(raw_path):
    """Load a raw export and build the report data structure.

    The file at *raw_path* is read as UTF-8 JSON. The code reads
    ``data["series"][<metric label>][<title>]["$overall"]["all"]`` for each
    label in METRIC_MAP, plus an optional ``data["date_range"]``.

    Returns a dict with:
      - "articles": one record per title with per-platform visits/regs/subs
        and totals, sorted by total registrations (descending);
      - "opportunities": articles with registrations on at least one platform
        and none on at least one other, sorted by opportunity score;
      - "summary": per-platform stats, article count, date range and a UTC
        ISO-8601 "generated_at" timestamp.

    Raises KeyError if the JSON has no "series" key, and whatever ``open`` /
    ``json.load`` raise for an unreadable or malformed file.
    """
    # JSON is UTF-8 by spec; don't let the locale's default encoding decide.
    with open(raw_path, encoding="utf-8") as f:
        data = json.load(f)

    article_list = _collect_articles(data["series"])

    return {
        "articles": article_list,
        "opportunities": _find_opportunities(article_list),
        "summary": {
            "platforms": _summarize_platforms(article_list),
            "total_articles": len(article_list),
            "date_range": data.get("date_range", {}),
            "generated_at": datetime.now(timezone.utc).isoformat(),
        },
    }

def _inject_report_data(html, report_data):
    """Swap the first line containing ``const REPORT_DATA`` for fresh data.

    Returns ``(new_html, replaced)``. The WHOLE matching line is replaced, so
    the ``<script>`` tags must sit on their own lines in the template or they
    are lost. Later lines mentioning REPORT_DATA are left untouched.
    """
    # "<" can only occur inside JSON string values, so escaping it is lossless
    # for the JS parser. It stops a title or URL containing "</script>" or
    # "<!--" from ending or corrupting the enclosing <script> element.
    payload = json.dumps(report_data).replace("<", "\\u003c")
    data_line = f"const REPORT_DATA = {payload};\n"

    lines = html.splitlines(keepends=True)
    for idx, line in enumerate(lines):
        if "const REPORT_DATA" in line:
            lines[idx] = data_line
            return "".join(lines), True
    return html, False

def main():
    """CLI entry point: crunch the raw export, write outputs, print a summary.

    Reads the raw JSON path from ``sys.argv[1]`` (falling back to the default
    path), writes ``report-data.json`` to the current directory, and injects
    the data into ``report.html`` if that file exists. Exits with status 1
    when the raw file is missing.
    """
    raw_path = sys.argv[1] if len(sys.argv) > 1 else "raw/ad-traffic-2026-03-25.json"

    if not os.path.exists(raw_path):
        # Fatal errors go to stderr so they don't mix with the normal report output.
        print(f"Error: {raw_path} not found", file=sys.stderr)
        sys.exit(1)

    print(f"Crunching {raw_path}...")
    report_data = crunch(raw_path)

    # Write JSON
    with open("report-data.json", "w", encoding="utf-8") as f:
        json.dump(report_data, f, indent=2)
    print(f"Wrote report-data.json ({len(report_data['articles'])} articles)")

    # Inject into report.html
    html_path = "report.html"
    if os.path.exists(html_path):
        # Explicit UTF-8 so non-ASCII template text survives the round trip
        # whatever the platform's default encoding is.
        with open(html_path, encoding="utf-8") as f:
            html = f.read()

        new_html, replaced = _inject_report_data(html, report_data)
        if replaced:
            with open(html_path, "w", encoding="utf-8") as f:
                f.write(new_html)
            print(f"Injected data into {html_path}")
        else:
            print(f"Warning: no REPORT_DATA line found in {html_path}")
    else:
        print(f"Warning: {html_path} not found, skipping injection")

    # Print summary
    s = report_data["summary"]
    print("\n--- Summary ---")
    print(f"Date range: {s['date_range'].get('from_date', '?')} → {s['date_range'].get('to_date', '?')}")
    print(f"Total articles: {s['total_articles']}")
    for plat, stats in s["platforms"].items():
        print(f"  {plat}: {stats['articles']} articles, {stats['visits']} visits, {stats['regs']} regs ({stats['conversion_rate']}% CVR), {stats['subs']} subs ({stats['sub_rate']}% sub rate)")
    print(f"Articles to cross-promote: {len(report_data['opportunities'])}")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
