#!/usr/bin/env python3
"""Budget Transfer Bot Argentina — Twitter/X MVP.

Dry-run by default. Posts only with --post and ENABLE_POSTING=1.
"""
from __future__ import annotations

import argparse
import datetime as dt
import hashlib
import json
import os
import sqlite3
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any

import requests

API_URL = "https://www.presupuestoabierto.gob.ar/api/v1/credito?format=json"
MAX_TWEET_LEN = 280


@dataclass
class Candidate:
    """A generated post (headline tweet plus optional thread) and the data behind it."""
    # Topic key set by the builder: "universidades" or "atn".
    topic: str
    # Priority used by pick_candidate in "auto" mode; the highest score wins.
    score: int
    # Headline tweet text; main() rejects it when longer than MAX_TWEET_LEN.
    tweet: str
    # Query parameters used to build the post; stored in the DB metadata column.
    payload: dict[str, Any]
    # API rows the figures came from; stored in the DB metadata and echoed by --json.
    source_rows: dict[str, Any]
    # Optional follow-up tweets, posted as a reply chain when --thread is given.
    thread: list[str] | None = None


def require_token() -> str:
    """Return the API token from ``PRESUPUESTO_ABIERTO_TOKEN``.

    Surrounding whitespace is stripped.

    Raises:
        SystemExit: If the variable is unset or blank.
    """
    raw = os.environ.get("PRESUPUESTO_ABIERTO_TOKEN", "")
    cleaned = raw.strip()
    if cleaned:
        return cleaned
    raise SystemExit("Missing PRESUPUESTO_ABIERTO_TOKEN")


def api_post(token: str, payload: dict[str, Any]) -> list[dict[str, Any]]:
    """POST a query to the Presupuesto Abierto endpoint and return the decoded rows.

    Args:
        token: Bearer token sent in the ``Authorization`` header.
        payload: Query definition, sent as the JSON request body.

    Returns:
        The decoded JSON response, which must be a list.

    Raises:
        RuntimeError: On a non-200 status, on a body that is not valid JSON,
            or when the decoded JSON is not a list. Response snippets in the
            message have the token redacted.
    """

    def redacted(text: str, limit: int) -> str:
        # Redact BEFORE truncating: cutting first could leave a partial token
        # at the end of the snippet that the replace would no longer match.
        if token:
            text = text.replace(token, "[REDACTED]")
        return text[:limit]

    r = requests.post(
        API_URL,
        headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json", "Accept": "application/json"},
        json=payload,
        timeout=60,
    )
    if r.status_code != 200:
        body = redacted(r.text, 800)
        raise RuntimeError(f"Presupuesto Abierto API error {r.status_code}: {body}")
    try:
        data = r.json()
    except ValueError as exc:
        # requests' JSON decode errors derive from ValueError; report them the
        # same way as every other API failure instead of leaking a raw decode error.
        body = redacted(r.text, 200)
        raise RuntimeError(f"Presupuesto Abierto API returned invalid JSON: {body}") from exc
    if not isinstance(data, list):
        raise RuntimeError(f"Unexpected API response: {type(data).__name__}")
    return data


def today_ar() -> dt.date:
    """Return the current calendar date at a fixed UTC-3 offset."""
    utc_minus_3 = dt.timezone(dt.timedelta(hours=-3))
    now = dt.datetime.now(tz=utc_minus_3)
    return now.date()


def date_str(d: dt.date) -> str:
    """Return ``d`` rendered by its own ``isoformat()`` (``YYYY-MM-DD`` for a date)."""
    iso = d.isoformat()
    return iso


def fmt_millions(value: float) -> str:
    """Render an amount given in millions of ARS with a readable unit.

    Uses "billones" from 1,000,000 up (two decimals), "mil millones" from
    1,000 up (one decimal below 100, none from 100), and "millones" otherwise.
    Decimal comma and dot thousands separator are used throughout. Falsy
    input is treated as zero.
    """
    amount = float(value or 0)
    if amount >= 1_000_000:
        in_billones = amount / 1_000_000
        return f"${in_billones:.2f} billones".replace(".", ",")
    if amount >= 1_000:
        in_thousands = amount / 1_000
        return (
            f"${in_thousands:,.0f} mil millones".replace(",", ".")
            if in_thousands >= 100
            else f"${in_thousands:.1f} mil millones".replace(".", ",")
        )
    return f"${amount:,.0f} millones".replace(",", ".")


def pct(num: float, den: float) -> str:
    """Return ``num / den`` as a percentage with one decimal and a decimal comma.

    Returns ``"N/A"`` when ``den`` is falsy or not positive.
    """
    if not den or den <= 0:
        return "N/A"
    ratio = num / den * 100
    rendered = format(ratio, ".1f")
    return (rendered + "%").replace(".", ",")


def one_decimal(value: float) -> str:
    """Format ``value`` with one decimal place, using a comma as decimal mark."""
    fixed = format(value, ".1f")
    return fixed.replace(".", ",")


def shorten_tweet(text: str) -> str:
    """Collapse whitespace and shrink ``text`` to fit within ``MAX_TWEET_LEN``.

    Length is measured with ``len()`` (code points). Steps, stopping at the
    first one that fits:

    1. Collapse every run of whitespace to a single space.
    2. Apply the abbreviations below one at a time, in order.
    3. Truncate and append an ellipsis.

    Args:
        text: Raw tweet text.

    Returns:
        The shortened text, never longer than ``MAX_TWEET_LEN``.
    """
    text = " ".join(text.split())
    if len(text) <= MAX_TWEET_LEN:
        return text
    replacements = [
        ("Universidad de Buenos Aires", "UBA"),
        ("se registraron pagos", "registró pagos"),
        ("universidades nacionales", "univ. nacionales"),
        ("Hospitales universitarios", "Hosp. universitarios"),
        ("crédito vigente", "créd. vigente"),
        ("Presupuesto Abierto", "PA"),
    ]
    for old, new in replacements:
        text = text.replace(old, new)
        if len(text) <= MAX_TWEET_LEN:
            return text
    # Derive the cut from the limit instead of a literal 277, keeping the same
    # 3-character margin, so changing MAX_TWEET_LEN cannot yield oversize tweets.
    cut = max(MAX_TWEET_LEN - 3, 0)
    return text[:cut].rstrip() + "…"


def last_available_university_day(token: str, min_total_millions: float = 1_000.0) -> str:
    """Return the most recent day with a material university payment.

    Presupuesto Abierto can publish tiny adjustment rows after the last large
    payroll/payment day. For social output the latest day whose paid amount
    reaches ``min_total_millions`` is preferred over a technically newer but
    negligible correction. If no row reaches the threshold, the first row
    returned (newest, per the requested ordering) is used.

    Raises:
        RuntimeError: If the query returns no rows.
    """
    query = {
        "title": "Últimos pagos universidades",
        "ejercicios": [today_ar().year],
        "columns": ["impacto_presupuestario_fecha", "credito_pagado"],
        "filters": [
            {"column": "inciso_id", "operator": "equal", "value": "5"},
            {"column": "credito_pagado", "operator": "greater_than", "value": "0"},
            {"column": "parcial_desc", "operator": "like", "value": "%universidades nacionales%"},
        ],
        "order": [{"column": "impacto_presupuestario_fecha", "order": "desc"}],
    }
    rows = api_post(token, query)
    if not rows:
        raise RuntimeError("No university paid-transfer rows found")
    # First row (in API order) that clears the threshold, else the first row.
    chosen = next(
        (row for row in rows if float(row.get("credito_pagado") or 0) >= min_total_millions),
        rows[0],
    )
    return chosen["impacto_presupuestario_fecha"][:10]


def build_universidades(token: str) -> Candidate:
    """Build the national-universities candidate: headline tweet plus explainer thread.

    Runs four queries for the current year: payments on the latest material
    day, the same day broken down by ``subparcial_desc``, the same day
    restricted to university hospitals, and the year-to-date paid/current
    credit totals.

    Args:
        token: Presupuesto Abierto API token.

    Returns:
        A ``Candidate`` with topic ``"universidades"`` and score 100.

    Raises:
        RuntimeError: If the API returns no payment rows for the selected day,
            or on any API failure.
    """
    year = today_ar().year
    day = last_available_university_day(token)
    filters = [
        {"column": "inciso_id", "operator": "equal", "value": "5"},
        {"column": "credito_pagado", "operator": "greater_than", "value": "0"},
        {"column": "impacto_presupuestario_fecha", "operator": "equal", "value": day},
        {"column": "parcial_desc", "operator": "like", "value": "%universidades nacionales%"},
    ]
    day_rows = api_post(token, {
        "title": "Universidades nacionales pagos del día",
        "ejercicios": [year],
        "columns": ["impacto_presupuestario_fecha", "parcial_desc", "credito_pagado", "ultima_actualizacion_fecha"],
        "filters": filters,
    })
    if not day_rows:
        # Explicit error instead of a bare IndexError on [0].
        raise RuntimeError(f"No university payment rows found for {day}")
    # First row is kept as-is for the stored metadata (shape unchanged).
    total = day_rows[0]
    top = api_post(token, {
        "title": "Top universidades pagos del día",
        "ejercicios": [year],
        "columns": ["impacto_presupuestario_fecha", "subparcial_desc", "credito_pagado"],
        "filters": filters,
        "order": [{"column": "credito_pagado", "order": "desc"}],
    })
    hosp = api_post(token, {
        "title": "Hospitales universitarios pagos del día",
        "ejercicios": [year],
        "columns": ["impacto_presupuestario_fecha", "actividad_desc", "credito_pagado", "ultima_actualizacion_fecha"],
        "filters": filters + [{"column": "actividad_desc", "operator": "like", "value": "%Hospitales Universitarios%"}],
    })
    annual = api_post(token, {
        "title": "Universidades nacionales acumulado anual",
        "ejercicios": [year],
        "columns": ["parcial_desc", "credito_pagado", "credito_vigente", "ultima_actualizacion_fecha"],
        "filters": [
            {"column": "inciso_id", "operator": "equal", "value": "5"},
            {"column": "parcial_desc", "operator": "like", "value": "%universidades nacionales%"},
        ],
    })
    # The day query can return several rows (the LIKE filter may match more
    # than one parcial_desc), so sum them like the annual and hospital figures
    # instead of reporting only the first row.
    day_paid = sum(float(r.get("credito_pagado") or 0) for r in day_rows)
    annual_paid = sum(float(r.get("credito_pagado") or 0) for r in annual)
    annual_vig = sum(float(r.get("credito_vigente") or 0) for r in annual)
    top1 = top[0] if top else {}
    hosp_paid = sum(float(r.get("credito_pagado") or 0) for r in hosp)

    # Label the top recipient from the data rather than assuming it is UBA.
    # "UBA" is kept when the row is UBA, and as the legacy fallback when the
    # API supplies no description. Assumes subparcial_desc holds the
    # institution name - confirm against the API.
    top_desc = " ".join(str(top1.get("subparcial_desc") or "").split())
    if not top_desc or "universidad de buenos aires" in top_desc.casefold():
        top_label = "UBA"
    else:
        top_label = top_desc

    # Tolerate an empty annual result instead of raising IndexError.
    annual_cut = annual[0].get("ultima_actualizacion_fecha") if annual else None
    cut = (total.get("ultima_actualizacion_fecha") or annual_cut or "")[:10]
    cut_dm = cut[8:10] + "/" + cut[5:7] if len(cut) >= 10 else "s/d"
    day_dm = day[8:10] + "/" + day[5:7]
    execution = pct(annual_paid, annual_vig)
    tweet = (
        f"Universidades: tener presupuesto ≠ haber cobrado. "
        f"En plena marcha, Presupuesto Abierto registra {fmt_millions(day_paid)} pagados el {day_dm} "
        f"a universidades nacionales. {top_label}: {fmt_millions(top1.get('credito_pagado', 0))}. "
        f"¿Qué significa ejecución {execution}? Abro hilo 👇"
    )
    # The year in the thread follows the queried year (was hard-coded "2026").
    if annual_vig:
        annual_line = (
            f"3/ Universidades nacionales {year}: pagado {fmt_millions(annual_paid)} sobre crédito vigente {fmt_millions(annual_vig)}. "
            f"Lectura simple: de cada $100 autorizados, figuran pagos por ${one_decimal(annual_paid/annual_vig*100)}."
        )
    else:
        annual_line = "3/ Universidades nacionales: crédito vigente informado en cero; no se calcula porcentaje."
    thread = [
        shorten_tweet(
            f"1/ Fuente: Presupuesto Abierto, corte {cut_dm}. Montos nominales en pesos. "
            f"El dato mide pagos registrados, no anuncios ni necesidades estimadas."
        ),
        shorten_tweet(
            f"2/ Cómo leer el %: crédito vigente = plata autorizada; pagado = plata efectivamente registrada como pago. "
            f"Ejecución {execution} significa que se pagó ese porcentaje del crédito vigente {year}."
        ),
        shorten_tweet(annual_line),
        shorten_tweet(
            f"4/ Hospitales universitarios: el {day_dm} figuran pagos por {fmt_millions(hosp_paid)}. "
            f"Ojo: no todo pago universitario es hospitalario; salarios, funcionamiento y hospitales son partidas distintas."
        ),
        shorten_tweet(
            "5/ Qué NO dice el dato: no mide inflación, suficiencia del presupuesto ni intencionalidad política. "
            "Sí muestra ritmo de pagos oficiales respecto del crédito vigente."
        ),
    ]
    return Candidate(
        topic="universidades",
        score=100,
        tweet=shorten_tweet(tweet),
        payload={"day": day, "year": year},
        source_rows={"total": total, "top": top[:10], "hospitales": hosp, "annual": annual},
        thread=thread,
    )


def build_atn(token: str) -> Candidate | None:
    """Build a candidate from the most recent ATN payment in the last 14 days.

    The query is ordered by date (newest first) and then by paid amount
    (largest first); the first row returned becomes the tweet.

    Args:
        token: Presupuesto Abierto API token.

    Returns:
        A ``Candidate`` with topic ``"atn"`` and score 70, or ``None`` when
        the API returns no rows for the window.
    """
    today = today_ar()
    since_day = today - dt.timedelta(days=14)
    since = date_str(since_day)
    # The 14-day window can reach back across 1 January; query every fiscal
    # year it touches so late-December payments are not missed in early January.
    years = sorted({since_day.year, today.year})
    filters = [
        {"column": "inciso_id", "operator": "equal", "value": "5"},
        {"column": "credito_pagado", "operator": "greater_than", "value": "0"},
        {"column": "impacto_presupuestario_fecha", "operator": "greater_equal_than", "value": since},
        {"column": "actividad_desc", "operator": "like", "value": "%Aportes del Tesoro Nacional%"},
    ]
    rows = api_post(token, {
        "title": "ATN recientes",
        "ejercicios": years,
        "columns": ["impacto_presupuestario_fecha", "actividad_desc", "ubicacion_geografica_desc", "credito_pagado", "ultima_actualizacion_fecha"],
        "filters": filters,
        "order": [{"column": "impacto_presupuestario_fecha", "order": "desc"}, {"column": "credito_pagado", "order": "desc"}],
    })
    if not rows:
        return None
    r = rows[0]
    day = r["impacto_presupuestario_fecha"][:10]
    day_dm = day[8:10] + "/" + day[5:7]
    cut = (r.get("ultima_actualizacion_fecha") or "")[:10]
    cut_dm = cut[8:10] + "/" + cut[5:7] if len(cut) >= 10 else "s/d"
    # `or` rather than a .get default: a key present with a null value would
    # otherwise be rendered as the literal text "None" in the tweet.
    place = r.get("ubicacion_geografica_desc") or "s/d"
    tweet = (
        f"ATN: Presupuesto Abierto registra pago el {day_dm} a {place} "
        f"por {fmt_millions(r.get('credito_pagado',0))}. Contexto: sigue la discusión por reparto de ATN a provincias. Corte: {cut_dm}."
    )
    thread = [
        shorten_tweet("1/ ATN = Aportes del Tesoro Nacional. Son transferencias discrecionales a provincias/municipios, separadas de coparticipación automática."),
        shorten_tweet(f"2/ Fuente: Presupuesto Abierto, corte {cut_dm}. El monto publicado es crédito pagado registrado, no anuncio político."),
    ]
    return Candidate("atn", 70, shorten_tweet(tweet), {"since": since}, {"row": r}, thread=thread)


def pick_candidate(token: str, topic: str) -> Candidate:
    """Return the candidate for ``topic`` (``"universidades"``, ``"atn"`` or ``"auto"``).

    In ``"auto"`` mode both builders run and the highest score wins; on a tie
    the universities candidate, built first, is kept.

    Raises:
        RuntimeError: If ``topic`` is ``"atn"`` and no ATN row is available.
        ValueError: If ``topic`` is not one of the supported values.
    """
    if topic == "universidades":
        return build_universidades(token)
    if topic == "atn":
        atn_only = build_atn(token)
        if atn_only:
            return atn_only
        raise RuntimeError("No ATN candidate found")
    if topic != "auto":
        raise ValueError(topic)
    # Current MVP heuristic: universities outrank ATN when there is active university agenda.
    # Later: replace/augment with an agent that scores today's X/news topics.
    pool = [build_universidades(token)]
    atn = build_atn(token)
    if atn:
        pool.append(atn)
    return max(pool, key=lambda cand: cand.score)


def db_connect(path: str) -> sqlite3.Connection:
    """Open the SQLite database at ``path`` and make sure ``generated_posts`` exists.

    Returns the open connection; the caller owns it.
    """
    schema = """CREATE TABLE IF NOT EXISTS generated_posts (
        id TEXT PRIMARY KEY,
        created_at TEXT NOT NULL,
        topic TEXT NOT NULL,
        tweet TEXT NOT NULL,
        posted INTEGER NOT NULL DEFAULT 0,
        metadata TEXT NOT NULL
    )"""
    connection = sqlite3.connect(path)
    connection.execute(schema)
    return connection


def post_id(tweet: str) -> str:
    """Return the first 16 hex digits of the SHA-256 of the UTF-8 encoded tweet."""
    digest = hashlib.sha256()
    digest.update(tweet.encode("utf-8"))
    return digest.hexdigest()[:16]


def save_candidate(conn: sqlite3.Connection, cand: Candidate, posted: bool) -> tuple[str, bool]:
    """Record ``cand`` in ``generated_posts``, keyed by the hash of its tweet text.

    Args:
        conn: Open connection returned by ``db_connect``.
        cand: Candidate to persist.
        posted: Whether the tweet was actually published in this run.

    Returns:
        ``(post_id, is_new)``. ``is_new`` is ``False`` when the INSERT was
        rejected with an integrity error, i.e. the same tweet text was already
        stored. In that case the existing row is marked as posted if
        ``posted`` is true, so a tweet first stored by a dry run does not stay
        flagged as unposted after it is published.
    """
    pid = post_id(cand.tweet)
    metadata = json.dumps({"payload": cand.payload, "source_rows": cand.source_rows}, ensure_ascii=False)
    # Naive UTC timestamp in the same text format utcnow() produced, without
    # calling the deprecated datetime.utcnow().
    created_at = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None).isoformat()
    try:
        # The connection context manager commits on success and rolls back on
        # error, so a rejected INSERT does not leave a transaction open.
        with conn:
            conn.execute(
                "INSERT INTO generated_posts(id, created_at, topic, tweet, posted, metadata) VALUES (?,?,?,?,?,?)",
                (pid, created_at, cand.topic, cand.tweet, int(posted), metadata),
            )
    except sqlite3.IntegrityError:
        if posted:
            with conn:
                conn.execute("UPDATE generated_posts SET posted = 1 WHERE id = ?", (pid,))
        return pid, False
    return pid, True


def _xurl_tweet_id(args: list[str]) -> str:
    """Run ``xurl`` with ``args`` and return the tweet id found in its JSON stdout.

    Returns ``""`` when stdout is not JSON or has no ``data.id``; the command
    itself succeeded in that case, only the id is unknown.

    Raises:
        RuntimeError: If ``ENABLE_POSTING`` is not exactly ``"1"``.
        subprocess.CalledProcessError: If ``xurl`` exits with a non-zero status.
    """
    if os.getenv("ENABLE_POSTING") != "1":
        raise RuntimeError("ENABLE_POSTING must be 1 to post")
    proc = subprocess.run(["xurl", *args], check=True, text=True, capture_output=True)
    try:
        parsed = json.loads(proc.stdout)
    except (ValueError, TypeError):
        return ""
    # Explicit shape checks replace the former blanket `except Exception`.
    data = parsed.get("data") if isinstance(parsed, dict) else None
    if not isinstance(data, dict):
        return ""
    return str(data.get("id") or "")


def xurl_post(tweet: str) -> str:
    """Publish ``tweet`` via ``xurl post`` and return its id (``""`` if unknown).

    Raises:
        RuntimeError: If ``ENABLE_POSTING`` is not ``"1"``.
        subprocess.CalledProcessError: If ``xurl`` fails.
    """
    return _xurl_tweet_id(["post", tweet])


def xurl_reply(parent_id: str, tweet: str) -> str:
    """Reply to ``parent_id`` with ``tweet`` via ``xurl reply``; return the new id (``""`` if unknown).

    Raises:
        RuntimeError: If ``ENABLE_POSTING`` is not ``"1"``.
        subprocess.CalledProcessError: If ``xurl`` fails.
    """
    return _xurl_tweet_id(["reply", parent_id, tweet])


def main() -> int:
    """CLI entry point: build a candidate, optionally publish it, record it, print a report.

    Publishing happens only with ``--post`` and without ``--dry-run``
    (``xurl_post`` additionally requires ``ENABLE_POSTING=1``). With
    ``--thread`` the candidate's thread is posted as a reply chain.

    The state database is opened before anything is published, and the
    candidate is recorded right after the headline tweet, before any reply is
    attempted. A reply failure still propagates, but can no longer lose the
    record that the headline was published.

    Returns:
        0 on success; failures propagate as exceptions.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--topic", choices=["auto", "universidades", "atn"], default="auto")
    ap.add_argument("--post", action="store_true", help="Actually post via xurl. Requires ENABLE_POSTING=1")
    ap.add_argument("--thread", action="store_true", help="When posting, reply with the generated detail thread")
    ap.add_argument("--dry-run", action="store_true", help="Generate only; never post")
    ap.add_argument("--db", default=os.getenv("BOT_DB_PATH", "./bot_state.sqlite3"))
    ap.add_argument("--json", action="store_true")
    args = ap.parse_args()

    token = require_token()
    cand = pick_candidate(token, args.topic)
    if len(cand.tweet) > MAX_TWEET_LEN:
        raise RuntimeError(f"Tweet too long: {len(cand.tweet)}")

    posted = False
    posted_ids: list[str] = []
    # Open the DB first so an unusable --db path fails before anything goes public.
    conn = db_connect(args.db)
    try:
        parent_id = ""
        if args.post and not args.dry_run:
            parent_id = xurl_post(cand.tweet)
            posted = True
            if parent_id:
                posted_ids.append(parent_id)

        # Persist before the replies: if one of them raises, the published
        # headline is already on record.
        pid, is_new = save_candidate(conn, cand, posted)

        if posted and args.thread and cand.thread:
            reply_to = parent_id
            for position, reply in enumerate(cand.thread, 1):
                if not reply_to:
                    # Without the previous tweet id the chain cannot continue;
                    # say so on stderr (stdout stays clean for --json).
                    print(
                        f"warning: thread stopped before reply {position}: previous tweet id unknown",
                        file=sys.stderr,
                    )
                    break
                reply_to = xurl_reply(reply_to, reply)
                if reply_to:
                    posted_ids.append(reply_to)
    finally:
        conn.close()

    out = {
        "id": pid,
        "new": is_new,
        "posted": posted,
        "posted_ids": posted_ids,
        "topic": cand.topic,
        "len": len(cand.tweet),
        "tweet": cand.tweet,
        "thread": cand.thread or [],
        "thread_lens": [len(t) for t in (cand.thread or [])],
        "source": cand.source_rows,
    }
    if args.json:
        print(json.dumps(out, ensure_ascii=False, indent=2))
    else:
        print(f"topic={cand.topic} len={len(cand.tweet)} new={is_new} posted={posted}")
        print(cand.tweet)
        if cand.thread:
            print("\nTHREAD:")
            for i, t in enumerate(cand.thread, 1):
                print(f"[{i}] len={len(t)} {t}")
    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())