"""Health Bridge — private Health Connect ingestion API for Pipo.

Receives Pixel Health Connect batches over Tailscale, stores raw records
idempotently, and rebuilds local-day summaries per subject.

Design goals:
- raw_data preserves source records with provenance
- daily_summary is derived, idempotent, dashboard-friendly
- /debug/day explains every number by source/classification
"""
import hashlib
import hmac
import json
import os
import sqlite3
import time
from contextlib import asynccontextmanager
from datetime import datetime, timezone
from zoneinfo import ZoneInfo

from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
import uvicorn

DB_PATH = os.path.expanduser("~/health-bridge/health.db")
AUTH_TOKEN = os.environ.get("HEALTH_BRIDGE_TOKEN", "changeme")
SIGNED_INGEST_SECRET = os.environ.get("HEALTH_BRIDGE_SIGNED_INGEST_SECRET", "")
SIGNED_INGEST_DEVICE = os.environ.get("HEALTH_BRIDGE_SIGNED_INGEST_DEVICE", "pixel9a-chicho")
SIGNED_INGEST_MAX_SKEW_SECONDS = int(os.environ.get("HEALTH_BRIDGE_SIGNED_INGEST_MAX_SKEW_SECONDS", "300"))
SIGNED_INGEST_MAX_BODY_BYTES = int(os.environ.get("HEALTH_BRIDGE_SIGNED_INGEST_MAX_BODY_BYTES", str(2 * 1024 * 1024)))
DEFAULT_TIMEZONE = os.environ.get("HEALTH_BRIDGE_TIMEZONE", "America/Argentina/Buenos_Aires")
DEFAULT_SUBJECT = os.environ.get("HEALTH_BRIDGE_DEFAULT_SUBJECT", "chicho")

SUMMARY_COLUMNS = [
    "subject", "date", "steps", "distance_m", "floors", "active_minutes",
    "exercise_minutes", "workout_minutes", "auto_activity_minutes",
    "calories", "active_kcal", "total_kcal", "nutrition_kcal",
    "heart_rate_avg", "heart_rate_min", "heart_rate_max", "sleep_minutes",
    "sleep_duration_minutes", "sleep_awake_minutes", "sleep_light_minutes",
    "sleep_deep_minutes", "sleep_rem_minutes", "sleep_efficiency",
    "workouts", "workout_count", "auto_activity_count", "weight_kg",
    "body_fat_percentage", "height_m", "protein_g", "carbs_g", "fat_g",
    "hydration_ml", "updated_at",
]

SOURCE_RULES = {
    "com.hevy": {"class": "explicit_workout", "label": "Hevy", "trust": "high"},
    "com.google.android.apps.fitness": {"class": "auto_activity", "label": "Google Fit", "trust": "medium"},
    "com.fitbit.FitbitMobile": {"class": "wearable_activity", "label": "Fitbit", "trust": "high"},
    "health_connect": {"class": "legacy_unknown", "label": "Health Connect legacy", "trust": "low"},
}
EXERCISE_TYPE_STRENGTH = {"70"}


def init_db():
    conn = sqlite3.connect(DB_PATH)
    conn.execute("PRAGMA journal_mode=WAL")
    conn.executescript("""
        CREATE TABLE IF NOT EXISTS raw_data (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            received_at TEXT NOT NULL DEFAULT (datetime('now')),
            subject TEXT NOT NULL DEFAULT 'chicho',
            data_type TEXT NOT NULL,
            record_key TEXT,
            local_date TEXT,
            source TEXT,
            data_json TEXT NOT NULL
        );
        CREATE INDEX IF NOT EXISTS idx_raw_data_type ON raw_data(data_type);
        CREATE INDEX IF NOT EXISTS idx_raw_data_received ON raw_data(received_at);

        CREATE TABLE IF NOT EXISTS ingest_nonces (
            device TEXT NOT NULL,
            nonce TEXT NOT NULL,
            timestamp INTEGER NOT NULL,
            received_at TEXT NOT NULL DEFAULT (datetime('now')),
            PRIMARY KEY (device, nonce)
        );
        CREATE INDEX IF NOT EXISTS idx_ingest_nonces_received ON ingest_nonces(received_at);

        CREATE TABLE IF NOT EXISTS daily_summary (
            subject TEXT NOT NULL DEFAULT 'chicho',
            date TEXT NOT NULL,
            steps INTEGER DEFAULT 0,
            distance_m REAL DEFAULT 0,
            floors REAL DEFAULT 0,
            active_minutes INTEGER DEFAULT 0,
            exercise_minutes INTEGER DEFAULT 0,
            workout_minutes INTEGER DEFAULT 0,
            auto_activity_minutes INTEGER DEFAULT 0,
            calories REAL DEFAULT 0,
            active_kcal REAL DEFAULT 0,
            total_kcal REAL DEFAULT 0,
            nutrition_kcal REAL DEFAULT 0,
            heart_rate_avg REAL,
            heart_rate_min REAL,
            heart_rate_max REAL,
            sleep_minutes INTEGER DEFAULT 0,
            sleep_duration_minutes INTEGER DEFAULT 0,
            sleep_awake_minutes INTEGER DEFAULT 0,
            sleep_light_minutes INTEGER DEFAULT 0,
            sleep_deep_minutes INTEGER DEFAULT 0,
            sleep_rem_minutes INTEGER DEFAULT 0,
            sleep_efficiency REAL,
            workouts INTEGER DEFAULT 0,
            workout_count INTEGER DEFAULT 0,
            auto_activity_count INTEGER DEFAULT 0,
            weight_kg REAL,
            body_fat_percentage REAL,
            height_m REAL,
            protein_g REAL DEFAULT 0,
            carbs_g REAL DEFAULT 0,
            fat_g REAL DEFAULT 0,
            hydration_ml REAL DEFAULT 0,
            updated_at TEXT NOT NULL DEFAULT (datetime('now')),
            PRIMARY KEY (subject, date)
        );
    """)
    _migrate_schema(conn)
    conn.commit()
    return conn


def _table_columns(conn: sqlite3.Connection, table: str) -> dict:
    return {r[1]: r for r in conn.execute(f"PRAGMA table_info({table})").fetchall()}


def _migrate_schema(conn: sqlite3.Connection):
    raw_cols = _table_columns(conn, "raw_data")
    for col, ddl in {
        "subject": "ALTER TABLE raw_data ADD COLUMN subject TEXT NOT NULL DEFAULT 'chicho'",
        "record_key": "ALTER TABLE raw_data ADD COLUMN record_key TEXT",
        "local_date": "ALTER TABLE raw_data ADD COLUMN local_date TEXT",
        "source": "ALTER TABLE raw_data ADD COLUMN source TEXT",
    }.items():
        if col not in raw_cols:
            conn.execute(ddl)

    summary_cols = _table_columns(conn, "daily_summary")
    if "subject" not in summary_cols or not _daily_summary_has_subject_pk(conn):
        conn.execute("ALTER TABLE daily_summary RENAME TO daily_summary_legacy")
        _create_daily_summary_table(conn)
        legacy_cols = _table_columns(conn, "daily_summary_legacy")
        select_exprs = []
        for col in SUMMARY_COLUMNS:
            if col in legacy_cols:
                select_exprs.append(col)
            elif col == "subject":
                select_exprs.append("'chicho'")
            elif col == "updated_at":
                select_exprs.append("datetime('now')")
            else:
                select_exprs.append("0")
        conn.execute(
            f"INSERT OR REPLACE INTO daily_summary ({', '.join(SUMMARY_COLUMNS)}) "
            f"SELECT {', '.join(select_exprs)} FROM daily_summary_legacy"
        )
        conn.execute("DROP TABLE daily_summary_legacy")
    else:
        summary_cols = _table_columns(conn, "daily_summary")
        for col, ddl in {
            "distance_m": "ALTER TABLE daily_summary ADD COLUMN distance_m REAL DEFAULT 0",
            "floors": "ALTER TABLE daily_summary ADD COLUMN floors REAL DEFAULT 0",
            "exercise_minutes": "ALTER TABLE daily_summary ADD COLUMN exercise_minutes INTEGER DEFAULT 0",
            "workout_minutes": "ALTER TABLE daily_summary ADD COLUMN workout_minutes INTEGER DEFAULT 0",
            "auto_activity_minutes": "ALTER TABLE daily_summary ADD COLUMN auto_activity_minutes INTEGER DEFAULT 0",
            "active_kcal": "ALTER TABLE daily_summary ADD COLUMN active_kcal REAL DEFAULT 0",
            "total_kcal": "ALTER TABLE daily_summary ADD COLUMN total_kcal REAL DEFAULT 0",
            "nutrition_kcal": "ALTER TABLE daily_summary ADD COLUMN nutrition_kcal REAL DEFAULT 0",
            "workouts": "ALTER TABLE daily_summary ADD COLUMN workouts INTEGER DEFAULT 0",
            "workout_count": "ALTER TABLE daily_summary ADD COLUMN workout_count INTEGER DEFAULT 0",
            "auto_activity_count": "ALTER TABLE daily_summary ADD COLUMN auto_activity_count INTEGER DEFAULT 0",
            "body_fat_percentage": "ALTER TABLE daily_summary ADD COLUMN body_fat_percentage REAL",
            "height_m": "ALTER TABLE daily_summary ADD COLUMN height_m REAL",
            "protein_g": "ALTER TABLE daily_summary ADD COLUMN protein_g REAL DEFAULT 0",
            "carbs_g": "ALTER TABLE daily_summary ADD COLUMN carbs_g REAL DEFAULT 0",
            "fat_g": "ALTER TABLE daily_summary ADD COLUMN fat_g REAL DEFAULT 0",
            "hydration_ml": "ALTER TABLE daily_summary ADD COLUMN hydration_ml REAL DEFAULT 0",
            "sleep_duration_minutes": "ALTER TABLE daily_summary ADD COLUMN sleep_duration_minutes INTEGER DEFAULT 0",
            "sleep_awake_minutes": "ALTER TABLE daily_summary ADD COLUMN sleep_awake_minutes INTEGER DEFAULT 0",
            "sleep_light_minutes": "ALTER TABLE daily_summary ADD COLUMN sleep_light_minutes INTEGER DEFAULT 0",
            "sleep_deep_minutes": "ALTER TABLE daily_summary ADD COLUMN sleep_deep_minutes INTEGER DEFAULT 0",
            "sleep_rem_minutes": "ALTER TABLE daily_summary ADD COLUMN sleep_rem_minutes INTEGER DEFAULT 0",
            "sleep_efficiency": "ALTER TABLE daily_summary ADD COLUMN sleep_efficiency REAL",
        }.items():
            if col not in summary_cols:
                conn.execute(ddl)

    index_sql = conn.execute(
        "SELECT sql FROM sqlite_master WHERE type = 'index' AND name = 'idx_raw_data_record_key'"
    ).fetchone()
    if index_sql and "WHERE" in (index_sql[0] or "").upper():
        conn.execute("DROP INDEX idx_raw_data_record_key")

    _backfill_legacy_raw_metadata(conn)
    conn.execute("CREATE INDEX IF NOT EXISTS idx_raw_data_subject_date ON raw_data(subject, local_date)")
    conn.execute("CREATE INDEX IF NOT EXISTS idx_daily_summary_subject_date ON daily_summary(subject, date)")
    conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_raw_data_record_key ON raw_data(record_key)")


def _create_daily_summary_table(conn: sqlite3.Connection):
    conn.executescript("""
        CREATE TABLE daily_summary (
            subject TEXT NOT NULL DEFAULT 'chicho',
            date TEXT NOT NULL,
            steps INTEGER DEFAULT 0,
            distance_m REAL DEFAULT 0,
            floors REAL DEFAULT 0,
            active_minutes INTEGER DEFAULT 0,
            exercise_minutes INTEGER DEFAULT 0,
            workout_minutes INTEGER DEFAULT 0,
            auto_activity_minutes INTEGER DEFAULT 0,
            calories REAL DEFAULT 0,
            active_kcal REAL DEFAULT 0,
            total_kcal REAL DEFAULT 0,
            nutrition_kcal REAL DEFAULT 0,
            heart_rate_avg REAL,
            heart_rate_min REAL,
            heart_rate_max REAL,
            sleep_minutes INTEGER DEFAULT 0,
            sleep_duration_minutes INTEGER DEFAULT 0,
            sleep_awake_minutes INTEGER DEFAULT 0,
            sleep_light_minutes INTEGER DEFAULT 0,
            sleep_deep_minutes INTEGER DEFAULT 0,
            sleep_rem_minutes INTEGER DEFAULT 0,
            sleep_efficiency REAL,
            workouts INTEGER DEFAULT 0,
            workout_count INTEGER DEFAULT 0,
            auto_activity_count INTEGER DEFAULT 0,
            weight_kg REAL,
            body_fat_percentage REAL,
            height_m REAL,
            protein_g REAL DEFAULT 0,
            carbs_g REAL DEFAULT 0,
            fat_g REAL DEFAULT 0,
            hydration_ml REAL DEFAULT 0,
            updated_at TEXT NOT NULL DEFAULT (datetime('now')),
            PRIMARY KEY (subject, date)
        );
    """)


def _daily_summary_has_subject_pk(conn: sqlite3.Connection) -> bool:
    cols = conn.execute("PRAGMA table_info(daily_summary)").fetchall()
    pk_cols = [r[1] for r in sorted([r for r in cols if r[5]], key=lambda r: r[5])]
    return pk_cols == ["subject", "date"]


def _backfill_legacy_raw_metadata(conn: sqlite3.Connection):
    rows = conn.execute(
        "SELECT id, data_type, data_json FROM raw_data WHERE record_key IS NULL OR local_date IS NULL OR source IS NULL OR subject IS NULL"
    ).fetchall()
    for row_id, data_type, data_json in rows:
        try:
            record = json.loads(data_json)
        except Exception:
            continue
        normalized = _normalize_record(record, DEFAULT_TIMEZONE, DEFAULT_SUBJECT)
        record_key = normalized["record_key"]
        exists = conn.execute("SELECT id FROM raw_data WHERE record_key = ? AND id != ?", (record_key, row_id)).fetchone()
        if exists:
            record_key = f"legacy:{row_id}:{record_key}"
            normalized["record"]["record_key"] = record_key
        conn.execute(
            """
            UPDATE raw_data
            SET subject = ?, data_type = ?, record_key = ?, local_date = ?, source = ?, data_json = ?
            WHERE id = ?
            """,
            (
                normalized["subject"], normalized["data_type"] or data_type, record_key,
                normalized["local_date"], normalized["source"],
                json.dumps(normalized["record"], separators=(",", ":"), sort_keys=True), row_id,
            ),
        )


@asynccontextmanager
async def lifespan(app: FastAPI):
    app.state.db = init_db()
    yield
    app.state.db.close()


app = FastAPI(title="Health Bridge", lifespan=lifespan)
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])


async def check_token(request: Request):
    token = request.headers.get("Authorization", "").replace("Bearer ", "")
    if token != AUTH_TOKEN:
        raise HTTPException(status_code=401, detail="Invalid token")
    return token


def _verify_signed_ingest(request: Request, raw_body: bytes, db: sqlite3.Connection):
    if not SIGNED_INGEST_SECRET:
        raise HTTPException(status_code=503, detail="Signed ingest is not configured")
    if len(raw_body) > SIGNED_INGEST_MAX_BODY_BYTES:
        raise HTTPException(status_code=413, detail="Payload too large")

    device = request.headers.get("X-HB-Device", "")
    nonce = request.headers.get("X-HB-Nonce", "")
    timestamp_raw = request.headers.get("X-HB-Timestamp", "")
    signature = request.headers.get("X-HB-Signature", "")
    if device != SIGNED_INGEST_DEVICE or not nonce or not timestamp_raw or not signature:
        raise HTTPException(status_code=401, detail="Missing or invalid signed ingest headers")

    try:
        timestamp = int(timestamp_raw)
    except ValueError:
        raise HTTPException(status_code=401, detail="Invalid timestamp")
    if abs(int(time.time()) - timestamp) > SIGNED_INGEST_MAX_SKEW_SECONDS:
        raise HTTPException(status_code=401, detail="Timestamp outside allowed window")

    body_hash = hashlib.sha256(raw_body).hexdigest()
    signed_payload = f"{device}\n{timestamp}\n{nonce}\n{body_hash}".encode("utf-8")
    expected = hmac.new(SIGNED_INGEST_SECRET.encode("utf-8"), signed_payload, hashlib.sha256).hexdigest()
    if not hmac.compare_digest(expected, signature):
        raise HTTPException(status_code=401, detail="Invalid signature")

    try:
        db.execute(
            "INSERT INTO ingest_nonces (device, nonce, timestamp, received_at) VALUES (?, ?, ?, datetime('now'))",
            (device, nonce, timestamp),
        )
    except sqlite3.IntegrityError:
        raise HTTPException(status_code=409, detail="Replay nonce")
    db.execute("DELETE FROM ingest_nonces WHERE received_at < datetime('now', '-1 day')")


def _ingest_body(db: sqlite3.Connection, body: dict) -> dict:
    records = body.get("records", [])
    client_tz = body.get("timezone") or DEFAULT_TIMEZONE
    subject = body.get("subject") or DEFAULT_SUBJECT
    if not records:
        raise HTTPException(status_code=400, detail="No records provided")

    affected = set()
    count = 0
    for record in records:
        normalized = _normalize_record(record, client_tz, subject)
        db.execute(
            """
            INSERT INTO raw_data (subject, data_type, record_key, local_date, source, data_json, received_at)
            VALUES (?, ?, ?, ?, ?, ?, datetime('now'))
            ON CONFLICT(record_key) DO UPDATE SET
                subject = excluded.subject,
                data_type = excluded.data_type,
                local_date = excluded.local_date,
                source = excluded.source,
                data_json = excluded.data_json,
                received_at = datetime('now')
            """,
            (
                normalized["subject"], normalized["data_type"], normalized["record_key"],
                normalized["local_date"], normalized["source"],
                json.dumps(normalized["record"], separators=(",", ":"), sort_keys=True),
            ),
        )
        if normalized["local_date"]:
            affected.add((normalized["subject"], normalized["local_date"]))
        count += 1
    db.commit()

    rebuilt = []
    for subj, local_date in sorted(affected):
        _rebuild_daily_summary(db, subj, local_date)
        rebuilt.append({"subject": subj, "date": local_date})
    return {"status": "ok", "records": count, "dates_rebuilt": rebuilt}


@app.get("/health")
async def health():
    return {"status": "ok", "time": datetime.now(timezone.utc).isoformat()}


@app.get("/")
async def root():
    return {"status": "ok", "service": "health-bridge"}


@app.get("/download")
async def download_apk():
    apk_path = os.path.expanduser("~/health-bridge/static/health-bridge.apk")
    if not os.path.exists(apk_path):
        raise HTTPException(status_code=404, detail="APK not found. Build it first.")
    return FileResponse(apk_path, media_type="application/vnd.android.package-archive", filename="health-bridge.apk")


@app.post("/ingest")
async def ingest(request: Request):
    await check_token(request)
    body = await request.json()
    db: sqlite3.Connection = app.state.db
    return _ingest_body(db, body)


@app.post("/ingest_signed")
async def ingest_signed(request: Request):
    raw_body = await request.body()
    db: sqlite3.Connection = app.state.db
    _verify_signed_ingest(request, raw_body, db)
    try:
        body = json.loads(raw_body)
    except json.JSONDecodeError:
        raise HTTPException(status_code=400, detail="Invalid JSON")
    return _ingest_body(db, body)


@app.get("/data")
async def get_data(request: Request, data_type: str = None, subject: str = None, limit: int = 100):
    await check_token(request)
    db: sqlite3.Connection = app.state.db
    subject = subject or DEFAULT_SUBJECT
    if data_type:
        rows = db.execute(
            "SELECT data_json, received_at FROM raw_data WHERE subject = ? AND data_type = ? ORDER BY id DESC LIMIT ?",
            (subject, data_type, limit),
        ).fetchall()
    else:
        rows = db.execute(
            "SELECT data_json, received_at FROM raw_data WHERE subject = ? ORDER BY id DESC LIMIT ?",
            (subject, limit),
        ).fetchall()
    return {"subject": subject, "count": len(rows), "data": [{"record": json.loads(r[0]), "received_at": r[1]} for r in rows]}


@app.get("/summary")
async def get_summary(request: Request, days: int = 7, subject: str = None):
    await check_token(request)
    db: sqlite3.Connection = app.state.db
    subject = subject or DEFAULT_SUBJECT
    rows = db.execute(
        f"SELECT {', '.join(SUMMARY_COLUMNS)} FROM daily_summary WHERE subject = ? ORDER BY date DESC LIMIT ?",
        (subject, days),
    ).fetchall()
    return {"subject": subject, "days": [_summary_dict(r) for r in rows]}


@app.get("/overview")
async def get_overview(request: Request, days: int = 7, subject: str = None):
    await check_token(request)
    db: sqlite3.Connection = app.state.db
    subject = subject or DEFAULT_SUBJECT
    rows = db.execute(
        f"SELECT {', '.join(SUMMARY_COLUMNS)} FROM daily_summary WHERE subject = ? ORDER BY date DESC LIMIT ?",
        (subject, days),
    ).fetchall()
    summaries = [_summary_dict(r) for r in rows]
    freshness = db.execute(
        """
        SELECT MAX(received_at), COUNT(*), COUNT(DISTINCT local_date), COUNT(DISTINCT source)
        FROM raw_data WHERE subject = ?
        """,
        (subject,),
    ).fetchone()
    return {
        "subject": subject,
        "timezone": DEFAULT_TIMEZONE,
        "freshness": {
            "last_sync_at": freshness[0],
            "raw_records": freshness[1],
            "covered_days": freshness[2],
            "sources": freshness[3],
        },
        "today": summaries[0] if summaries else None,
        "days": summaries,
        "legend": {
            "workout_minutes": "explicit workout sessions, primarily Hevy/strength",
            "auto_activity_minutes": "auto-detected activity, usually Google Fit walking/movement",
            "exercise_minutes": "workout_minutes + auto_activity_minutes",
            "calories": "burned calories only; nutrition_kcal is intake",
        },
    }


@app.get("/debug/day")
async def debug_day(request: Request, date: str, subject: str = None):
    await check_token(request)
    db: sqlite3.Connection = app.state.db
    subject = subject or DEFAULT_SUBJECT
    return _debug_day(db, subject, date)


@app.post("/rebuild-summary")
async def rebuild_summary(request: Request, subject: str = None):
    await check_token(request)
    db: sqlite3.Connection = app.state.db
    params = []
    where = "WHERE local_date IS NOT NULL"
    if subject:
        where += " AND subject = ?"
        params.append(subject)
    pairs = db.execute(
        f"SELECT DISTINCT subject, local_date FROM raw_data {where} ORDER BY subject, local_date",
        params,
    ).fetchall()
    rebuilt = []
    for subj, local_date in pairs:
        _rebuild_daily_summary(db, subj, local_date)
        rebuilt.append({"subject": subj, "date": local_date})
    return {"status": "ok", "dates_rebuilt": rebuilt}


def _summary_dict(row) -> dict:
    return dict(zip(SUMMARY_COLUMNS, row))


def _normalize_record(record: dict, client_tz: str, default_subject: str = DEFAULT_SUBJECT) -> dict:
    data_type = record.get("type", "unknown")
    subject = record.get("subject") or default_subject or DEFAULT_SUBJECT
    local_date = record.get("local_date") or record.get("date") or _local_date_for_record(record, client_tz)
    normalized = dict(record)
    normalized["subject"] = subject
    if local_date:
        normalized["local_date"] = local_date
    source = normalized.get("source") or normalized.get("app") or normalized.get("package_name") or "health_connect"
    normalized["source"] = source
    normalized["source_class"] = classify_record(normalized)["class"]
    record_key = normalized.get("record_key") or normalized.get("id")
    if record_key and not str(record_key).startswith(f"{subject}:"):
        record_key = f"{subject}:{record_key}"
    if not record_key:
        record_key = _stable_record_key(subject, data_type, normalized, local_date)
    normalized["record_key"] = record_key
    normalized["fingerprint"] = canonical_fingerprint(normalized)
    return {
        "subject": subject,
        "data_type": data_type,
        "record_key": record_key,
        "local_date": local_date,
        "source": source,
        "record": normalized,
    }


def _local_date_for_record(record: dict, client_tz: str) -> str | None:
    zone = _zone(client_tz)
    for field in ("time", "start", "period_start"):
        value = record.get(field)
        if value:
            try:
                return _parse_instant(value).astimezone(zone).date().isoformat()
            except ValueError:
                continue
    return None


def _stable_record_key(subject: str, data_type: str, record: dict, local_date: str | None) -> str:
    if data_type in {"daily_aggregate", "aggregated", "heart_rate"} and local_date:
        return f"{subject}:{data_type}:{local_date}"
    digest = hashlib.sha256(json.dumps(_identity(record, subject, data_type, local_date), sort_keys=True, separators=(",", ":")).encode()).hexdigest()[:24]
    return f"{subject}:{data_type}:{digest}"


def _identity(record: dict, subject: str, data_type: str, local_date: str | None) -> dict:
    identity = {
        "subject": subject,
        "type": data_type,
        "local_date": local_date,
        "time": record.get("time"),
        "start": record.get("start"),
        "end": record.get("end"),
        "period_start": record.get("period_start"),
        "period_end": record.get("period_end"),
        "source": record.get("source"),
        "value": record.get("value") or record.get("minutes") or record.get("steps"),
    }
    return identity if any(v is not None for v in identity.values()) else {"subject": subject, **record}


def _zone(name: str) -> ZoneInfo:
    try:
        return ZoneInfo(name)
    except Exception:
        return ZoneInfo(DEFAULT_TIMEZONE)


def _parse_instant(value: str) -> datetime:
    return datetime.fromisoformat(value.replace("Z", "+00:00"))


def _latest(records: list[dict], field: str = "time") -> dict | None:
    candidates = [r for r in records if r.get(field)]
    if not candidates:
        return records[-1] if records else None
    return max(candidates, key=lambda r: r.get(field))


def classify_record(record: dict) -> dict:
    source = record.get("source") or "health_connect"
    t = record.get("type") or "unknown"
    rule = SOURCE_RULES.get(source, {"class": "unknown", "label": source, "trust": "unknown"}).copy()
    if t == "exercise":
        title = (record.get("title") or "").lower()
        exercise_type = str(record.get("exercise_type") or "")
        if source == "com.hevy" or exercise_type in EXERCISE_TYPE_STRENGTH or "min-max" in title:
            rule.update({"class": "explicit_workout", "trust": "high"})
        elif source == "com.google.android.apps.fitness":
            rule.update({"class": "auto_activity", "trust": "medium"})
    return rule


def canonical_fingerprint(record: dict) -> str:
    t = record.get("type")
    if t in {"exercise", "sleep", "nutrition", "hydration"}:
        identity = {
            "subject": record.get("subject"),
            "type": t,
            "local_date": record.get("local_date"),
            "start": record.get("start"),
            "end": record.get("end"),
            "minutes": int(record.get("minutes") or 0) if record.get("minutes") is not None else None,
            "exercise_type": str(record.get("exercise_type")) if record.get("exercise_type") is not None else None,
            "title": record.get("title"),
            "energy_kcal": record.get("energy_kcal"),
            "volume_ml": record.get("volume_ml"),
        }
    elif t in {"daily_aggregate", "aggregated", "heart_rate"}:
        identity = {"subject": record.get("subject"), "type": t, "local_date": record.get("local_date")}
    else:
        identity = _identity(record, record.get("subject"), t, record.get("local_date"))
    return hashlib.sha256(json.dumps(identity, sort_keys=True, separators=(",", ":")).encode()).hexdigest()[:24]


def _is_stale_wide_aggregate(record: dict) -> bool:
    """Ignore legacy aggregates that cover multi-day windows but were stored as one local day."""
    if record.get("type") not in {"daily_aggregate", "aggregated"}:
        return False
    start = record.get("period_start")
    end = record.get("period_end")
    if not start or not end:
        return False
    try:
        start_dt = _parse_instant(start)
        end_dt = _parse_instant(end)
    except Exception:
        return False
    return (end_dt - start_dt).total_seconds() > 36 * 3600


def dedupe_records(records: list[dict]) -> tuple[list[dict], list[dict]]:
    """Deduplicate same semantic records, preferring named app sources over legacy health_connect."""
    ranked = []
    for idx, record in enumerate(records):
        classification = classify_record(record)
        fingerprint = record.get("fingerprint") or canonical_fingerprint(record)
        source = record.get("source") or "health_connect"
        trust_rank = {"high": 3, "medium": 2, "low": 1, "unknown": 0}.get(classification.get("trust"), 0)
        source_rank = 0 if source == "health_connect" else 1
        ranked.append((fingerprint, trust_rank, source_rank, idx, record, classification))

    winners = {}
    ignored = []
    for fingerprint, trust_rank, source_rank, idx, record, classification in ranked:
        candidate = (trust_rank, source_rank, idx, record, classification)
        current = winners.get(fingerprint)
        if current is None or candidate[:3] > current[:3]:
            if current is not None:
                ignored.append(_debug_record(current[3], current[4], reason="duplicate_lower_priority"))
            winners[fingerprint] = candidate
        else:
            ignored.append(_debug_record(record, classification, reason="duplicate_lower_priority"))
    kept = [v[3] for v in sorted(winners.values(), key=lambda x: x[2])]
    return kept, ignored


def _records_for_day(db: sqlite3.Connection, subject: str, local_date: str) -> list[dict]:
    rows = db.execute(
        "SELECT data_json FROM raw_data WHERE subject = ? AND local_date = ? ORDER BY received_at ASC, id ASC",
        (subject, local_date),
    ).fetchall()
    return [json.loads(row[0]) for row in rows]


def _debug_record(record: dict, classification: dict | None = None, reason: str | None = None) -> dict:
    classification = classification or classify_record(record)
    out = {
        "type": record.get("type"),
        "source": record.get("source"),
        "source_label": classification.get("label"),
        "classification": classification.get("class"),
        "trust": classification.get("trust"),
        "record_key": record.get("record_key"),
        "fingerprint": record.get("fingerprint") or canonical_fingerprint(record),
        "local_date": record.get("local_date"),
    }
    for key in ("minutes", "steps", "distance_m", "active_kcal", "total_kcal", "energy_kcal", "volume_ml", "start", "end", "time", "exercise_type", "title"):
        if record.get(key) is not None:
            out[key] = record.get(key)
    if reason:
        out["reason"] = reason
    return out


def compute_daily_summary(records: list[dict]) -> tuple[dict, dict]:
    records, ignored = dedupe_records(records)
    summary = {
        "steps": 0, "distance_m": 0.0, "floors": 0.0, "active_minutes": 0,
        "exercise_minutes": 0, "workout_minutes": 0, "auto_activity_minutes": 0,
        "calories": 0.0, "active_kcal": 0.0, "total_kcal": 0.0, "nutrition_kcal": 0.0,
        "heart_rate_avg": None, "heart_rate_min": None, "heart_rate_max": None,
        "sleep_minutes": 0, "sleep_duration_minutes": 0, "sleep_awake_minutes": 0,
        "sleep_light_minutes": 0, "sleep_deep_minutes": 0, "sleep_rem_minutes": 0,
        "sleep_efficiency": None, "workouts": 0, "workout_count": 0, "auto_activity_count": 0,
        "weight_kg": None, "body_fat_percentage": None, "height_m": None,
        "protein_g": 0.0, "carbs_g": 0.0, "fat_g": 0.0, "hydration_ml": 0.0,
    }
    provenance = {"used": [], "ignored": ignored, "by_metric": {}}
    hr_values, weights, body_fats, heights = [], [], [], []

    def add_metric(metric: str, value, record: dict, classification: dict):
        provenance["by_metric"].setdefault(metric, []).append(_debug_record(record, classification))
        provenance["used"].append(_debug_record(record, classification))

    for r in records:
        t = r.get("type", "")
        cls = classify_record(r)
        if t in {"daily_aggregate", "aggregated"}:
            if _is_stale_wide_aggregate(r):
                provenance["ignored"].append(_debug_record(r, cls, reason="stale_wide_aggregate"))
                continue
            for metric, field, caster in [
                ("steps", "steps", int), ("distance_m", "distance_m", float), ("floors", "floors", float),
                ("active_kcal", "active_kcal", float), ("total_kcal", "total_kcal", float),
            ]:
                summary[metric] = caster(r.get(field, 0) or 0)
                add_metric(metric, r.get(field, 0) or 0, r, cls)
        elif t == "steps":
            summary["steps"] += int(r.get("count", 0) or 0)
            add_metric("steps", r.get("count", 0), r, cls)
        elif t == "active_minutes":
            summary["active_minutes"] += int(r.get("minutes", 0) or 0)
            add_metric("active_minutes", r.get("minutes", 0), r, cls)
        elif t == "calories":
            summary["total_kcal"] += float(r.get("kcal", 0) or 0)
            add_metric("total_kcal", r.get("kcal", 0), r, cls)
        elif t == "heart_rate":
            samples = r.get("samples")
            if samples:
                for s in samples:
                    if s.get("bpm"):
                        hr_values.append(float(s["bpm"]))
            elif r.get("bpm"):
                hr_values.append(float(r["bpm"]))
            elif r.get("avg_bpm") is not None:
                hr_values.append(float(r["avg_bpm"]))
            add_metric("heart_rate", r.get("avg_bpm") or r.get("bpm"), r, cls)
        elif t == "sleep":
            summary["sleep_minutes"] += int(r.get("minutes", 0) or 0)
            summary["sleep_duration_minutes"] += int(r.get("duration_minutes", r.get("minutes", 0)) or 0)
            summary["sleep_awake_minutes"] += int(r.get("awake_minutes", 0) or 0)
            summary["sleep_light_minutes"] += int(r.get("light_minutes", 0) or 0)
            summary["sleep_deep_minutes"] += int(r.get("deep_minutes", 0) or 0)
            summary["sleep_rem_minutes"] += int(r.get("rem_minutes", 0) or 0)
            add_metric("sleep_minutes", r.get("minutes", 0), r, cls)
            for metric in ("sleep_duration_minutes", "sleep_awake_minutes", "sleep_light_minutes", "sleep_deep_minutes", "sleep_rem_minutes"):
                if r.get(metric.replace("sleep_", "")) is not None or metric == "sleep_duration_minutes":
                    add_metric(metric, summary[metric], r, cls)
        elif t == "exercise":
            minutes = int(r.get("minutes", 0) or 0)
            if cls["class"] == "explicit_workout":
                summary["workout_minutes"] += minutes
                summary["workout_count"] += 1
                add_metric("workout_minutes", minutes, r, cls)
            else:
                summary["auto_activity_minutes"] += minutes
                summary["auto_activity_count"] += 1
                add_metric("auto_activity_minutes", minutes, r, cls)
        elif t == "weight":
            weights.append(r)
            add_metric("weight_kg", r.get("kg"), r, cls)
        elif t == "body_fat":
            body_fats.append(r)
            add_metric("body_fat_percentage", r.get("percentage"), r, cls)
        elif t == "height":
            heights.append(r)
            add_metric("height_m", r.get("meters"), r, cls)
        elif t == "nutrition":
            for metric, field in [("protein_g", "protein_g"), ("carbs_g", "carbs_g"), ("fat_g", "fat_g"), ("nutrition_kcal", "energy_kcal")]:
                summary[metric] += float(r.get(field, 0) or 0)
                add_metric(metric, r.get(field, 0), r, cls)
        elif t == "hydration":
            summary["hydration_ml"] += float(r.get("volume_ml", 0) or 0)
            add_metric("hydration_ml", r.get("volume_ml", 0), r, cls)

    summary["exercise_minutes"] = summary["workout_minutes"] + summary["auto_activity_minutes"]
    summary["workouts"] = summary["workout_count"]
    summary["calories"] = summary["total_kcal"] or summary["active_kcal"]
    if hr_values:
        summary["heart_rate_avg"] = sum(hr_values) / len(hr_values)
        summary["heart_rate_min"] = min(hr_values)
        summary["heart_rate_max"] = max(hr_values)
    if summary["sleep_duration_minutes"]:
        summary["sleep_efficiency"] = summary["sleep_minutes"] / summary["sleep_duration_minutes"]
    latest_weight = _latest(weights)
    latest_body_fat = _latest(body_fats)
    latest_height = _latest(heights)
    summary["weight_kg"] = float(latest_weight["kg"]) if latest_weight and latest_weight.get("kg") is not None else None
    summary["body_fat_percentage"] = float(latest_body_fat["percentage"]) if latest_body_fat and latest_body_fat.get("percentage") is not None else None
    summary["height_m"] = float(latest_height["meters"]) if latest_height and latest_height.get("meters") is not None else None
    return summary, provenance


def _rebuild_daily_summary(db: sqlite3.Connection, subject: str, local_date: str):
    summary, _ = compute_daily_summary(_records_for_day(db, subject, local_date))
    values = {
        "subject": subject,
        "date": local_date,
        "updated_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(timespec="seconds"),
        **summary,
    }
    columns = SUMMARY_COLUMNS
    placeholders = ", ".join(["?"] * len(columns))
    update_cols = [c for c in columns if c not in {"subject", "date"}]
    update_sql = ",\n            ".join(
        ["updated_at = datetime('now')" if c == "updated_at" else f"{c} = excluded.{c}" for c in update_cols]
    )
    db.execute(f"""
        INSERT INTO daily_summary ({', '.join(columns)})
        VALUES ({placeholders})
        ON CONFLICT(subject, date) DO UPDATE SET
            {update_sql}
    """, tuple(values.get(c) for c in columns))
    db.commit()


def _debug_day(db: sqlite3.Connection, subject: str, local_date: str) -> dict:
    raw = _records_for_day(db, subject, local_date)
    summary, provenance = compute_daily_summary(raw)
    row = db.execute(
        f"SELECT {', '.join(SUMMARY_COLUMNS)} FROM daily_summary WHERE subject = ? AND date = ?",
        (subject, local_date),
    ).fetchone()
    persisted = _summary_dict(row) if row else None
    source_totals = {}
    for r in raw:
        source = r.get("source") or "unknown"
        cls = classify_record(r)
        slot = source_totals.setdefault(source, {"classification": cls["class"], "records": 0, "minutes": 0, "steps": 0})
        slot["records"] += 1
        slot["minutes"] += int(r.get("minutes", 0) or 0)
        slot["steps"] += int(r.get("steps", 0) or 0)
    return {
        "subject": subject,
        "date": local_date,
        "computed_summary": summary,
        "persisted_summary": persisted,
        "source_totals": source_totals,
        "used_records": provenance["used"],
        "ignored_records": provenance["ignored"],
        "by_metric": provenance["by_metric"],
    }


if __name__ == "__main__":
    port = int(os.environ.get("PORT", "3007"))
    uvicorn.run(app, host="0.0.0.0", port=port)
