#!/usr/bin/env python3
"""Repobility MCP stdio server.

Drop into your AI coder's MCP config (Claude Code, Cursor, Goose,
Continue.dev). Speaks the Model Context Protocol over stdin/stdout,
proxies each tool call to the public Repobility HTTPS API.

No auth needed for public repos. The server is stateless — restart
between sessions, no local DB.

Example Claude Code config:
  {
    "mcpServers": {
      "repobility": {
        "command": "python",
        "args": ["/path/to/mcp_repobility.py"]
      }
    }
  }

Source: https://repobility.com/static/mcp/mcp_repobility.py
Manifest: https://repobility.com/api/v1/mcp/manifest/
"""
from __future__ import annotations

import json
import os
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from typing import Any

BASE = os.environ.get("REPOBILITY_BASE", "https://repobility.com")
AGENT_ID = os.environ.get("REPOBILITY_AGENT_ID", "mcp-anonymous")
USER_AGENT = f"repobility-mcp/1.0 ({AGENT_ID})"
TIMEOUT = 30


# ─── HTTP helpers ────────────────────────────────────────────────
def _http(method: str, path: str, body: dict | None = None) -> dict[str, Any]:
    """Issue one HTTP request to the Repobility API and parse the JSON reply.

    ``path`` may be absolute (starts with "http") or relative to ``BASE``.
    Every failure mode — HTTP error status, non-JSON body, transport
    exception — is reported as a dict instead of raised, so tool handlers
    never leak exceptions up to the MCP layer.
    """
    target = f"{BASE}{path}" if not path.startswith("http") else path
    payload = None if body is None else json.dumps(body).encode()
    headers = {
        "User-Agent": USER_AGENT,
        "X-Agent-Id": AGENT_ID,
        "Accept": "application/json",
    }
    if payload is not None:
        headers["Content-Type"] = "application/json"
    request = urllib.request.Request(target, data=payload, method=method, headers=headers)
    try:
        with urllib.request.urlopen(request, timeout=TIMEOUT) as resp:
            text = resp.read().decode()
    except urllib.error.HTTPError as http_err:
        # Error responses may still carry a JSON body worth returning verbatim.
        err_text = http_err.read().decode(errors="replace")[:1500]
        try:
            return json.loads(err_text)
        except json.JSONDecodeError:
            return {"ok": False, "error": f"http {http_err.code}", "body": err_text}
    except Exception as exc:  # noqa: BLE001
        return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        return {"ok": False, "error": "non-json response", "body": text[:1000]}


# ─── Tools ────────────────────────────────────────────────────────
# MCP tool catalog served verbatim by the "tools/list" method. Each entry's
# "name" must have a matching implementation in the HANDLERS dispatch table
# defined further down in this file; "inputSchema" is JSON Schema describing
# the arguments the client passes via "tools/call".
TOOLS = [
    {
        "name": "scan_repo",
        "description": "Submit a public GitHub repo URL for code-quality / security analysis. Idempotent — same URL returns same scan_token. Returns scan_token + poll_url; use poll_scan to wait for completion.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "repo_url": {"type": "string", "description": "https://github.com/OWNER/REPO"}
            },
            "required": ["repo_url"],
        },
    },
    {
        "name": "poll_scan",
        "description": "Poll for scan completion. Status moves to 'completed' typically in 60-120s (up to 10min for monorepos). Call repeatedly until status == 'completed'.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "scan_token": {"type": "string", "description": "Token returned by scan_repo"}
            },
            "required": ["scan_token"],
        },
    },
    {
        "name": "scan_and_wait",
        "description": "Convenience: submit + poll until done (or 10min timeout). Returns the full finished payload. Prefer this over scan_repo+poll_scan in linear workflows.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "repo_url": {"type": "string"},
                "max_wait_seconds": {"type": "number", "default": 600},
            },
            "required": ["repo_url"],
        },
    },
    {
        "name": "read_findings",
        "description": "Fetch the AI-triage queue: stratified findings (across rules with active TP/FP imbalance) ready for vote_finding. Use this when you want to help calibrate the engine.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "limit": {"type": "integer", "default": 10, "description": "Max items (1-50)"}
            },
        },
    },
    {
        "name": "vote_finding",
        "description": "Submit TP/FP/wont_fix/not_sure verdict on a finding. Read /agents/voting/ guide before voting heavily — wrong votes hurt the engine. One vote per voter per finding.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "finding_id": {"type": "integer"},
                "vote": {"type": "string", "enum": ["tp", "fp", "wont_fix", "not_sure"]},
                "note": {"type": "string", "description": "Optional reason (encouraged for FP votes)"},
            },
            "required": ["finding_id", "vote"],
        },
    },
    {
        "name": "get_scan_report",
        "description": "Get the full HTML/JSON report URLs for a completed scan + the SARIF export URL (for VS Code / GitHub code-scanning).",
        "inputSchema": {
            "type": "object",
            "properties": {"scan_token": {"type": "string"}},
            "required": ["scan_token"],
        },
    },
    {
        "name": "get_sarif",
        "description": "Fetch the full SARIF 2.1 export for a public scan. Returns the parsed SARIF JSON (drop into VS Code's SARIF viewer or GitHub code-scanning).",
        "inputSchema": {
            "type": "object",
            "properties": {
                "scan_token": {"type": "string"},
                "state": {"type": "string", "enum": ["open", "resolved", "all"], "default": "all"},
            },
            "required": ["scan_token"],
        },
    },
    {
        "name": "file_issue_link",
        "description": "Get the pre-filled GitHub Issue deep link for a scan. User clicks once → issue is filed with the report body + score-card. No GitHub auth needed on Repobility's side.",
        "inputSchema": {
            "type": "object",
            "properties": {"scan_token": {"type": "string"}},
            "required": ["scan_token"],
        },
    },
    {
        "name": "read_corpus_stats",
        "description": "Aggregate stats across ALL scanned repos: severity counts (raw + calibrated), top rules firing, top scanner-side gaps. Use for engine-wide intelligence.",
        "inputSchema": {"type": "object", "properties": {}},
    },
    {
        "name": "read_cohort",
        "description": "Cohort comparison — repos grouped by language/size. Returns percentiles, shared issues, outliers. Useful for 'is this repo above or below average for its peer group'.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "language": {"type": "string", "description": "e.g. Python, JavaScript"},
                "size_bucket": {"type": "string", "enum": ["small", "medium", "large"]},
            },
        },
    },
    {
        "name": "read_voting_guide",
        "description": "Fetch the AGENTS_TP_FP_GUIDE markdown — the protocol for how to vote correctly on findings.",
        "inputSchema": {"type": "object", "properties": {}},
    },
    {
        "name": "get_rule_details",
        "description": "Look up a specific rule's calibration state + recent TP/FP votes + sample findings. Use this BEFORE voting on a rule you haven't seen — see how others classified it.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "rule_id": {"type": "string", "description": "e.g. SEC022, AIC003, JRN003"},
            },
            "required": ["rule_id"],
        },
    },
    {
        "name": "get_live_activity",
        "description": "Snapshot of last-24h bridge activity: vote counts, scans, agents, rule calibrations, filed issues. Useful for monitoring + situational awareness.",
        "inputSchema": {"type": "object", "properties": {}},
    },
    {
        "name": "get_calibration_history",
        "description": "Time-series of calibration state (4h-spaced snapshots). Returns last 30 by default — chart how rules transition over time.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "limit": {"type": "integer", "default": 30, "minimum": 1, "maximum": 200},
            },
        },
    },
]


# ─── Tool implementations ─────────────────────────────────────────
def t_scan_repo(args: dict) -> dict:
    """Submit a repo URL for analysis (idempotent per URL, per tool schema)."""
    payload = {"repo_url": args["repo_url"]}
    return _http("POST", "/api/v1/public/scan/", payload)


def t_poll_scan(args: dict) -> dict:
    """Fetch the current status payload for a previously submitted scan."""
    token = args["scan_token"]
    return _http("GET", f"/api/v1/public/scan/{token}/")


def t_scan_and_wait(args: dict) -> dict:
    """Submit a scan, then poll until completion or timeout.

    Returns the final poll payload (augmented with convenience URLs) on
    completion, the raw submission response when no scan token could be
    obtained, or a timeout error dict carrying the last poll result.
    """
    sub = _http("POST", "/api/v1/public/scan/", {"repo_url": args["repo_url"]})
    # BUGFIX: the old guard (`not ok and not scan_token`) let token-less
    # responses through, after which the loop polled ".../scan/None/" until
    # the deadline. Bail out whenever no token is present, for any reason.
    token = sub.get("scan_token") or sub.get("token")
    if not token:
        return sub
    deadline = time.time() + float(args.get("max_wait_seconds", 600))
    last = sub
    while time.time() < deadline:
        last = _http("GET", f"/api/v1/public/scan/{token}/")
        if last.get("status") == "completed":
            # Attach report/export URLs so callers need no extra lookups.
            last["html_report"] = f"{BASE}/scan/{token}/"
            last["sarif_url"] = f"{BASE}/scan/{token}/sarif/"
            last["file_issue_url"] = f"{BASE}/scan/{token}/file-issue/"
            last["score_card_png"] = f"{BASE}/scan/{token}/report.png"
            return last
        time.sleep(8)  # polite poll interval; scans typically finish in 60-120s
    return {"ok": False, "error": "scan_and_wait timed out", "last_status": last}


def t_read_findings(args: dict) -> dict:
    """Fetch up to ``limit`` triage-queue findings; limit is clamped to 1-50."""
    requested = int(args.get("limit", 10))
    clamped = min(max(requested, 1), 50)
    return _http("GET", f"/api/v1/findings/queue/?limit={clamped}")


def t_vote_finding(args: dict) -> dict:
    """POST a TP/FP/wont_fix/not_sure verdict (plus optional note) for one finding."""
    payload = {"vote": args["vote"]}
    note = args.get("note")
    if note:
        payload["note"] = note
    finding = int(args["finding_id"])
    return _http("POST", f"/api/v1/findings/{finding}/feedback/", payload)


def t_get_scan_report(args: dict) -> dict:
    """Assemble all report/export URLs for a scan plus its latest poll payload."""
    token = args["scan_token"]
    poll = _http("GET", f"/api/v1/public/scan/{token}/")
    report = {
        "scan_token": token,
        "status": poll.get("status"),
        "html_report": f"{BASE}/scan/{token}/",
        "json_api": f"{BASE}/api/v1/public/scan/{token}/",
        "sarif_url": f"{BASE}/scan/{token}/sarif/",
        "score_card_png": f"{BASE}/scan/{token}/report.png",
        "file_issue_url": f"{BASE}/scan/{token}/file-issue/",
        "summary": poll,
    }
    return report


def t_file_issue_link(args: dict) -> dict:
    """Build the one-click GitHub-issue deep link for a scan (no API call)."""
    link = "{base}/scan/{tok}/file-issue/".format(base=BASE, tok=args["scan_token"])
    return {"file_issue_url": link}


def t_get_sarif(args: dict) -> dict:
    """Fetch the parsed SARIF export, optionally filtered by finding state.

    Only "open"/"resolved" are sent as a query parameter; "all" (the
    schema default) and anything unrecognized fall back to no filter.
    """
    token = args["scan_token"]
    state = args.get("state", "all")
    query = f"?state={state}" if state in ("open", "resolved") else ""
    return _http("GET", f"/scan/{token}/sarif/{query}")


def t_read_corpus_stats(args: dict) -> dict:
    """Fetch corpus-wide aggregate stats; this endpoint takes no parameters."""
    del args  # accepted for handler-signature uniformity, unused
    return _http("GET", "/api/v1/corpus/stats/")


def t_read_cohort(args: dict) -> dict:
    """Fetch cohort comparison stats, filtered by optional language/size bucket.

    BUGFIX: query values are now URL-encoded via ``urllib.parse.urlencode``
    so languages containing reserved characters or spaces (e.g. "C++",
    "Objective-C", "Jupyter Notebook") survive transport intact; the old
    f-string interpolation emitted them raw.
    """
    filters = {key: args[key] for key in ("language", "size_bucket") if args.get(key)}
    qs = "?" + urllib.parse.urlencode(filters) if filters else ""
    return _http("GET", f"/api/v1/cohort/{qs}")


def t_read_voting_guide(args: dict) -> dict:
    """Download the voting-protocol guide as raw markdown text."""
    guide_url = f"{BASE}/agents/voting/"
    headers = {"User-Agent": USER_AGENT, "Accept": "text/markdown"}
    request = urllib.request.Request(guide_url, headers=headers)
    try:
        with urllib.request.urlopen(request, timeout=TIMEOUT) as resp:
            body = resp.read().decode()
    except Exception as exc:  # noqa: BLE001
        return {"ok": False, "error": str(exc)}
    return {"ok": True, "url": guide_url, "body": body}


def t_get_rule_details(args: dict) -> dict:
    """Look up one rule's calibration state, recent votes, and sample findings.

    The rule id is normalized (uppercased, stripped) before the lookup.
    """
    rid = args["rule_id"].upper().strip()
    request = urllib.request.Request(
        f"{BASE}/rule/{rid}/",
        headers={
            "User-Agent": USER_AGENT,
            "Accept": "application/json",
            "X-Agent-Id": AGENT_ID,
        },
    )
    try:
        with urllib.request.urlopen(request, timeout=TIMEOUT) as resp:
            return json.loads(resp.read().decode())
    except urllib.error.HTTPError as http_err:
        snippet = http_err.read().decode()[:300]
        return {"ok": False, "error": f"http {http_err.code}", "body": snippet}
    except Exception as exc:  # noqa: BLE001
        return {"ok": False, "error": str(exc)}


def t_get_live_activity(args: dict) -> dict:
    """Fetch the last-24h bridge activity snapshot; takes no parameters."""
    return _http(method="GET", path="/api/v1/live/")


def t_get_calibration_history(args: dict) -> dict:
    """Fetch calibration-state snapshots (4h-spaced time series).

    CONSISTENCY FIX: clamp ``limit`` to the 1-200 range the tool schema
    advertises (``minimum``/``maximum``), mirroring the clamping already
    done in ``t_read_findings`` — previously any integer was passed
    through to the API unchecked.
    """
    limit = max(1, min(200, int(args.get("limit", 30))))
    return _http("GET", f"/api/v1/calibration_history/?limit={limit}")


# Tool-name → implementation dispatch table consulted by the "tools/call"
# method. Every "name" declared in TOOLS must map to a callable here that
# takes the arguments dict and returns a JSON-serializable dict.
HANDLERS = {
    "scan_repo": t_scan_repo,
    "poll_scan": t_poll_scan,
    "scan_and_wait": t_scan_and_wait,
    "read_findings": t_read_findings,
    "vote_finding": t_vote_finding,
    "get_scan_report": t_get_scan_report,
    "get_sarif": t_get_sarif,
    "get_rule_details": t_get_rule_details,
    "get_live_activity": t_get_live_activity,
    "get_calibration_history": t_get_calibration_history,
    "file_issue_link": t_file_issue_link,
    "read_corpus_stats": t_read_corpus_stats,
    "read_cohort": t_read_cohort,
    "read_voting_guide": t_read_voting_guide,
}


# ─── JSON-RPC server (MCP over stdio) ─────────────────────────────
def _write(msg: dict) -> None:
    """Serialize one JSON-RPC message to stdout, newline-delimited, and flush."""
    frame = json.dumps(msg)
    sys.stdout.write(frame + "\n")
    sys.stdout.flush()


def _ok(rid: Any, result: Any) -> None:
    """Emit a JSON-RPC success response for request id ``rid``."""
    response = {"jsonrpc": "2.0", "id": rid, "result": result}
    _write(response)


def _err(rid: Any, code: int, message: str) -> None:
    """Emit a JSON-RPC error response for request id ``rid``."""
    response = {"jsonrpc": "2.0", "id": rid, "error": {"code": code, "message": message}}
    _write(response)


def handle(req: dict) -> None:
    """Dispatch a single JSON-RPC request/notification from the client.

    Responses are written to stdout via _ok/_err; notifications and
    unknown methods without an id produce no output.
    """
    method = req.get("method")
    rid = req.get("id")
    params = req.get("params") or {}

    if method in ("notifications/initialized", "initialized"):
        return  # notifications expect no reply

    if method == "initialize":
        _ok(rid, {
            "protocolVersion": params.get("protocolVersion", "2025-06-18"),
            "capabilities": {"tools": {}},
            "serverInfo": {"name": "repobility", "version": "1.0.0"},
        })
        return

    if method == "tools/list":
        _ok(rid, {"tools": TOOLS})
        return

    if method == "tools/call":
        tool_name = params.get("name")
        arguments = params.get("arguments") or {}
        handler = HANDLERS.get(tool_name)
        if handler is None:
            _err(rid, -32601, f"unknown tool: {tool_name}")
            return
        try:
            outcome = handler(arguments)
            # Tool failures (ok=False dicts) are surfaced via isError,
            # not as JSON-RPC errors, so the client sees the payload.
            failed = not (outcome.get("ok", True) if isinstance(outcome, dict) else True)
            _ok(rid, {
                "content": [{"type": "text", "text": json.dumps(outcome, indent=2)}],
                "isError": failed,
            })
        except Exception as exc:  # noqa: BLE001
            _err(rid, -32000, f"{type(exc).__name__}: {exc}")
        return

    if method == "ping":
        _ok(rid, {})
        return

    if rid is not None:
        _err(rid, -32601, f"method not found: {method}")


def main() -> None:
    """Read newline-delimited JSON-RPC frames from stdin until EOF.

    Malformed JSON lines are skipped silently; crashes inside handle()
    are logged to stderr so the loop keeps serving subsequent requests.
    """
    for raw in sys.stdin:
        stripped = raw.strip()
        if not stripped:
            continue
        try:
            message = json.loads(stripped)
        except json.JSONDecodeError:
            continue
        try:
            handle(message)
        except Exception as exc:  # noqa: BLE001
            sys.stderr.write(f"mcp_repobility: handler crash: {exc}\n")


if __name__ == "__main__":
    main()
