Anthropic-Cybersecurity-Skills/skills/performing-api-inventory-and-discovery/scripts/agent.py

#!/usr/bin/env python3
# For authorized testing only
"""API inventory and discovery agent for attack surface mapping."""

import json
import sys
import argparse
import re
import subprocess
from datetime import datetime

try:
    import requests
except ImportError:
    print("Install: pip install requests")
    sys.exit(1)


COMMON_API_PATHS = [
    "/api", "/api/v1", "/api/v2", "/api/v3",
    "/graphql", "/graphiql", "/playground",
    "/swagger.json", "/swagger/v1/swagger.json",
    "/openapi.json", "/api-docs", "/docs",
    "/health", "/healthz", "/status", "/metrics",
    "/admin/api", "/internal/api", "/.well-known/openid-configuration",
    "/v1", "/v2", "/rest", "/ws", "/rpc",
]


def discover_api_endpoints(base_url, paths=None, timeout=5):
    """Probe common API paths to discover active endpoints."""
    if paths is None:
        paths = COMMON_API_PATHS
    discovered = []
    for path in paths:
        url = f"{base_url.rstrip('/')}{path}"
        try:
            resp = requests.get(url, timeout=timeout, allow_redirects=False,
                                verify=True, headers={"User-Agent": "API-Inventory-Agent/1.0"})
            if resp.status_code < 500:
                entry = {
                    "url": url,
                    "status": resp.status_code,
                    "content_type": resp.headers.get("Content-Type", ""),
                    "server": resp.headers.get("Server", ""),
                }
                if "json" in entry["content_type"]:
                    entry["type"] = "REST/JSON"
                elif "xml" in entry["content_type"]:
                    entry["type"] = "SOAP/XML"
                elif "html" in entry["content_type"] and "swagger" in path.lower():
                    entry["type"] = "API Documentation"
                else:
                    entry["type"] = "unknown"
                if resp.status_code == 200:
                    entry["finding"] = "Active API endpoint"
                    entry["severity"] = "INFO"
                discovered.append(entry)
        except requests.exceptions.RequestException:
            pass
    return discovered


def parse_swagger_spec(spec_url):
    """Fetch and parse OpenAPI/Swagger spec to inventory endpoints."""
    try:
        resp = requests.get(spec_url, timeout=15)
        resp.raise_for_status()
        spec = resp.json()
    except Exception as e:
        return {"error": str(e)}

    version = spec.get("openapi", spec.get("swagger", "unknown"))
    info = spec.get("info", {})
    paths = spec.get("paths", {})
    endpoints = []
    for path, methods in paths.items():
        for method in methods:
            if method.upper() in ("GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"):
                op = methods[method]
                endpoints.append({
                    "method": method.upper(),
                    "path": path,
                    "summary": op.get("summary", ""),
                    "deprecated": op.get("deprecated", False),
                    "auth_required": bool(op.get("security", spec.get("security", []))),
                })
    deprecated = [e for e in endpoints if e["deprecated"]]
    return {
        "spec_version": version,
        "api_title": info.get("title", ""),
        "api_version": info.get("version", ""),
        "total_endpoints": len(endpoints),
        "deprecated_endpoints": len(deprecated),
        "endpoints": endpoints,
    }


def scan_javascript_for_apis(js_url):
    """Fetch JavaScript file and extract API endpoint references."""
    try:
        resp = requests.get(js_url, timeout=15)
        content = resp.text
    except Exception as e:
        return {"error": str(e)}

    api_patterns = [
        re.compile(r'["\'](/api/[^"\']+)["\']'),
        re.compile(r'["\'](/v\d+/[^"\']+)["\']'),
        re.compile(r'fetch\s*\(\s*["\']([^"\']+)["\']'),
        re.compile(r'axios\.\w+\s*\(\s*["\']([^"\']+)["\']'),
        re.compile(r'\.get\s*\(\s*["\']([^"\']+/api[^"\']*)["\']'),
        re.compile(r'\.post\s*\(\s*["\']([^"\']+/api[^"\']*)["\']'),
    ]
    found_apis = set()
    for pattern in api_patterns:
        for match in pattern.findall(content):
            if len(match) > 3 and not match.endswith((".js", ".css", ".png", ".jpg")):
                found_apis.add(match)

    return {"source": js_url, "discovered_apis": sorted(found_apis), "count": len(found_apis)}


def enumerate_subdomains_for_apis(domain):
    """Use DNS enumeration to find API subdomains."""
    api_prefixes = [
        "api", "api-v1", "api-v2", "api-gateway", "api-internal",
        "gateway", "graphql", "rest", "ws", "webhook",
        "staging-api", "dev-api", "sandbox-api", "beta-api",
        "admin-api", "partner-api", "public-api", "mobile-api",
    ]
    found = []
    for prefix in api_prefixes:
        subdomain = f"{prefix}.{domain}"
        try:
            result = subprocess.run(
                ["nslookup", subdomain], capture_output=True, text=True, timeout=5
            )
            if "Non-authoritative answer" in result.stdout or "Address:" in result.stdout:
                found.append({
                    "subdomain": subdomain,
                    "status": "resolved",
                    "severity": "MEDIUM" if "internal" in prefix or "staging" in prefix else "INFO",
                })
        except (subprocess.TimeoutExpired, FileNotFoundError):
            pass
    return found


def classify_api_risk(endpoints):
    """Classify discovered APIs by risk level."""
    findings = []
    for ep in endpoints:
        url = ep.get("url", ep.get("path", ""))
        risk = "LOW"
        reason = "Standard endpoint"
        if any(p in url.lower() for p in ["/admin", "/internal", "/debug", "/metrics"]):
            risk = "HIGH"
            reason = "Administrative/internal endpoint exposed"
        elif any(p in url.lower() for p in ["/graphql", "/graphiql", "/playground"]):
            risk = "HIGH"
            reason = "GraphQL endpoint — check introspection"
        elif "swagger" in url.lower() or "api-docs" in url.lower():
            risk = "MEDIUM"
            reason = "API documentation publicly accessible"
        elif ep.get("deprecated", False):
            risk = "HIGH"
            reason = "Deprecated/zombie API still accessible"
        findings.append({**ep, "risk": risk, "reason": reason})
    return findings


def run_audit(args):
    """Execute API inventory and discovery audit."""
    print(f"\n{'='*60}")
    print(f"  API INVENTORY AND DISCOVERY AUDIT")
    print(f"  Generated: {datetime.utcnow().isoformat()} UTC")
    print(f"{'='*60}\n")

    report = {}

    if args.target_url:
        discovered = discover_api_endpoints(args.target_url)
        classified = classify_api_risk(discovered)
        report["discovered_endpoints"] = classified
        print(f"--- ENDPOINT DISCOVERY ({len(classified)} found) ---")
        for ep in classified:
            print(f"  [{ep['risk']}] {ep['url']} ({ep.get('status','')}): {ep['reason']}")

    if args.swagger_url:
        spec = parse_swagger_spec(args.swagger_url)
        report["swagger_spec"] = spec
        print(f"\n--- SWAGGER SPEC ANALYSIS ---")
        print(f"  API: {spec.get('api_title','')} v{spec.get('api_version','')}")
        print(f"  Endpoints: {spec.get('total_endpoints',0)}")
        print(f"  Deprecated: {spec.get('deprecated_endpoints',0)}")

    if args.js_url:
        js_apis = scan_javascript_for_apis(args.js_url)
        report["js_api_discovery"] = js_apis
        print(f"\n--- JAVASCRIPT API EXTRACTION ({js_apis.get('count',0)}) ---")
        for api in js_apis.get("discovered_apis", [])[:15]:
            print(f"  {api}")

    if args.domain:
        subs = enumerate_subdomains_for_apis(args.domain)
        report["api_subdomains"] = subs
        print(f"\n--- API SUBDOMAIN ENUMERATION ({len(subs)} found) ---")
        for s in subs:
            print(f"  [{s['severity']}] {s['subdomain']}")

    return report


def main():
    parser = argparse.ArgumentParser(description="API Inventory Discovery Agent")
    parser.add_argument("--target-url", help="Base URL to probe for API endpoints")
    parser.add_argument("--swagger-url", help="Swagger/OpenAPI spec URL to parse")
    parser.add_argument("--js-url", help="JavaScript file URL to extract API paths")
    parser.add_argument("--domain", help="Domain for API subdomain enumeration")
    parser.add_argument("--output", help="Save report to JSON file")
    args = parser.parse_args()

    report = run_audit(args)
    if args.output:
        with open(args.output, "w") as f:
            json.dump(report, f, indent=2, default=str)
        print(f"\n[+] Report saved to {args.output}")


if __name__ == "__main__":
    main()