Files
Anthropic-Cybersecurity-Skills/skills/performing-api-inventory-and-discovery/scripts/agent.py
T
mukul975 c21af3347e Complete folder anatomy for all 649 cybersecurity skills + update LICENSE to Mahipal
- Add scripts/agent.py and references/api-reference.md to all remaining skills
- Update all 648 LICENSE files: copyright now reads 'Mahipal'
- Add implementing-security-monitoring-with-datadog (new skill with full anatomy)
- All 649 skills now have: SKILL.md, LICENSE, scripts/agent.py, references/api-reference.md
2026-03-11 00:22:12 +01:00

234 lines
8.6 KiB
Python

#!/usr/bin/env python3
# For authorized testing only
"""API inventory and discovery agent for attack surface mapping."""
import json
import sys
import argparse
import re
import subprocess
from datetime import datetime
from collections import defaultdict
try:
import requests
except ImportError:
print("Install: pip install requests")
sys.exit(1)
# Well-known paths requested against a base URL when no explicit path list
# is supplied. Order is significant only for the order of the report.
COMMON_API_PATHS = [
    # Generic and versioned API roots
    "/api",
    "/api/v1",
    "/api/v2",
    "/api/v3",
    # GraphQL endpoints and interactive consoles
    "/graphql",
    "/graphiql",
    "/playground",
    # Machine-readable specs and documentation pages
    "/swagger.json",
    "/swagger/v1/swagger.json",
    "/openapi.json",
    "/api-docs",
    "/docs",
    # Operational endpoints
    "/health",
    "/healthz",
    "/status",
    "/metrics",
    # Administrative, internal and identity-provider metadata
    "/admin/api",
    "/internal/api",
    "/.well-known/openid-configuration",
    # Bare version prefixes and alternative protocols
    "/v1",
    "/v2",
    "/rest",
    "/ws",
    "/rpc",
]
def discover_api_endpoints(base_url, paths=None, timeout=5):
    """Probe common API paths to discover active endpoints.

    Args:
        base_url: Base URL to probe. Trailing slashes are stripped before
            each path is appended.
        paths: Iterable of paths to request. When None, COMMON_API_PATHS
            is used.
        timeout: Per-request timeout in seconds.

    Returns:
        A list of dicts, one per path that answered with a status below 500
        other than 404. Each dict has the keys ``url``, ``status``,
        ``content_type``, ``server`` and ``type``; responses with status 200
        additionally carry ``finding`` and ``severity``.
    """
    if paths is None:
        paths = COMMON_API_PATHS
    root = base_url.rstrip("/")
    discovered = []
    for path in paths:
        url = f"{root}{path}"
        try:
            resp = requests.get(
                url,
                timeout=timeout,
                allow_redirects=False,
                verify=True,
                headers={"User-Agent": "API-Inventory-Agent/1.0"},
            )
        except requests.exceptions.RequestException:
            # Best-effort probing: a path that cannot be reached is simply
            # left out of the inventory.
            continue

        status = resp.status_code
        # Server errors are not counted as live endpoints, and 404 means
        # nothing is served at this path, so reporting it would mark every
        # probed path as "discovered". 401/403/405 are kept on purpose: they
        # show that something is listening there.
        if status >= 500 or status == 404:
            continue

        content_type = resp.headers.get("Content-Type", "")
        # Media types are case-insensitive, so classify on a lowered copy
        # while reporting the header value exactly as received.
        media = content_type.lower()
        if "json" in media:
            kind = "REST/JSON"
        elif "xml" in media:
            kind = "SOAP/XML"
        elif "html" in media and "swagger" in path.lower():
            kind = "API Documentation"
        else:
            kind = "unknown"

        entry = {
            "url": url,
            "status": status,
            "content_type": content_type,
            "server": resp.headers.get("Server", ""),
            "type": kind,
        }
        if status == 200:
            entry["finding"] = "Active API endpoint"
            entry["severity"] = "INFO"
        discovered.append(entry)
    return discovered
def parse_swagger_spec(spec_url):
    """Fetch and parse OpenAPI/Swagger spec to inventory endpoints.

    Args:
        spec_url: URL of a JSON OpenAPI/Swagger document.

    Returns:
        On success, a dict with ``spec_version``, ``api_title``,
        ``api_version``, ``total_endpoints``, ``deprecated_endpoints`` and
        ``endpoints`` (a list of dicts with ``method``, ``path``,
        ``summary``, ``deprecated`` and ``auth_required``).
        On any fetch, HTTP-status or decoding failure, or when the document
        is not a JSON object, a dict with a single ``error`` key.
    """
    try:
        resp = requests.get(spec_url, timeout=15)
        resp.raise_for_status()
        spec = resp.json()
    except Exception as e:
        # Boundary function: every failure is reported through the result
        # dict instead of being raised to the caller.
        return {"error": str(e)}

    # A valid JSON body may still be a list, string or null; without this
    # guard the .get() calls below would raise AttributeError.
    if not isinstance(spec, dict):
        return {
            "error": "Unexpected spec format: expected a JSON object, "
                     f"got {type(spec).__name__}"
        }

    http_methods = {
        "GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD", "TRACE",
    }
    version = spec.get("openapi", spec.get("swagger", "unknown"))

    # Tolerate null or wrongly typed sections instead of crashing on them.
    info = spec.get("info")
    if not isinstance(info, dict):
        info = {}
    paths = spec.get("paths")
    if not isinstance(paths, dict):
        paths = {}
    global_security = spec.get("security", [])

    endpoints = []
    for path, path_item in paths.items():
        if not isinstance(path_item, dict):
            continue
        for method, op in path_item.items():
            # Path items also hold non-operation keys such as "parameters"
            # or "$ref"; only HTTP-method keys describe operations.
            if method.upper() not in http_methods or not isinstance(op, dict):
                continue
            endpoints.append({
                "method": method.upper(),
                "path": path,
                "summary": op.get("summary", ""),
                "deprecated": op.get("deprecated", False),
                # An operation-level "security" (even an empty list)
                # overrides the document-level requirement.
                "auth_required": bool(op.get("security", global_security)),
            })

    deprecated_count = sum(1 for e in endpoints if e["deprecated"])
    return {
        "spec_version": version,
        "api_title": info.get("title", ""),
        "api_version": info.get("version", ""),
        "total_endpoints": len(endpoints),
        "deprecated_endpoints": deprecated_count,
        "endpoints": endpoints,
    }
def scan_javascript_for_apis(js_url):
    """Fetch JavaScript file and extract API endpoint references.

    Args:
        js_url: URL of the JavaScript file to download and scan.

    Returns:
        On success, a dict with ``source`` (the URL), ``discovered_apis``
        (sorted list of unique matches) and ``count``.
        On any fetch or HTTP-status failure, a dict with a single ``error``
        key.
    """
    try:
        resp = requests.get(js_url, timeout=15)
        # Without this, a 404/500 error page would be scanned as if it were
        # the requested script and could yield bogus matches.
        resp.raise_for_status()
        content = resp.text
    except Exception as e:
        return {"error": str(e)}

    api_patterns = [
        re.compile(r'["\'](/api/[^"\']+)["\']'),
        re.compile(r'["\'](/v\d+/[^"\']+)["\']'),
        re.compile(r'fetch\s*\(\s*["\']([^"\']+)["\']'),
        re.compile(r'axios\.\w+\s*\(\s*["\']([^"\']+)["\']'),
        re.compile(r'\.get\s*\(\s*["\']([^"\']+/api[^"\']*)["\']'),
        re.compile(r'\.post\s*\(\s*["\']([^"\']+/api[^"\']*)["\']'),
    ]
    # Static assets pulled in through fetch()/axios are not API endpoints.
    asset_suffixes = (".js", ".css", ".png", ".jpg")

    found_apis = set()
    for pattern in api_patterns:
        # Every pattern has exactly one capture group, so findall yields
        # plain strings.
        for match in pattern.findall(content):
            if len(match) > 3 and not match.endswith(asset_suffixes):
                found_apis.add(match)
    return {
        "source": js_url,
        "discovered_apis": sorted(found_apis),
        "count": len(found_apis),
    }
def enumerate_subdomains_for_apis(domain):
    """Use DNS enumeration to find API subdomains.

    Runs ``nslookup`` for a fixed list of API-style prefixes under
    ``domain`` and reports the names that resolve.

    Args:
        domain: Parent domain, e.g. ``example.com``.

    Returns:
        A list of dicts with ``subdomain``, ``status`` (always
        ``"resolved"``) and ``severity`` (``"MEDIUM"`` for prefixes
        containing "internal" or "staging", otherwise ``"INFO"``). The list
        is empty when nothing resolves or ``nslookup`` is not installed.
    """
    api_prefixes = [
        "api", "api-v1", "api-v2", "api-gateway", "api-internal",
        "gateway", "graphql", "rest", "ws", "webhook",
        "staging-api", "dev-api", "sandbox-api", "beta-api",
        "admin-api", "partner-api", "public-api", "mobile-api",
    ]
    found = []
    for prefix in api_prefixes:
        subdomain = f"{prefix}.{domain}"
        try:
            result = subprocess.run(
                ["nslookup", subdomain],
                capture_output=True, text=True, timeout=5,
            )
        except subprocess.TimeoutExpired:
            continue
        except FileNotFoundError:
            # nslookup is not installed; further attempts would fail the
            # same way.
            break

        # nslookup prints the resolver's own "Server:" / "Address:" header
        # even when the lookup fails, so the presence of "Address:" alone
        # says nothing. A real answer has a "Name:" line followed by at
        # least one address line.
        _, name_marker, answer = result.stdout.partition("Name:")
        if not name_marker or "Address" not in answer:
            continue

        elevated = "internal" in prefix or "staging" in prefix
        found.append({
            "subdomain": subdomain,
            "status": "resolved",
            "severity": "MEDIUM" if elevated else "INFO",
        })
    return found
def classify_api_risk(endpoints):
    """Attach a risk level and a short justification to each endpoint.

    Each input dict is copied and extended with ``risk`` and ``reason``.
    The location examined is the ``url`` key, falling back to ``path``,
    compared case-insensitively. The first matching rule wins.
    """
    admin_markers = ("/admin", "/internal", "/debug", "/metrics")
    graphql_markers = ("/graphql", "/graphiql", "/playground")

    def _assess(entry):
        # Return (risk, reason) for a single endpoint record.
        location = entry.get("url", entry.get("path", "")).lower()
        if any(marker in location for marker in admin_markers):
            return "HIGH", "Administrative/internal endpoint exposed"
        if any(marker in location for marker in graphql_markers):
            return "HIGH", "GraphQL endpoint — check introspection"
        if "swagger" in location or "api-docs" in location:
            return "MEDIUM", "API documentation publicly accessible"
        if entry.get("deprecated", False):
            return "HIGH", "Deprecated/zombie API still accessible"
        return "LOW", "Standard endpoint"

    annotated = []
    for entry in endpoints:
        level, why = _assess(entry)
        record = dict(entry)
        record["risk"] = level
        record["reason"] = why
        annotated.append(record)
    return annotated
def run_audit(args):
    """Execute API inventory and discovery audit.

    Runs each discovery step whose option is set on ``args``, prints a
    human-readable summary to stdout and collects the raw results.

    Args:
        args: Object with ``target_url``, ``swagger_url``, ``js_url`` and
            ``domain`` attributes; a falsy value skips that step.

    Returns:
        A dict that may contain ``discovered_endpoints``, ``swagger_spec``,
        ``js_api_discovery`` and ``api_subdomains``, one key per executed
        step. Empty when no step was requested.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    banner = "=" * 60
    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the printed timestamp keeps its original format.
    generated = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    print(f"\n{banner}")
    print(" API INVENTORY AND DISCOVERY AUDIT")
    print(f" Generated: {generated} UTC")
    print(f"{banner}\n")

    report = {}
    if args.target_url:
        discovered = discover_api_endpoints(args.target_url)
        classified = classify_api_risk(discovered)
        report["discovered_endpoints"] = classified
        print(f"--- ENDPOINT DISCOVERY ({len(classified)} found) ---")
        for ep in classified:
            print(f" [{ep['risk']}] {ep['url']} ({ep.get('status','')}): {ep['reason']}")

    if args.swagger_url:
        spec = parse_swagger_spec(args.swagger_url)
        report["swagger_spec"] = spec
        print("\n--- SWAGGER SPEC ANALYSIS ---")
        if "error" in spec:
            # Show the failure instead of an empty title and zero counts,
            # which would read like a valid but empty spec.
            print(f" Error: {spec['error']}")
        else:
            print(f" API: {spec.get('api_title','')} v{spec.get('api_version','')}")
            print(f" Endpoints: {spec.get('total_endpoints',0)}")
            print(f" Deprecated: {spec.get('deprecated_endpoints',0)}")

    if args.js_url:
        js_apis = scan_javascript_for_apis(args.js_url)
        report["js_api_discovery"] = js_apis
        print(f"\n--- JAVASCRIPT API EXTRACTION ({js_apis.get('count',0)}) ---")
        if "error" in js_apis:
            print(f" Error: {js_apis['error']}")
        # Only the first 15 matches are printed; the report keeps them all.
        for api in js_apis.get("discovered_apis", [])[:15]:
            print(f" {api}")

    if args.domain:
        subs = enumerate_subdomains_for_apis(args.domain)
        report["api_subdomains"] = subs
        print(f"\n--- API SUBDOMAIN ENUMERATION ({len(subs)} found) ---")
        for s in subs:
            print(f" [{s['severity']}] {s['subdomain']}")
    return report
def main():
    """Parse command-line options, run the audit and optionally save it.

    The report returned by run_audit is written as indented JSON to the
    file named by ``--output`` when that option is given.
    """
    cli = argparse.ArgumentParser(description="API Inventory Discovery Agent")
    option_help = (
        ("--target-url", "Base URL to probe for API endpoints"),
        ("--swagger-url", "Swagger/OpenAPI spec URL to parse"),
        ("--js-url", "JavaScript file URL to extract API paths"),
        ("--domain", "Domain for API subdomain enumeration"),
        ("--output", "Save report to JSON file"),
    )
    for flag, description in option_help:
        cli.add_argument(flag, help=description)
    options = cli.parse_args()

    findings = run_audit(options)
    if not options.output:
        return
    with open(options.output, "w") as handle:
        # default=str stringifies any value json cannot encode natively.
        json.dump(findings, handle, indent=2, default=str)
    print(f"\n[+] Report saved to {options.output}")


if __name__ == "__main__":
    main()