Files
Anthropic-Cybersecurity-Skills/skills/performing-api-inventory-and-discovery/scripts/agent.py
T
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

233 lines
8.6 KiB
Python

#!/usr/bin/env python3
# For authorized testing only
"""API inventory and discovery agent for attack surface mapping."""
import json
import sys
import argparse
import re
import subprocess
from datetime import datetime
try:
import requests
except ImportError:
print("Install: pip install requests")
sys.exit(1)
# Default paths appended to the base URL by discover_api_endpoints() when the
# caller does not pass its own list: API roots and version prefixes, GraphQL
# consoles, Swagger/OpenAPI documents, health/metrics endpoints, admin and
# internal API prefixes, and the OpenID Connect discovery document.
COMMON_API_PATHS = [
"/api", "/api/v1", "/api/v2", "/api/v3",
"/graphql", "/graphiql", "/playground",
"/swagger.json", "/swagger/v1/swagger.json",
"/openapi.json", "/api-docs", "/docs",
"/health", "/healthz", "/status", "/metrics",
"/admin/api", "/internal/api", "/.well-known/openid-configuration",
"/v1", "/v2", "/rest", "/ws", "/rpc",
]
def discover_api_endpoints(base_url, paths=None, timeout=5):
    """Probe common API paths to discover active endpoints.

    Args:
        base_url: Base URL to probe; trailing slashes are ignored.
        paths: Iterable of paths (each beginning with "/") appended to
            ``base_url``. Defaults to ``COMMON_API_PATHS``.
        timeout: Per-request timeout in seconds.

    Returns:
        A list of dicts, one per path whose response status is below 500 and
        is not 404. Each dict carries ``url``, ``status``, ``content_type``,
        ``server`` and ``type``; responses with status 200 additionally carry
        ``finding`` and ``severity``.
    """
    if paths is None:
        paths = COMMON_API_PATHS
    root = base_url.rstrip("/")  # loop-invariant, computed once
    discovered = []
    for path in paths:
        url = f"{root}{path}"
        try:
            resp = requests.get(
                url,
                timeout=timeout,
                allow_redirects=False,
                verify=True,
                headers={"User-Agent": "API-Inventory-Agent/1.0"},
            )
        except requests.exceptions.RequestException:
            # Best-effort probing: an unreachable path is simply not reported.
            continue

        status = resp.status_code
        # 404 means the path does not exist; recording it would list every
        # probed path of any live host as "discovered". Other 4xx codes
        # (401/403/405, ...) are kept because they indicate the path is served.
        if status >= 500 or status == 404:
            continue

        content_type = resp.headers.get("Content-Type", "")
        # Media types are case-insensitive, so classify on a lowered copy while
        # reporting the header exactly as received.
        media_type = content_type.lower()
        if "json" in media_type:
            api_type = "REST/JSON"
        elif "xml" in media_type:
            api_type = "SOAP/XML"
        elif "html" in media_type and "swagger" in path.lower():
            api_type = "API Documentation"
        else:
            api_type = "unknown"

        entry = {
            "url": url,
            "status": status,
            "content_type": content_type,
            "server": resp.headers.get("Server", ""),
            "type": api_type,
        }
        if status == 200:
            entry["finding"] = "Active API endpoint"
            entry["severity"] = "INFO"
        discovered.append(entry)
    return discovered
def _collect_spec_operations(spec):
    """Flatten the ``paths`` object of a parsed spec dict into endpoint records."""
    http_methods = ("GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD")
    default_security = spec.get("security", [])
    paths = spec.get("paths")
    if not isinstance(paths, dict):
        return []
    endpoints = []
    for path, path_item in paths.items():
        if not isinstance(path_item, dict):
            continue
        for method, operation in path_item.items():
            # Path items also hold non-operation keys ("parameters", "$ref",
            # ...); only HTTP-method keys with an object value are operations.
            if method.upper() not in http_methods or not isinstance(operation, dict):
                continue
            endpoints.append({
                "method": method.upper(),
                "path": path,
                "summary": operation.get("summary", ""),
                "deprecated": operation.get("deprecated", False),
                # Operation-level "security" overrides the spec-wide default;
                # an empty list means no authentication is declared.
                "auth_required": bool(operation.get("security", default_security)),
            })
    return endpoints


def parse_swagger_spec(spec_url):
    """Fetch and parse OpenAPI/Swagger spec to inventory endpoints.

    Args:
        spec_url: URL of a JSON OpenAPI/Swagger document.

    Returns:
        On success, a dict with ``spec_version``, ``api_title``,
        ``api_version``, ``total_endpoints``, ``deprecated_endpoints`` and
        ``endpoints`` (a list of per-operation dicts). If the document cannot
        be fetched, is not valid JSON, or is not a JSON object, a dict with a
        single ``error`` key holding the failure message.
    """
    try:
        resp = requests.get(spec_url, timeout=15)
        resp.raise_for_status()
        spec = resp.json()
    except Exception as e:
        # Any fetch/decode failure is reported to the caller as data.
        return {"error": str(e)}

    # Valid JSON is not necessarily an object (it may be a list, string or
    # null); without this guard the .get() calls below would raise.
    if not isinstance(spec, dict):
        return {
            "error": "Unexpected spec format: expected a JSON object, "
                     f"got {type(spec).__name__}"
        }

    version = spec.get("openapi", spec.get("swagger", "unknown"))
    info = spec.get("info")
    if not isinstance(info, dict):
        info = {}
    endpoints = _collect_spec_operations(spec)
    deprecated = [e for e in endpoints if e["deprecated"]]
    return {
        "spec_version": version,
        "api_title": info.get("title", ""),
        "api_version": info.get("version", ""),
        "total_endpoints": len(endpoints),
        "deprecated_endpoints": len(deprecated),
        "endpoints": endpoints,
    }
# Compiled once at import time instead of on every call.
_JS_API_PATTERNS = (
    re.compile(r'["\'](/api/[^"\']+)["\']'),
    re.compile(r'["\'](/v\d+/[^"\']+)["\']'),
    re.compile(r'fetch\s*\(\s*["\']([^"\']+)["\']'),
    re.compile(r'axios\.\w+\s*\(\s*["\']([^"\']+)["\']'),
    re.compile(r'\.get\s*\(\s*["\']([^"\']+/api[^"\']*)["\']'),
    re.compile(r'\.post\s*\(\s*["\']([^"\']+/api[^"\']*)["\']'),
)
# Matches ending in these suffixes are static assets, not API references.
_JS_STATIC_SUFFIXES = (".js", ".css", ".png", ".jpg")


def scan_javascript_for_apis(js_url):
    """Fetch JavaScript file and extract API endpoint references.

    Args:
        js_url: URL of the JavaScript file to download and scan.

    Returns:
        On success, a dict with ``source`` (the URL), ``discovered_apis``
        (sorted unique matches) and ``count``. If the download fails or the
        server answers with an HTTP error status, a dict with a single
        ``error`` key holding the failure message.
    """
    try:
        resp = requests.get(js_url, timeout=15)
        # Without this check a 404/500 error page would be scanned as if it
        # were the requested script.
        resp.raise_for_status()
        content = resp.text
    except Exception as e:
        return {"error": str(e)}

    found_apis = set()
    for pattern in _JS_API_PATTERNS:
        found_apis.update(
            match
            for match in pattern.findall(content)
            if len(match) > 3 and not match.endswith(_JS_STATIC_SUFFIXES)
        )
    return {"source": js_url, "discovered_apis": sorted(found_apis), "count": len(found_apis)}
# Default subdomain labels tried by enumerate_subdomains_for_apis().
_API_SUBDOMAIN_PREFIXES = (
    "api", "api-v1", "api-v2", "api-gateway", "api-internal",
    "gateway", "graphql", "rest", "ws", "webhook",
    "staging-api", "dev-api", "sandbox-api", "beta-api",
    "admin-api", "partner-api", "public-api", "mobile-api",
)


def _nslookup_has_answer(output):
    """Return True if nslookup output lists an address after a ``Name:`` line."""
    seen_name = False
    for line in output.splitlines():
        stripped = line.strip()
        if stripped.startswith("Name:"):
            seen_name = True
        elif seen_name and stripped.startswith("Address"):
            return True
    return False


def enumerate_subdomains_for_apis(domain, prefixes=None, timeout=5):
    """Use DNS enumeration to find API subdomains.

    Args:
        domain: Parent domain; each prefix is tried as ``<prefix>.<domain>``.
        prefixes: Iterable of subdomain labels to try. Defaults to the
            built-in list of common API prefixes.
        timeout: Seconds to wait for each ``nslookup`` invocation.

    Returns:
        A list of dicts with ``subdomain``, ``status`` ("resolved") and
        ``severity`` ("MEDIUM" when the prefix contains "internal" or
        "staging", otherwise "INFO") for every name that resolved. Lookups
        that time out, or a missing ``nslookup`` binary, yield no entry.
    """
    if prefixes is None:
        prefixes = _API_SUBDOMAIN_PREFIXES
    found = []
    for prefix in prefixes:
        subdomain = f"{prefix}.{domain}"
        try:
            result = subprocess.run(
                ["nslookup", subdomain], capture_output=True, text=True, timeout=timeout
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            continue
        # nslookup prints the resolver's own "Address:" line even for
        # NXDOMAIN, so a bare substring test reports every name as resolved.
        # Only an address that follows a "Name:" line is an actual answer.
        if not _nslookup_has_answer(result.stdout):
            continue
        found.append({
            "subdomain": subdomain,
            "status": "resolved",
            "severity": "MEDIUM" if "internal" in prefix or "staging" in prefix else "INFO",
        })
    return found
def classify_api_risk(endpoints):
    """Classify discovered APIs by risk level.

    Each endpoint dict is matched on its ``url`` (falling back to ``path``)
    case-insensitively. The first matching rule wins, in this order:
    administrative/internal markers, GraphQL markers, API documentation
    markers, then the ``deprecated`` flag. Anything else is rated LOW.

    Returns:
        A new list with one dict per input endpoint: a copy of the endpoint
        extended with ``risk`` and ``reason``.
    """
    sensitive_markers = ("/admin", "/internal", "/debug", "/metrics")
    graphql_markers = ("/graphql", "/graphiql", "/playground")
    doc_markers = ("swagger", "api-docs")

    def mentions(text, markers):
        return any(marker in text for marker in markers)

    classified = []
    for endpoint in endpoints:
        location = endpoint.get("url", endpoint.get("path", "")).lower()
        if mentions(location, sensitive_markers):
            level, why = "HIGH", "Administrative/internal endpoint exposed"
        elif mentions(location, graphql_markers):
            level, why = "HIGH", "GraphQL endpoint — check introspection"
        elif mentions(location, doc_markers):
            level, why = "MEDIUM", "API documentation publicly accessible"
        elif endpoint.get("deprecated", False):
            level, why = "HIGH", "Deprecated/zombie API still accessible"
        else:
            level, why = "LOW", "Standard endpoint"
        classified.append({**endpoint, "risk": level, "reason": why})
    return classified
def run_audit(args):
    """Execute API inventory and discovery audit.

    Runs each discovery step whose corresponding option is set on ``args``
    (``target_url``, ``swagger_url``, ``js_url``, ``domain``), prints a
    human-readable summary to stdout and collects the raw results.

    Args:
        args: Parsed command-line namespace exposing the four attributes
            named above; a falsy value skips that step.

    Returns:
        A dict with any of the keys ``discovered_endpoints``,
        ``swagger_spec``, ``js_api_discovery`` and ``api_subdomains``,
        present only for the steps that ran.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    # datetime.utcnow() is deprecated; dropping tzinfo keeps the printed
    # timestamp in the same naive ISO format as before.
    generated = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    print(f"\n{'='*60}")
    print(" API INVENTORY AND DISCOVERY AUDIT")
    print(f" Generated: {generated} UTC")
    print(f"{'='*60}\n")
    report = {}
    if args.target_url:
        discovered = discover_api_endpoints(args.target_url)
        classified = classify_api_risk(discovered)
        report["discovered_endpoints"] = classified
        print(f"--- ENDPOINT DISCOVERY ({len(classified)} found) ---")
        for ep in classified:
            print(f" [{ep['risk']}] {ep['url']} ({ep.get('status','')}): {ep['reason']}")
    if args.swagger_url:
        spec = parse_swagger_spec(args.swagger_url)
        report["swagger_spec"] = spec
        print("\n--- SWAGGER SPEC ANALYSIS ---")
        if "error" in spec:
            # Surface the failure instead of printing an empty title and
            # zero counts that look like a successful but empty spec.
            print(f" Error: {spec['error']}")
        else:
            print(f" API: {spec.get('api_title','')} v{spec.get('api_version','')}")
            print(f" Endpoints: {spec.get('total_endpoints',0)}")
            print(f" Deprecated: {spec.get('deprecated_endpoints',0)}")
    if args.js_url:
        js_apis = scan_javascript_for_apis(args.js_url)
        report["js_api_discovery"] = js_apis
        print(f"\n--- JAVASCRIPT API EXTRACTION ({js_apis.get('count',0)}) ---")
        if "error" in js_apis:
            print(f" Error: {js_apis['error']}")
        # Only the first 15 matches are printed; the report keeps them all.
        for api in js_apis.get("discovered_apis", [])[:15]:
            print(f" {api}")
    if args.domain:
        subs = enumerate_subdomains_for_apis(args.domain)
        report["api_subdomains"] = subs
        print(f"\n--- API SUBDOMAIN ENUMERATION ({len(subs)} found) ---")
        for s in subs:
            print(f" [{s['severity']}] {s['subdomain']}")
    return report
def main():
    """Command-line entry point.

    Parses the discovery options, runs the audit and, when ``--output`` is
    given, writes the collected report to that path as indented JSON.
    """
    cli = argparse.ArgumentParser(description="API Inventory Discovery Agent")
    options = (
        ("--target-url", "Base URL to probe for API endpoints"),
        ("--swagger-url", "Swagger/OpenAPI spec URL to parse"),
        ("--js-url", "JavaScript file URL to extract API paths"),
        ("--domain", "Domain for API subdomain enumeration"),
        ("--output", "Save report to JSON file"),
    )
    for flag, description in options:
        cli.add_argument(flag, help=description)
    args = cli.parse_args()

    report = run_audit(args)
    if not args.output:
        return
    with open(args.output, "w") as out_file:
        # default=str stringifies any value json cannot serialise natively.
        json.dump(report, out_file, indent=2, default=str)
    print(f"\n[+] Report saved to {args.output}")


if __name__ == "__main__":
    main()