Files
Anthropic-Cybersecurity-Skills/skills/collecting-open-source-intelligence/scripts/agent.py
T
mukul975 27c6414ca5 Add folder anatomy (scripts/agent.py + references/api-reference.md) for 648 cybersecurity skills
Complete skill folder anatomy across all cybersecurity skills:
- scripts/agent.py: 80-150 line Python agents using real libraries (impacket,
  boto3, azure-mgmt-*, kubernetes, pefile, yara, scapy, shodan, stix2, etc.)
- references/api-reference.md: real API documentation with method signatures
- LICENSE: MIT license for all skill folders
2026-03-10 21:02:12 +01:00

184 lines
6.4 KiB
Python

#!/usr/bin/env python3
# For authorized penetration testing and lab environments only
"""OSINT Collection Agent - Gathers open-source intelligence on targets using Shodan and crt.sh."""
import json
import logging
import argparse
from datetime import datetime
import requests
import shodan
# Configure the root logger at import time: INFO level, with a timestamp and
# the level name prefixed to every message.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
# Module-level logger used by the collection functions below.
logger = logging.getLogger(__name__)
def search_shodan(api_key, query, max_results=100):
    """Run a Shodan search and flatten every match into a summary record.

    Args:
        api_key: Shodan API key used to construct the client.
        query: Shodan search query string.
        max_results: Forwarded to the search call as ``limit``.

    Returns:
        A list with one dict per match, holding the keys ``ip``, ``port``,
        ``org``, ``os``, ``hostnames``, ``product``, ``version``,
        ``country`` and ``ssl_subject``.
    """
    client = shodan.Shodan(api_key)
    response = client.search(query, limit=max_results)
    # "ip_str" and "port" are read as mandatory keys; every other field
    # falls back to an empty value when the match does not carry it.
    hosts = [
        {
            "ip": banner["ip_str"],
            "port": banner["port"],
            "org": banner.get("org", ""),
            "os": banner.get("os", ""),
            "hostnames": banner.get("hostnames", []),
            "product": banner.get("product", ""),
            "version": banner.get("version", ""),
            "country": banner.get("location", {}).get("country_name", ""),
            "ssl_subject": banner.get("ssl", {}).get("cert", {}).get("subject", {}),
        }
        for banner in response["matches"]
    ]
    logger.info("Shodan returned %d results for query: %s", len(hosts), query)
    return hosts
def shodan_host_lookup(api_key, ip_address):
    """Fetch the Shodan host record for one IP and return a trimmed summary.

    Args:
        api_key: Shodan API key used to construct the client.
        ip_address: Address passed to the Shodan host lookup.

    Returns:
        A dict with the keys ``ip``, ``org``, ``os``, ``ports``, ``vulns``,
        ``hostnames``, ``country``, ``city`` and ``asn``.
    """
    client = shodan.Shodan(api_key)
    record = client.host(ip_address)

    # (output key, key in the Shodan record, fallback when absent)
    optional_fields = (
        ("org", "org", ""),
        ("os", "os", ""),
        ("ports", "ports", []),
        ("vulns", "vulns", []),
        ("hostnames", "hostnames", []),
        ("country", "country_name", ""),
        ("city", "city", ""),
        ("asn", "asn", ""),
    )

    # "ip_str" is the only field read as mandatory.
    summary = {"ip": record["ip_str"]}
    for out_key, source_key, fallback in optional_fields:
        summary[out_key] = record.get(source_key, fallback)
    return summary
def query_crtsh(domain):
    """Query certificate transparency logs via crt.sh for subdomain discovery.

    The lookup is best-effort: any transport failure, non-200 status or
    unparsable body is logged as a warning and yields an empty list, so one
    unavailable data source does not abort the rest of the collection run.

    Args:
        domain: Base domain; the query sent is the wildcard ``%.<domain>``.

    Returns:
        A sorted list of unique, lower-cased names found in the
        ``name_value`` field of the returned entries. Wildcard names
        (containing ``*``) are skipped.
    """
    try:
        # Pass the query through ``params`` so the literal "%" wildcard is
        # percent-encoded instead of being embedded raw in the URL.
        resp = requests.get(
            "https://crt.sh/",
            params={"q": f"%.{domain}", "output": "json"},
            timeout=30,
        )
    except requests.RequestException as exc:
        logger.warning("crt.sh request failed for %s: %s", domain, exc)
        return []

    if resp.status_code != 200:
        logger.warning("crt.sh query failed for %s: %d", domain, resp.status_code)
        return []

    try:
        entries = resp.json()
    except ValueError:
        # A 200 response whose body is not JSON (e.g. an HTML error page).
        logger.warning("crt.sh returned a non-JSON response for %s", domain)
        return []

    subdomains = set()
    for entry in entries:
        # One entry may list several names separated by newlines.
        name_value = entry.get("name_value") or ""
        for name in name_value.split("\n"):
            name = name.strip().lower()
            if name and "*" not in name:
                subdomains.add(name)

    logger.info("crt.sh found %d unique subdomains for %s", len(subdomains), domain)
    return sorted(subdomains)
def whois_lookup(domain):
    """Look up registration data for a domain through the rdap.org RDAP service.

    Args:
        domain: Domain name appended to the RDAP lookup URL.

    Returns:
        On HTTP 200, a dict with ``domain``, ``status``, ``nameservers``
        (the ``ldhName`` of each nameserver object) and ``events`` (a list
        of ``{"action", "date"}`` dicts). On any other status, a dict with
        ``domain`` and ``error`` set to the HTTP status code.
    """
    response = requests.get(f"https://rdap.org/domain/{domain}", timeout=30)

    # Anything other than 200 is reported back as an error record.
    if response.status_code != 200:
        return {"domain": domain, "error": response.status_code}

    payload = response.json()
    status = payload.get("status", [])
    nameservers = [
        server.get("ldhName", "") for server in payload.get("nameservers", [])
    ]
    events = [
        {"action": event["eventAction"], "date": event["eventDate"]}
        for event in payload.get("events", [])
    ]
    return {
        "domain": domain,
        "status": status,
        "nameservers": nameservers,
        "events": events,
    }
def query_securitytrails(api_key, domain):
    """Fetch the SecurityTrails domain record and return selected fields.

    Args:
        api_key: SecurityTrails key, sent in the ``apikey`` request header.
        domain: Domain name appended to the API endpoint URL.

    Returns:
        On HTTP 200, a dict with ``domain``, ``current_dns``, ``alexa_rank``
        and ``hostname``. On any other status, a dict with ``domain`` and
        ``error`` set to the HTTP status code.
    """
    endpoint = f"https://api.securitytrails.com/v1/domain/{domain}"
    response = requests.get(endpoint, headers={"apikey": api_key}, timeout=30)

    # Anything other than 200 is reported back as an error record.
    if response.status_code != 200:
        return {"domain": domain, "error": response.status_code}

    payload = response.json()
    summary = {"domain": domain}
    summary["current_dns"] = payload.get("current_dns", {})
    summary["alexa_rank"] = payload.get("alexa_rank")
    summary["hostname"] = payload.get("hostname")
    return summary
def search_github_exposure(query, github_token=None):
    """Search GitHub code for exposed credentials or sensitive data.

    Args:
        query: GitHub code-search query string.
        github_token: Optional personal access token; when given it is sent
            in the ``Authorization`` header.

    Returns:
        A list of dicts with ``repo`` (repository full name), ``path`` and
        ``url`` for up to 20 hits. An empty list is returned when the API
        does not answer with HTTP 200; that case is logged as a warning so
        it can be told apart from a search that genuinely found nothing.
    """
    headers = {"Accept": "application/vnd.github.v3+json"}
    if github_token:
        headers["Authorization"] = f"token {github_token}"

    resp = requests.get(
        "https://api.github.com/search/code",
        headers=headers,
        params={"q": query, "per_page": 20},
        timeout=30,
    )
    if resp.status_code != 200:
        # Surface failures (bad token, rate limit, ...) instead of silently
        # reporting zero exposures.
        logger.warning("GitHub search failed for %s: %d", query, resp.status_code)
        return []

    results = [
        {
            "repo": item["repository"]["full_name"],
            "path": item["path"],
            "url": item["html_url"],
        }
        for item in resp.json().get("items", [])
    ]
    logger.info("GitHub search found %d results for: %s", len(results), query)
    return results
def generate_osint_report(domain, subdomains, shodan_results, whois_data, github_results):
    """Generate a consolidated OSINT report and print a short summary.

    Args:
        domain: Target domain the report is about.
        subdomains: Sized collection of discovered subdomain names.
        shodan_results: Sized collection of Shodan host records.
        whois_data: Registration data stored under ``whois`` as-is.
        github_results: Sized collection of GitHub code-search hits.

    Returns:
        A dict with ``target``, ``timestamp``, ``subdomains``,
        ``subdomain_count``, ``shodan_hosts``, ``whois`` and
        ``github_exposure``. ``timestamp`` is an ISO 8601 string in UTC
        that carries an explicit ``+00:00`` offset.
    """
    # Imported locally so this function does not depend on a file-level
    # import of ``timezone``.
    from datetime import timezone

    # datetime.utcnow() is deprecated and returns a naive value; build a
    # timezone-aware UTC timestamp so the report is unambiguous.
    generated_at = datetime.now(timezone.utc).isoformat()

    report = {
        "target": domain,
        "timestamp": generated_at,
        "subdomains": subdomains,
        "subdomain_count": len(subdomains),
        "shodan_hosts": shodan_results,
        "whois": whois_data,
        "github_exposure": github_results,
    }

    # Human-readable summary on stdout; the full data is in the return value.
    lines = [
        f"OSINT REPORT - {domain}",
        "=" * 40,
        f"Subdomains discovered: {len(subdomains)}",
        f"Shodan hosts found: {len(shodan_results)}",
        f"GitHub exposures: {len(github_results)}",
    ]
    print("\n".join(lines))
    return report
def main():
    """Command-line entry point: collect OSINT for a domain and save a report.

    Always queries crt.sh and RDAP for ``--domain``. Shodan is searched only
    when ``--shodan-key`` is given, and GitHub only when ``--github-token``
    is given. The consolidated report is written as JSON to ``--output``.
    """
    cli = argparse.ArgumentParser(description="OSINT Collection Agent")
    # (flag, keyword arguments) pairs, registered in display order.
    argument_specs = (
        ("--domain", {"required": True, "help": "Target domain for OSINT"}),
        ("--shodan-key", {"help": "Shodan API key"}),
        ("--shodan-query", {"help": "Custom Shodan search query"}),
        ("--github-token", {"help": "GitHub personal access token"}),
        ("--output", {"default": "osint_report.json"}),
    )
    for flag, options in argument_specs:
        cli.add_argument(flag, **options)
    args = cli.parse_args()

    subdomains = query_crtsh(args.domain)
    whois_data = whois_lookup(args.domain)

    # Shodan is optional; without a custom query, search by hostname.
    if args.shodan_key:
        shodan_query = args.shodan_query or f"hostname:{args.domain}"
        shodan_results = search_shodan(args.shodan_key, shodan_query)
    else:
        shodan_results = []

    # GitHub code search runs only when a token was supplied.
    if args.github_token:
        exposure_query = f'"{args.domain}" password OR secret OR api_key'
        github_results = search_github_exposure(exposure_query, args.github_token)
    else:
        github_results = []

    report = generate_osint_report(
        args.domain, subdomains, shodan_results, whois_data, github_results
    )

    with open(args.output, "w") as report_file:
        json.dump(report, report_file, indent=2)
    logger.info("OSINT report saved to %s", args.output)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()