Files
Anthropic-Cybersecurity-Skills/skills/analyzing-malicious-url-with-urlscan/scripts/agent.py
T
mukul975 27c6414ca5 Add folder anatomy (scripts/agent.py + references/api-reference.md) for 648 cybersecurity skills
Complete skill folder anatomy across all cybersecurity skills:
- scripts/agent.py: 80-150 line Python agents using real libraries (impacket,
  boto3, azure-mgmt-*, kubernetes, pefile, yara, scapy, shodan, stix2, etc.)
- references/api-reference.md: real API documentation with method signatures
- LICENSE: MIT license for all skill folders
2026-03-10 21:02:12 +01:00

171 lines
6.2 KiB
Python

#!/usr/bin/env python3
"""URLScan.io Malicious URL Analysis Agent - Submits and analyzes URLs via the urlscan.io API."""
import json
import time
import logging
import argparse
from datetime import datetime
import requests
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)
URLSCAN_API = "https://urlscan.io/api/v1"
def submit_url(url, api_key, visibility="private"):
"""Submit a URL to urlscan.io for scanning."""
headers = {"API-Key": api_key, "Content-Type": "application/json"}
payload = {"url": url, "visibility": visibility}
resp = requests.post(f"{URLSCAN_API}/scan/", headers=headers, json=payload, timeout=30)
resp.raise_for_status()
data = resp.json()
logger.info("Submitted URL: %s -> scan UUID: %s", url, data.get("uuid"))
return data
def get_scan_result(uuid, api_key, max_wait=120):
"""Poll for scan results until complete."""
headers = {"API-Key": api_key}
for _ in range(max_wait // 5):
try:
resp = requests.get(f"{URLSCAN_API}/result/{uuid}/", headers=headers, timeout=30)
if resp.status_code == 200:
return resp.json()
except requests.RequestException:
pass
time.sleep(5)
return None
def search_urlscan(query, api_key, size=100):
"""Search urlscan.io for existing scans."""
headers = {"API-Key": api_key}
params = {"q": query, "size": size}
resp = requests.get(f"{URLSCAN_API}/search/", headers=headers, params=params, timeout=30)
resp.raise_for_status()
return resp.json().get("results", [])
def analyze_result(result):
"""Analyze urlscan.io scan result for malicious indicators."""
findings = []
verdicts = result.get("verdicts", {})
overall = verdicts.get("overall", {})
urlscan_verdict = verdicts.get("urlscan", {})
community = verdicts.get("community", {})
if overall.get("malicious"):
findings.append({"type": "Malicious verdict", "severity": "critical", "source": "overall", "score": overall.get("score", 0)})
if urlscan_verdict.get("malicious"):
findings.append({"type": "URLScan malicious", "severity": "critical", "score": urlscan_verdict.get("score", 0)})
if community.get("score", 0) < 0:
findings.append({"type": "Negative community score", "severity": "high", "score": community.get("score")})
page = result.get("page", {})
lists = result.get("lists", {})
stats = result.get("stats", {})
if lists.get("ips", []):
for ip in lists["ips"]:
if ip.get("malicious"):
findings.append({"type": "Malicious IP contacted", "severity": "high", "ip": ip.get("ip"), "asn": ip.get("asn")})
for cert in lists.get("certificates", []):
if cert.get("validTo"):
try:
exp = datetime.fromisoformat(cert["validTo"].replace("Z", "+00:00"))
if exp < datetime.now(exp.tzinfo):
findings.append({"type": "Expired TLS certificate", "severity": "medium", "subject": cert.get("subjectName")})
except (ValueError, TypeError):
pass
js_count = len([r for r in stats.get("resourceStats", []) if "javascript" in r.get("type", "").lower()])
if js_count > 20:
findings.append({"type": "High JavaScript resource count", "severity": "medium", "count": js_count})
redirects = stats.get("uniqCountries", 0)
if result.get("data", {}).get("requests"):
redirect_chain = [r.get("request", {}).get("redirectHasExtraInfo") for r in result["data"]["requests"][:5]]
return {
"url": page.get("url", ""),
"domain": page.get("domain", ""),
"ip": page.get("ip", ""),
"country": page.get("country", ""),
"server": page.get("server", ""),
"status_code": page.get("status", 0),
"title": page.get("title", ""),
"mime_type": page.get("mimeType", ""),
"tls_issuer": page.get("tlsIssuer", ""),
"overall_malicious": overall.get("malicious", False),
"overall_score": overall.get("score", 0),
"findings": findings,
}
def bulk_analyze(urls, api_key):
"""Submit and analyze multiple URLs."""
results = []
for url in urls:
try:
submission = submit_url(url, api_key)
uuid = submission.get("uuid")
if uuid:
result = get_scan_result(uuid, api_key)
if result:
analysis = analyze_result(result)
results.append(analysis)
else:
results.append({"url": url, "error": "Scan timeout"})
except requests.RequestException as e:
results.append({"url": url, "error": str(e)})
return results
def generate_report(analyses):
"""Generate URL analysis report."""
malicious = [a for a in analyses if a.get("overall_malicious")]
report = {
"timestamp": datetime.utcnow().isoformat(),
"urls_analyzed": len(analyses),
"malicious_count": len(malicious),
"results": analyses,
}
print(f"URLSCAN REPORT: {len(analyses)} URLs analyzed, {len(malicious)} malicious")
return report
def main():
parser = argparse.ArgumentParser(description="URLScan.io Malicious URL Analysis Agent")
parser.add_argument("--api-key", required=True, help="urlscan.io API key")
parser.add_argument("--url", help="Single URL to scan")
parser.add_argument("--url-file", help="File with URLs (one per line)")
parser.add_argument("--search", help="Search query for existing scans")
parser.add_argument("--output", default="urlscan_report.json")
args = parser.parse_args()
urls = []
if args.url:
urls.append(args.url)
if args.url_file:
with open(args.url_file) as f:
urls.extend(line.strip() for line in f if line.strip())
if args.search:
results = search_urlscan(args.search, args.api_key)
analyses = [analyze_result(r) for r in results if "page" in r]
else:
analyses = bulk_analyze(urls, args.api_key)
report = generate_report(analyses)
with open(args.output, "w") as f:
json.dump(report, f, indent=2)
logger.info("Report saved to %s", args.output)
if __name__ == "__main__":
main()