Files
Anthropic-Cybersecurity-Skills/skills/exploiting-broken-link-hijacking/scripts/agent.py
T
mukul975 c21af3347e Complete folder anatomy for all 649 cybersecurity skills + update LICENSE to Mahipal
- Add scripts/agent.py and references/api-reference.md to all remaining skills
- Update all 648 LICENSE files: copyright now reads 'Mahipal'
- Add implementing-security-monitoring-with-datadog (new skill with full anatomy)
- All 649 skills now have: SKILL.md, LICENSE, scripts/agent.py, references/api-reference.md
2026-03-11 00:22:12 +01:00

147 lines
4.6 KiB
Python

#!/usr/bin/env python3
"""Agent for detecting broken link hijacking vulnerabilities on websites."""
import argparse
import json
import re
import sys
from datetime import datetime, timezone
from urllib.parse import urlparse, urljoin
# `requests` is an optional third-party dependency. Record whether it could be
# imported so callers can test HAS_REQUESTS before touching the module.
try:
    import requests
except ImportError:
    HAS_REQUESTS = False
else:
    HAS_REQUESTS = True
# Regexes for links that point at third-party platforms where a missing
# account or package name could potentially be registered by someone else.
# Keys are the platform labels reported in findings; values are patterns meant
# to be applied with ``re.search``.
HIJACKABLE_PATTERNS = {
    # Profile-level URLs only (github.com/<user>), not github.com/<user>/<repo>.
    # The lookahead must also reject a following name character: otherwise the
    # engine backtracks one character ("use" followed by "r/repo") and the
    # repository exclusion never takes effect.
    "github": r"github\.com/[\w-]+(?![\w-]|/[\w-])",
    "npm": r"npmjs\.com/package/[\w-]+",
    "twitter": r"twitter\.com/[\w]+",
    "bitbucket": r"bitbucket\.org/[\w-]+",
    "gitlab": r"gitlab\.com/[\w-]+",
    "pypi": r"pypi\.org/project/[\w-]+",
}
def extract_links(html, base_url):
    """Extract link targets from ``href`` and ``src`` attributes in HTML.

    Absolute ``http://`` / ``https://`` values are kept as-is. Values starting
    with ``/`` -- root-relative (``/path``) and protocol-relative
    (``//host/path``) -- are resolved against *base_url*. Everything else
    (``mailto:``, ``#fragment``, document-relative paths, ...) is skipped.

    Args:
        html: Raw HTML text to scan; empty or None yields an empty set.
        base_url: URL the HTML was fetched from, used to resolve ``/`` links.

    Returns:
        A set of absolute URL strings.
    """
    links = set()
    if not html:
        return links
    # Both attributes get the same treatment so that externally hosted
    # resources referenced as src="//cdn.example/x.js" are not dropped.
    # Attribute names are matched case-insensitively, with optional
    # whitespace around "=".
    attr_pattern = r'(?:href|src)\s*=\s*["\']([^"\']+)'
    for match in re.finditer(attr_pattern, html, re.IGNORECASE):
        link = match.group(1).strip()
        if link.startswith(("http://", "https://")):
            links.add(link)
        elif link.startswith("/"):
            links.add(urljoin(base_url, link))
    return links
def check_link_status(url):
    """Probe *url* with a HEAD request and classify the outcome.

    Args:
        url: Absolute URL to probe.

    Returns:
        A dict that always contains ``url``, ``status_code`` and ``broken``.
        A completed request adds ``final_url`` (the URL after redirects); a
        failed one adds ``error``. When ``requests`` is unavailable the dict
        also carries ``status: "unknown"`` and an explanatory ``note``.
        ``broken`` is True for an HTTP 404 or a failed connection, and False
        for timeouts and other request errors.
    """
    if not HAS_REQUESTS:
        # Same keys as the other outcomes so consumers can rely on them.
        return {
            "url": url,
            "status": "unknown",
            "status_code": None,
            "broken": False,
            "note": "requests library not available",
        }
    try:
        # verify=False: certificate validation is disabled for the probe.
        resp = requests.head(url, timeout=10, allow_redirects=True, verify=False)
    except requests.Timeout:
        # Must be handled before ConnectionError: requests' ConnectTimeout
        # derives from both, and a slow host is not evidence of a dead link.
        return {"url": url, "status_code": None, "broken": False, "error": "timeout"}
    except requests.ConnectionError:
        return {"url": url, "status_code": None, "broken": True, "error": "connection_failed"}
    except requests.RequestException as e:
        # Error text is truncated to keep the report compact.
        return {"url": url, "status_code": None, "broken": False, "error": str(e)[:100]}
    return {
        "url": url,
        "status_code": resp.status_code,
        "final_url": resp.url,
        "broken": resp.status_code == 404,
    }
def check_hijackable(link_status):
    """Classify a broken link as a hijacking opportunity, if it looks like one.

    Args:
        link_status: Link-check result dict; the keys ``url``, ``broken`` and
            ``error`` are read (all optional).

    Returns:
        A finding dict with ``url``, ``domain``, ``hijack_type`` and
        ``severity`` (plus ``platform`` for platform matches), or None when
        the link is not broken or no heuristic applies.
    """
    if not link_status.get("broken"):
        return None
    url = link_status.get("url", "")
    parsed = urlparse(url)
    domain = parsed.netloc.lower()
    # Match on host + path only. Searching the raw URL would also hit platform
    # names inside query strings or look-alike hosts such as "notgithub.com".
    # ``hostname`` is lower-cased and excludes port and credentials.
    target = (parsed.hostname or "") + parsed.path
    for platform, pattern in HIJACKABLE_PATTERNS.items():
        # Anchor the platform host at the start of the host or at a label
        # boundary, so "www.<platform host>" still matches.
        if re.search(rf"(?:^|\.)(?:{pattern})", target):
            return {
                "url": url,
                "platform": platform,
                "domain": domain,
                "hijack_type": f"Register {platform} account/resource",
                "severity": "HIGH",
            }
    if link_status.get("error") == "connection_failed":
        # Heuristic: a failed connection is reported as a possibly
        # unregistered domain; registration is not actually checked here.
        return {
            "url": url,
            "domain": domain,
            "hijack_type": "Domain takeover (unregistered domain)",
            "severity": "CRITICAL",
        }
    return None
def scan_website(target_url):
    """Fetch *target_url* and report external links that look hijackable.

    The page is downloaded once, its links are extracted, links whose host
    differs from the target's are probed, and each probe result is classified.

    Args:
        target_url: URL of the page to scan.

    Returns:
        A list of finding dicts. The list is empty when nothing is found, when
        the page cannot be fetched, or when ``requests`` is unavailable; the
        latter two cases are reported on stderr.
    """
    if not HAS_REQUESTS:
        # Say so explicitly: an empty result would otherwise look like a clean scan.
        print("[!] requests library not available; scan skipped", file=sys.stderr)
        return []
    try:
        resp = requests.get(target_url, timeout=15, verify=False)
    except requests.RequestException as exc:
        print(f"[!] Failed to fetch {target_url}: {exc}", file=sys.stderr)
        return []
    links = extract_links(resp.text, target_url)
    # Parse the target host once; compare case-insensitively.
    target_host = urlparse(target_url).netloc.lower()
    # Sorted so probing order and report order are reproducible.
    external_links = sorted(
        link for link in links if urlparse(link).netloc.lower() != target_host
    )
    print(f"[*] Found {len(external_links)} external links")
    findings = []
    for link in external_links:
        hijack = check_hijackable(check_link_status(link))
        if hijack:
            findings.append(hijack)
    return findings
def main():
    """Command-line entry point: scan ``--url`` and emit a JSON report.

    The report goes to the ``--output`` file when given, otherwise to stdout.
    ``--verbose`` additionally prints one line per finding.
    """
    cli = argparse.ArgumentParser(
        description="Detect broken link hijacking vulnerabilities"
    )
    cli.add_argument("--url", required=True, help="Target website URL")
    cli.add_argument("--output", "-o", help="Output JSON report")
    cli.add_argument("--verbose", "-v", action="store_true")
    options = cli.parse_args()

    print("[*] Broken Link Hijacking Detection Agent")
    findings = scan_website(options.url)
    total = len(findings)

    report = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "target": options.url,
        "hijackable_links": findings,
        "count": total,
    }
    # Overall risk is the worst severity present; no findings means LOW.
    if not findings:
        report["risk_level"] = "LOW"
    elif any(item["severity"] == "CRITICAL" for item in findings):
        report["risk_level"] = "CRITICAL"
    else:
        report["risk_level"] = "HIGH"

    print(f"[*] Hijackable links found: {total}")
    if options.verbose:
        for item in findings:
            print(f" [{item['severity']}] {item['url']} - {item['hijack_type']}")

    if not options.output:
        print(json.dumps(report, indent=2))
        return
    with open(options.output, "w") as handle:
        json.dump(report, handle, indent=2)
    print(f"[*] Report saved to {options.output}")


if __name__ == "__main__":
    main()