#!/usr/bin/env python3
"""Agent for detecting broken link hijacking vulnerabilities on websites."""

import argparse
import json
import re
import sys
from datetime import datetime, timezone
from urllib.parse import urlparse, urljoin

try:
    import requests
    HAS_REQUESTS = True
except ImportError:
    HAS_REQUESTS = False

# Platform name -> regex describing "<host>/<path>" of a claimable resource.
# Patterns are matched against the parsed hostname + path (never the query
# string), anchored at the start of the hostname with optional subdomains.
HIJACKABLE_PATTERNS = {
    # An earlier negative lookahead meant to exclude repository paths was
    # defeated by regex backtracking for every account name longer than one
    # character, so it is omitted: any URL under an account matches.
    "github": r"github\.com/[\w-]+",
    "npm": r"npmjs\.com/package/[\w-]+",
    "twitter": r"twitter\.com/[\w]+",
    "bitbucket": r"bitbucket\.org/[\w-]+",
    "gitlab": r"gitlab\.com/[\w-]+",
    "pypi": r"pypi\.org/project/[\w-]+",
}

# Matches the value of an href="..." or src="..." attribute.
_LINK_ATTR_RE = re.compile(r'(?:href|src)=["\']([^"\']+)')

# Optional subdomain labels allowed in front of a platform hostname.
_SUBDOMAIN_PREFIX = r"(?:[a-z0-9-]+\.)*"


def _netloc(url):
    """Return the lower-cased network location of *url*, or None if unparseable."""
    try:
        return urlparse(url).netloc.lower()
    except ValueError:
        return None


def _overall_risk(findings):
    """Return the report-wide risk level derived from the findings' severities."""
    if any(finding["severity"] == "CRITICAL" for finding in findings):
        return "CRITICAL"
    return "HIGH" if findings else "LOW"


def extract_links(html, base_url):
    """Extract links from the href and src attributes of HTML content.

    Absolute http(s) URLs are kept as-is. Values starting with "/" (site-relative
    paths and protocol-relative "//host/..." URLs) are resolved against
    *base_url*. Every other value (relative paths, mailto:, javascript:, ...)
    is ignored.

    Args:
        html: HTML document text.
        base_url: URL used to resolve values that start with "/".

    Returns:
        A set of absolute URL strings.
    """
    links = set()
    for match in _LINK_ATTR_RE.finditer(html):
        link = match.group(1)
        if link.startswith(("http://", "https://")):
            links.add(link)
        elif link.startswith("/"):
            try:
                links.add(urljoin(base_url, link))
            except ValueError:
                # urljoin rejects malformed hosts (e.g. an unbalanced "[");
                # one bad attribute must not abort the whole extraction.
                continue
    return links


def check_link_status(url):
    """Check if a URL is reachable and categorize its status.

    Sends a HEAD request (following redirects, TLS verification disabled).

    Args:
        url: Absolute URL to probe.

    Returns:
        A dict that always contains "url". On an HTTP response it also holds
        "status_code", "final_url" and "broken" (True only for 404). On a
        request failure it holds "status_code" None, "broken" and "error".
        When the requests library is missing it holds "status" and "note".
    """
    if not HAS_REQUESTS:
        return {"url": url, "status": "unknown", "note": "requests library not available"}
    try:
        resp = requests.head(url, timeout=10, allow_redirects=True, verify=False)
    except requests.Timeout:
        # Caught before ConnectionError on purpose: requests.ConnectTimeout
        # derives from both classes, and a slow host is not a dead domain.
        return {"url": url, "status_code": None, "broken": False, "error": "timeout"}
    except requests.ConnectionError:
        return {"url": url, "status_code": None, "broken": True, "error": "connection_failed"}
    except requests.RequestException as e:
        return {"url": url, "status_code": None, "broken": False, "error": str(e)[:100]}
    return {
        "url": url,
        "status_code": resp.status_code,
        "final_url": resp.url,
        "broken": resp.status_code == 404,
    }


def check_hijackable(link_status):
    """Determine if a broken link is hijackable.

    Args:
        link_status: A dict as produced by check_link_status; the keys read
            are "url", "broken" and "error".

    Returns:
        A finding dict ("url", "domain", "hijack_type", "severity", plus
        "platform" for platform matches), or None when the link is not
        broken, its URL cannot be parsed, or no takeover rule applies.
    """
    if not link_status.get("broken"):
        return None
    url = link_status.get("url", "")
    try:
        parsed = urlparse(url)
        host = parsed.hostname or ""
    except ValueError:
        return None
    domain = parsed.netloc.lower()

    # Match on hostname + path only, so a platform URL embedded in a query
    # string or in another site's path or hostname cannot trigger a finding.
    target = f"{host}{parsed.path}"
    for platform, pattern in HIJACKABLE_PATTERNS.items():
        if re.match(rf"{_SUBDOMAIN_PREFIX}(?:{pattern})", target):
            return {
                "url": url,
                "platform": platform,
                "domain": domain,
                "hijack_type": f"Register {platform} account/resource",
                "severity": "HIGH",
            }

    if link_status.get("error") == "connection_failed":
        return {
            "url": url,
            "domain": domain,
            "hijack_type": "Domain takeover (unregistered domain)",
            "severity": "CRITICAL",
        }
    return None


def scan_website(target_url):
    """Scan a website for broken link hijacking opportunities.

    Fetches *target_url*, extracts its links, probes every link whose host
    differs from the target's, and collects the hijackable ones.

    Args:
        target_url: URL of the page to scan.

    Returns:
        A list of finding dicts from check_hijackable. The list is empty when
        the requests library is missing or the page cannot be fetched; both
        cases are reported on stderr.
    """
    if not HAS_REQUESTS:
        print("[!] requests library not available; scan skipped", file=sys.stderr)
        return []
    findings = []
    try:
        resp = requests.get(target_url, timeout=15, verify=False)
    except requests.RequestException as exc:
        print(f"[!] Failed to fetch {target_url}: {exc}", file=sys.stderr)
        return findings

    links = extract_links(resp.text, target_url)
    target_host = _netloc(target_url)
    # Sorted so the report order is deterministic; unparseable links are skipped.
    external_links = sorted(
        link for link in links if _netloc(link) not in (None, target_host)
    )
    print(f"[*] Found {len(external_links)} external links")
    for link in external_links:
        hijack = check_hijackable(check_link_status(link))
        if hijack:
            findings.append(hijack)
    return findings


def main():
    """Parse command-line arguments, run the scan and emit the JSON report."""
    parser = argparse.ArgumentParser(
        description="Detect broken link hijacking vulnerabilities"
    )
    parser.add_argument("--url", required=True, help="Target website URL")
    parser.add_argument("--output", "-o", help="Output JSON report")
    parser.add_argument("--verbose", "-v", action="store_true")
    args = parser.parse_args()

    print("[*] Broken Link Hijacking Detection Agent")
    findings = scan_website(args.url)
    report = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "target": args.url,
        "hijackable_links": findings,
        "count": len(findings),
        "risk_level": _overall_risk(findings),
    }
    print(f"[*] Hijackable links found: {len(findings)}")
    if args.verbose:
        for finding in findings:
            print(f" [{finding['severity']}] {finding['url']} - {finding['hijack_type']}")
    if args.output:
        with open(args.output, "w", encoding="utf-8") as report_file:
            json.dump(report, report_file, indent=2)
        print(f"[*] Report saved to {args.output}")
    else:
        print(json.dumps(report, indent=2))


if __name__ == "__main__":
    main()