#!/usr/bin/env python3 # For authorized penetration testing and educational environments only. # Usage against targets without prior mutual consent is illegal. # It is the end user's responsibility to obey all applicable local, state and federal laws. """Agent for detecting broken link hijacking vulnerabilities on websites.""" import argparse import json import re from datetime import datetime, timezone from urllib.parse import urlparse, urljoin try: import requests HAS_REQUESTS = True except ImportError: HAS_REQUESTS = False HIJACKABLE_PATTERNS = { "github": r"github\.com/[\w-]+(?!/[\w-]+)", "npm": r"npmjs\.com/package/[\w-]+", "twitter": r"twitter\.com/[\w]+", "bitbucket": r"bitbucket\.org/[\w-]+", "gitlab": r"gitlab\.com/[\w-]+", "pypi": r"pypi\.org/project/[\w-]+", } def extract_links(html, base_url): """Extract all links from HTML content.""" links = set() for match in re.finditer(r'href=["\']([^"\']+)', html): link = match.group(1) if link.startswith(("http://", "https://")): links.add(link) elif link.startswith("/"): links.add(urljoin(base_url, link)) for match in re.finditer(r'src=["\']([^"\']+)', html): link = match.group(1) if link.startswith(("http://", "https://")): links.add(link) return links def check_link_status(url): """Check if a URL is reachable and categorize its status.""" if not HAS_REQUESTS: return {"url": url, "status": "unknown", "note": "requests library not available"} try: resp = requests.head(url, timeout=10, allow_redirects=True, verify=False) return { "url": url, "status_code": resp.status_code, "final_url": resp.url, "broken": resp.status_code == 404, } except requests.ConnectionError: return {"url": url, "status_code": None, "broken": True, "error": "connection_failed"} except requests.Timeout: return {"url": url, "status_code": None, "broken": False, "error": "timeout"} except requests.RequestException as e: return {"url": url, "status_code": None, "broken": False, "error": str(e)[:100]} def check_hijackable(link_status): """Determine if a broken link is hijackable.""" url = link_status.get("url", "") if not link_status.get("broken"): return None parsed = urlparse(url) domain = parsed.netloc.lower() for platform, pattern in HIJACKABLE_PATTERNS.items(): if re.search(pattern, url): return { "url": url, "platform": platform, "domain": domain, "hijack_type": f"Register {platform} account/resource", "severity": "HIGH", } if link_status.get("error") == "connection_failed": return { "url": url, "domain": domain, "hijack_type": "Domain takeover (unregistered domain)", "severity": "CRITICAL", } return None def scan_website(target_url): """Scan a website for broken link hijacking opportunities.""" if not HAS_REQUESTS: return [] findings = [] try: resp = requests.get(target_url, timeout=15, verify=False) links = extract_links(resp.text, target_url) except requests.RequestException: return findings external_links = [l for l in links if urlparse(l).netloc != urlparse(target_url).netloc] print(f"[*] Found {len(external_links)} external links") for link in external_links: status = check_link_status(link) hijack = check_hijackable(status) if hijack: findings.append(hijack) return findings def main(): parser = argparse.ArgumentParser( description="Detect broken link hijacking vulnerabilities" ) parser.add_argument("--url", required=True, help="Target website URL") parser.add_argument("--output", "-o", help="Output JSON report") parser.add_argument("--verbose", "-v", action="store_true") args = parser.parse_args() print("[*] Broken Link Hijacking Detection Agent") findings = scan_website(args.url) report = { "timestamp": datetime.now(timezone.utc).isoformat(), "target": args.url, "hijackable_links": findings, "count": len(findings), "risk_level": "CRITICAL" if any(f["severity"] == "CRITICAL" for f in findings) else "HIGH" if findings else "LOW", } print(f"[*] Hijackable links found: {len(findings)}") if args.verbose: for f in findings: print(f" [{f['severity']}] {f['url']} - {f['hijack_type']}") if args.output: with open(args.output, "w") as f: json.dump(report, f, indent=2) print(f"[*] Report saved to {args.output}") else: print(json.dumps(report, indent=2)) if __name__ == "__main__": main()