Files
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

149 lines
4.8 KiB
Python

#!/usr/bin/env python3
# For authorized penetration testing and educational environments only.
# Usage against targets without prior mutual consent is illegal.
# It is the end user's responsibility to obey all applicable local, state and federal laws.
"""Agent for detecting broken link hijacking vulnerabilities on websites."""
import argparse
import json
import re
from datetime import datetime, timezone
from urllib.parse import urlparse, urljoin
try:
import requests
HAS_REQUESTS = True
except ImportError:
HAS_REQUESTS = False
HIJACKABLE_PATTERNS = {
"github": r"github\.com/[\w-]+(?!/[\w-]+)",
"npm": r"npmjs\.com/package/[\w-]+",
"twitter": r"twitter\.com/[\w]+",
"bitbucket": r"bitbucket\.org/[\w-]+",
"gitlab": r"gitlab\.com/[\w-]+",
"pypi": r"pypi\.org/project/[\w-]+",
}
def extract_links(html, base_url):
"""Extract all links from HTML content."""
links = set()
for match in re.finditer(r'href=["\']([^"\']+)', html):
link = match.group(1)
if link.startswith(("http://", "https://")):
links.add(link)
elif link.startswith("/"):
links.add(urljoin(base_url, link))
for match in re.finditer(r'src=["\']([^"\']+)', html):
link = match.group(1)
if link.startswith(("http://", "https://")):
links.add(link)
return links
def check_link_status(url):
"""Check if a URL is reachable and categorize its status."""
if not HAS_REQUESTS:
return {"url": url, "status": "unknown", "note": "requests library not available"}
try:
resp = requests.head(url, timeout=10, allow_redirects=True, verify=False)
return {
"url": url,
"status_code": resp.status_code,
"final_url": resp.url,
"broken": resp.status_code == 404,
}
except requests.ConnectionError:
return {"url": url, "status_code": None, "broken": True, "error": "connection_failed"}
except requests.Timeout:
return {"url": url, "status_code": None, "broken": False, "error": "timeout"}
except requests.RequestException as e:
return {"url": url, "status_code": None, "broken": False, "error": str(e)[:100]}
def check_hijackable(link_status):
"""Determine if a broken link is hijackable."""
url = link_status.get("url", "")
if not link_status.get("broken"):
return None
parsed = urlparse(url)
domain = parsed.netloc.lower()
for platform, pattern in HIJACKABLE_PATTERNS.items():
if re.search(pattern, url):
return {
"url": url,
"platform": platform,
"domain": domain,
"hijack_type": f"Register {platform} account/resource",
"severity": "HIGH",
}
if link_status.get("error") == "connection_failed":
return {
"url": url,
"domain": domain,
"hijack_type": "Domain takeover (unregistered domain)",
"severity": "CRITICAL",
}
return None
def scan_website(target_url):
"""Scan a website for broken link hijacking opportunities."""
if not HAS_REQUESTS:
return []
findings = []
try:
resp = requests.get(target_url, timeout=15, verify=False)
links = extract_links(resp.text, target_url)
except requests.RequestException:
return findings
external_links = [l for l in links if urlparse(l).netloc != urlparse(target_url).netloc]
print(f"[*] Found {len(external_links)} external links")
for link in external_links:
status = check_link_status(link)
hijack = check_hijackable(status)
if hijack:
findings.append(hijack)
return findings
def main():
parser = argparse.ArgumentParser(
description="Detect broken link hijacking vulnerabilities"
)
parser.add_argument("--url", required=True, help="Target website URL")
parser.add_argument("--output", "-o", help="Output JSON report")
parser.add_argument("--verbose", "-v", action="store_true")
args = parser.parse_args()
print("[*] Broken Link Hijacking Detection Agent")
findings = scan_website(args.url)
report = {
"timestamp": datetime.now(timezone.utc).isoformat(),
"target": args.url,
"hijackable_links": findings,
"count": len(findings),
"risk_level": "CRITICAL" if any(f["severity"] == "CRITICAL" for f in findings) else "HIGH" if findings else "LOW",
}
print(f"[*] Hijackable links found: {len(findings)}")
if args.verbose:
for f in findings:
print(f" [{f['severity']}] {f['url']} - {f['hijack_type']}")
if args.output:
with open(args.output, "w") as f:
json.dump(report, f, indent=2)
print(f"[*] Report saved to {args.output}")
else:
print(json.dumps(report, indent=2))
if __name__ == "__main__":
main()