Files
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

198 lines
5.9 KiB
Python

#!/usr/bin/env python3
"""Agent for detecting QR code phishing (quishing) in email attachments and bodies."""
import argparse
import email
import json
import os
import re
from datetime import datetime, timezone
from email import policy
from urllib.parse import urlparse
import importlib.util

# Optional QR-decoding stack: Pillow opens the image and pyzbar decodes the
# symbols. Both names are imported in one guarded block so HAS_QR is True only
# when the pair imported successfully.
try:
    from PIL import Image
    from pyzbar.pyzbar import decode as qr_decode
    HAS_QR = True
except ImportError:
    HAS_QR = False

# Availability flag for the optional `requests` package. The package is probed
# without importing it, so the flag reflects whether it is installed instead
# of being unconditionally True.
try:
    HAS_REQUESTS = importlib.util.find_spec("requests") is not None
except (ImportError, ValueError):
    # find_spec can raise for a broken or half-initialised module entry;
    # treat that the same as "not available".
    HAS_REQUESTS = False
# Top-level domain suffixes that count as a risk indicator when the host of a
# QR-decoded URL ends with one of them.
SUSPICIOUS_TLDS = {
    ".xyz",
    ".top",
    ".club",
    ".work",
    ".buzz",
    ".tk",
    ".ml",
    ".ga",
    ".cf",
    ".gq",
    ".info",
    ".online",
    ".site",
    ".icu",
}

# Lower-case substrings searched for in email subjects and in URL paths/queries.
# Order matters for URL scoring: only the first keyword that matches is reported.
PHISHING_KEYWORDS = [
    "verify",
    "account",
    "suspended",
    "confirm",
    "urgent",
    "expire",
    "password",
    "login",
    "credential",
    "security",
    "update",
    "click",
    "immediate",
    "unauthorized",
    "invoice",
]
def extract_images_from_eml(eml_path):
    """Collect every non-empty image part from an .eml file.

    Args:
        eml_path: Path to the message file; it is opened in binary mode.

    Returns:
        A ``(images, msg)`` tuple. ``images`` is a list of dicts with the keys
        ``filename``, ``data`` (decoded payload bytes) and ``type`` (the MIME
        content type); ``msg`` is the parsed message object.
    """
    with open(eml_path, "rb") as handle:
        message = email.message_from_binary_file(handle, policy=policy.default)

    found = []
    for mime_part in message.walk():
        mime_type = mime_part.get_content_type()
        if not mime_type.startswith("image/"):
            continue

        raw = mime_part.get_payload(decode=True)
        if not raw:
            # Nothing to decode in an empty or missing payload.
            continue

        name = mime_part.get_filename()
        if not name:
            # Unnamed parts get a synthetic name built from the MIME subtype.
            subtype = mime_type.split("/")[1].split(";")[0]
            name = f"inline_image.{subtype}"

        found.append({"filename": name, "data": raw, "type": mime_type})

    return found, message
def decode_qr_from_bytes(image_data):
    """Decode QR codes from raw image bytes.

    Args:
        image_data: Encoded image bytes (for example an email attachment).

    Returns:
        A list of decoded payloads as text, one per symbol found. Payload
        bytes are decoded as UTF-8 with undecodable bytes replaced. The list
        is empty when the QR libraries are unavailable (``HAS_QR`` is False)
        or when the bytes cannot be read as an image.
    """
    if not HAS_QR:
        return []
    import io

    try:
        # The context manager releases the image's resources once decoding is
        # done. Decoding stays inside the ``try`` because image data may be
        # loaded lazily, so a truncated file can fail after ``open`` succeeded.
        with Image.open(io.BytesIO(image_data)) as img:
            symbols = qr_decode(img)
    except (OSError, ValueError, SyntaxError, Image.DecompressionBombError):
        # Attachments come from untrusted email: a corrupt, unsupported or
        # oversized image must not abort the scan of the remaining parts.
        return []
    return [symbol.data.decode("utf-8", errors="replace") for symbol in symbols]
def analyze_url(url):
    """Score a URL for phishing risk indicators.

    Checks run against the URL's host name with any ``user@`` prefix and
    ``:port`` suffix removed, so those parts cannot hide the real host from
    the TLD and IP-address checks.

    Args:
        url: The URL string to inspect.

    Returns:
        A dict with the keys ``url`` (the input), ``domain`` (lower-cased host
        name), ``indicators`` (list of human-readable findings) and
        ``risk_score`` (25 points per indicator, capped at 100).
    """
    import ipaddress

    indicators = []
    parsed = urlparse(url)
    # ``hostname`` is already lower-cased and excludes userinfo and port.
    # A trailing root dot is dropped so "evil.xyz." still matches ".xyz".
    domain = (parsed.hostname or "").rstrip(".")

    matched_tld = next(
        (tld for tld in SUSPICIOUS_TLDS if domain.endswith(tld)), None
    )
    if matched_tld is not None:
        indicators.append(f"Suspicious TLD: {matched_tld}")

    # Flag only hosts that are an IP literal (IPv4 or IPv6), not names that
    # merely contain a dotted-number run.
    try:
        ipaddress.ip_address(domain)
    except ValueError:
        pass
    else:
        indicators.append("URL uses IP address instead of domain")

    if len(domain) > 40:
        indicators.append(f"Unusually long domain: {len(domain)} chars")

    dot_count = domain.count(".")
    if dot_count > 3:
        indicators.append(f"Many subdomains: {dot_count} dots")

    if parsed.scheme == "http":
        indicators.append("Uses HTTP instead of HTTPS")

    # Only the first matching keyword is reported, so the path/query
    # contributes at most one indicator to the score.
    path = (parsed.path + (parsed.query or "")).lower()
    matched_kw = next((kw for kw in PHISHING_KEYWORDS if kw in path), None)
    if matched_kw is not None:
        indicators.append(f"Phishing keyword in URL path: '{matched_kw}'")

    return {
        "url": url,
        "domain": domain,
        "indicators": indicators,
        "risk_score": min(len(indicators) * 25, 100),
    }
def analyze_email(eml_path):
    """Run the full QR phishing analysis on one email file.

    Every image part is scanned for QR codes. Each decoded payload that is an
    http(s) URL is scored with ``analyze_url``, and the highest URL score sets
    ``risk_level``. Subject keywords are recorded in ``phishing_indicators``
    but do not influence ``risk_level``.

    Args:
        eml_path: Path to the .eml file to analyze.

    Returns:
        A dict with the keys ``file``, ``timestamp`` (UTC, ISO 8601),
        ``images_found``, ``qr_codes_found`` (number of decoded payloads,
        whether or not they are URLs), ``urls_extracted`` (list of
        ``analyze_url`` results), ``phishing_indicators``, ``risk_level``
        (``LOW``, ``MEDIUM``, ``HIGH`` or ``CRITICAL``), ``subject`` and
        ``from``.
    """
    results = {
        "file": eml_path,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "images_found": 0,
        "qr_codes_found": 0,
        "urls_extracted": [],
        "phishing_indicators": [],
        "risk_level": "LOW",
    }
    images, msg = extract_images_from_eml(eml_path)
    results["images_found"] = len(images)
    results["subject"] = msg.get("Subject", "")
    results["from"] = msg.get("From", "")

    subject_lower = results["subject"].lower()
    results["phishing_indicators"].extend(
        f"Phishing keyword in subject: '{kw}'"
        for kw in PHISHING_KEYWORDS
        if kw in subject_lower
    )

    all_urls = []
    decoded_count = 0
    for img_info in images:
        for payload in decode_qr_from_bytes(img_info["data"]):
            decoded_count += 1
            candidate = payload.strip()
            # The scheme test is case-insensitive: a payload written as
            # "HTTPS://..." is still a URL and must not be skipped.
            if candidate.lower().startswith(("http://", "https://")):
                all_urls.append(analyze_url(candidate))

    results["qr_codes_found"] = decoded_count
    results["urls_extracted"] = all_urls

    max_risk = max((u["risk_score"] for u in all_urls), default=0)
    if max_risk >= 75:
        results["risk_level"] = "CRITICAL"
    elif max_risk >= 50:
        results["risk_level"] = "HIGH"
    elif max_risk >= 25:
        results["risk_level"] = "MEDIUM"
    return results
def scan_directory(dir_path):
    """Analyze every .eml file found under a directory tree.

    Args:
        dir_path: Root directory; it is walked recursively.

    Returns:
        A list with one ``analyze_email`` result per file whose name ends in
        ``.eml`` (case-insensitive), in the order the walk yields them.
    """
    # Lazy generator: each path is analyzed as soon as the walk produces it.
    eml_paths = (
        os.path.join(folder, entry)
        for folder, _subdirs, entries in os.walk(dir_path)
        for entry in entries
        if entry.lower().endswith(".eml")
    )
    return [analyze_email(eml_path) for eml_path in eml_paths]
def main():
    """Command-line entry point: scan one .eml file or a directory of them.

    Parses the ``input`` path plus the ``--output/-o`` and ``--verbose/-v``
    options, builds a JSON report, and either writes it to the output path or
    prints it to stdout. A nonexistent input path ends the program with an
    argparse usage error (exit status 2) before any scanning starts.
    """
    parser = argparse.ArgumentParser(
        description="Detect QR code phishing (quishing) in emails"
    )
    parser.add_argument("input", help="Path to .eml file or directory of .eml files")
    parser.add_argument("--output", "-o", help="Output JSON report path")
    parser.add_argument("--verbose", "-v", action="store_true")
    args = parser.parse_args()

    # Fail with a usage message instead of a traceback when the path is wrong.
    if not os.path.exists(args.input):
        parser.error(f"input path does not exist: {args.input}")

    print("[*] QR Code Phishing Detection Agent")
    print(f"[*] QR decoding available: {HAS_QR}")

    if os.path.isdir(args.input):
        results = scan_directory(args.input)
    else:
        results = [analyze_email(args.input)]

    report = {
        "scan_time": datetime.now(timezone.utc).isoformat(),
        "files_scanned": len(results),
        "qr_phishing_detected": sum(
            1 for r in results if r["risk_level"] in ("HIGH", "CRITICAL")
        ),
        "results": results,
    }

    if args.verbose:
        for r in results:
            print(f"\n File: {r['file']}")
            print(f" Subject: {r.get('subject', 'N/A')}")
            print(f" Images: {r['images_found']}, QR codes: {r['qr_codes_found']}")
            print(f" Risk: {r['risk_level']}")

    if args.output:
        # Explicit encoding keeps the report independent of the platform's
        # default locale encoding.
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)
        print(f"[*] Report saved to {args.output}")
    else:
        print(json.dumps(report, indent=2))


if __name__ == "__main__":
    main()