mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-11 13:44:56 +03:00
342 lines
12 KiB
Python
342 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
QR Code Phishing (Quishing) Detection Engine
|
|
|
|
Detects QR codes in images and email content, extracts encoded URLs,
|
|
and checks them against known phishing indicators.
|
|
|
|
Usage:
|
|
python process.py scan-image --image qr_image.png
|
|
python process.py scan-email --eml-file message.eml
|
|
python process.py check-url --url "https://example.com/login"
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
import sys
|
|
import base64
|
|
from dataclasses import dataclass, field, asdict
|
|
from urllib.parse import urlparse
|
|
|
|
try:
|
|
from PIL import Image
|
|
HAS_PIL = True
|
|
except ImportError:
|
|
HAS_PIL = False
|
|
|
|
try:
|
|
from pyzbar.pyzbar import decode as qr_decode
|
|
HAS_PYZBAR = True
|
|
except ImportError:
|
|
HAS_PYZBAR = False
|
|
|
|
|
|
@dataclass
|
|
class QRCodeFinding:
|
|
"""A detected QR code and its analysis."""
|
|
source: str = ""
|
|
decoded_url: str = ""
|
|
domain: str = ""
|
|
is_suspicious: bool = False
|
|
risk_score: int = 0
|
|
indicators: list = field(default_factory=list)
|
|
|
|
|
|
@dataclass
|
|
class QuishingAnalysis:
|
|
"""Complete quishing analysis result."""
|
|
source_file: str = ""
|
|
qr_codes_found: int = 0
|
|
findings: list = field(default_factory=list)
|
|
email_indicators: list = field(default_factory=list)
|
|
overall_risk: str = "low"
|
|
recommended_action: str = ""
|
|
|
|
|
|
# Suspicious URL patterns for credential phishing
|
|
SUSPICIOUS_URL_PATTERNS = [
|
|
r'login|signin|sign-in|log-in',
|
|
r'verify|verification|validate',
|
|
r'account|password|credential',
|
|
r'microsoft|office365|outlook|sharepoint',
|
|
r'google|gmail|workspace',
|
|
r'secure|security|auth',
|
|
r'update|confirm|suspend',
|
|
r'\.tk$|\.ml$|\.ga$|\.cf$|\.gq$',
|
|
r'bit\.ly|tinyurl|t\.co|is\.gd|cutt\.ly',
|
|
]
|
|
|
|
# Known phishing infrastructure patterns
|
|
PHISHING_INFRA_PATTERNS = [
|
|
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', # IP address URLs
|
|
r'[a-z0-9]{20,}\.web\.app', # Firebase hosting abuse
|
|
r'[a-z0-9]{20,}\.netlify\.app', # Netlify abuse
|
|
r'[a-z0-9-]+\.glitch\.me', # Glitch abuse
|
|
r'[a-z0-9-]+\.workers\.dev', # Cloudflare workers abuse
|
|
]
|
|
|
|
# Common quishing email indicators
|
|
QUISHING_EMAIL_PATTERNS = [
|
|
r'scan\s+(this|the)\s+qr\s+code',
|
|
r'scan\s+to\s+(verify|authenticate|confirm|access)',
|
|
r'multi.?factor\s+authentication',
|
|
r'mfa\s+(setup|enrollment|reset|update)',
|
|
r'voicemail\s+(notification|message)',
|
|
r'document\s+(sign|signing|review)',
|
|
r'security\s+update\s+required',
|
|
r'action\s+required',
|
|
]
|
|
|
|
|
|
def analyze_url(url: str) -> QRCodeFinding:
|
|
"""Analyze a URL extracted from a QR code for phishing indicators."""
|
|
finding = QRCodeFinding(decoded_url=url)
|
|
|
|
try:
|
|
parsed = urlparse(url)
|
|
finding.domain = parsed.netloc
|
|
except Exception:
|
|
finding.indicators.append("Could not parse URL")
|
|
finding.is_suspicious = True
|
|
finding.risk_score = 50
|
|
return finding
|
|
|
|
score = 0
|
|
|
|
# Check suspicious URL patterns
|
|
url_lower = url.lower()
|
|
for pattern in SUSPICIOUS_URL_PATTERNS:
|
|
if re.search(pattern, url_lower):
|
|
finding.indicators.append(f"Suspicious URL pattern: {pattern}")
|
|
score += 15
|
|
|
|
# Check phishing infrastructure patterns
|
|
for pattern in PHISHING_INFRA_PATTERNS:
|
|
if re.search(pattern, url_lower):
|
|
finding.indicators.append(f"Known phishing infrastructure pattern: {pattern}")
|
|
score += 25
|
|
|
|
# Check for URL shorteners (hiding true destination)
|
|
shorteners = ['bit.ly', 'tinyurl.com', 't.co', 'is.gd', 'cutt.ly',
|
|
'rebrand.ly', 'ow.ly', 'buff.ly']
|
|
if finding.domain in shorteners:
|
|
finding.indicators.append(f"URL shortener detected: {finding.domain}")
|
|
score += 20
|
|
|
|
# Check for IP address URL
|
|
if re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', finding.domain):
|
|
finding.indicators.append("URL uses IP address instead of domain name")
|
|
score += 30
|
|
|
|
# Check for excessive subdomains (common in phishing)
|
|
subdomain_count = finding.domain.count('.')
|
|
if subdomain_count > 3:
|
|
finding.indicators.append(f"Excessive subdomains ({subdomain_count})")
|
|
score += 15
|
|
|
|
# Check for homoglyph characters in domain
|
|
non_ascii = [c for c in finding.domain if ord(c) > 127]
|
|
if non_ascii:
|
|
finding.indicators.append("Non-ASCII characters in domain (possible homoglyph)")
|
|
score += 25
|
|
|
|
# Check protocol
|
|
if not url.startswith('https://'):
|
|
finding.indicators.append("URL does not use HTTPS")
|
|
score += 10
|
|
|
|
finding.risk_score = min(score, 100)
|
|
finding.is_suspicious = score >= 30
|
|
|
|
return finding
|
|
|
|
|
|
def scan_image_for_qr(image_path: str) -> list:
|
|
"""Scan an image file for QR codes and extract URLs."""
|
|
findings = []
|
|
|
|
if not HAS_PIL:
|
|
print("Pillow not installed. Install with: pip install Pillow", file=sys.stderr)
|
|
return findings
|
|
if not HAS_PYZBAR:
|
|
print("pyzbar not installed. Install with: pip install pyzbar", file=sys.stderr)
|
|
return findings
|
|
|
|
try:
|
|
img = Image.open(image_path)
|
|
decoded_objects = qr_decode(img)
|
|
|
|
for obj in decoded_objects:
|
|
data = obj.data.decode('utf-8', errors='replace')
|
|
if data.startswith(('http://', 'https://', 'www.')):
|
|
url = data if data.startswith('http') else f'https://{data}'
|
|
finding = analyze_url(url)
|
|
finding.source = image_path
|
|
findings.append(finding)
|
|
else:
|
|
finding = QRCodeFinding(
|
|
source=image_path,
|
|
decoded_url=data,
|
|
indicators=["QR code contains non-URL data"],
|
|
risk_score=10
|
|
)
|
|
findings.append(finding)
|
|
|
|
except Exception as e:
|
|
print(f"Error scanning image: {e}", file=sys.stderr)
|
|
|
|
return findings
|
|
|
|
|
|
def scan_email_content(eml_content: str) -> QuishingAnalysis:
|
|
"""Analyze email content for quishing indicators."""
|
|
analysis = QuishingAnalysis()
|
|
body_lower = eml_content.lower()
|
|
|
|
# Check for quishing email patterns
|
|
for pattern in QUISHING_EMAIL_PATTERNS:
|
|
if re.search(pattern, body_lower):
|
|
analysis.email_indicators.append(f"Quishing language pattern: {pattern}")
|
|
|
|
# Check for image-heavy email with minimal text
|
|
text_content = re.sub(r'<[^>]+>', '', eml_content)
|
|
text_content = re.sub(r'\s+', ' ', text_content).strip()
|
|
has_images = bool(re.search(r'<img|Content-Type:\s*image/', eml_content, re.IGNORECASE))
|
|
text_words = len(text_content.split())
|
|
|
|
if has_images and text_words < 50:
|
|
analysis.email_indicators.append(
|
|
"Image-heavy email with minimal text (common quishing pattern)"
|
|
)
|
|
|
|
# Check for base64-encoded images
|
|
b64_images = re.findall(
|
|
r'Content-Type:\s*image/\w+.*?Content-Transfer-Encoding:\s*base64\s*\n\n([\w+/=\n]+)',
|
|
eml_content, re.DOTALL | re.IGNORECASE
|
|
)
|
|
|
|
if b64_images and HAS_PIL and HAS_PYZBAR:
|
|
for i, b64_data in enumerate(b64_images):
|
|
try:
|
|
clean_data = b64_data.replace('\n', '')
|
|
img_bytes = base64.b64decode(clean_data)
|
|
import io
|
|
img = Image.open(io.BytesIO(img_bytes))
|
|
decoded = qr_decode(img)
|
|
for obj in decoded:
|
|
data = obj.data.decode('utf-8', errors='replace')
|
|
if data.startswith(('http://', 'https://')):
|
|
finding = analyze_url(data)
|
|
finding.source = f"embedded_image_{i}"
|
|
analysis.findings.append(finding)
|
|
analysis.qr_codes_found += 1
|
|
except Exception:
|
|
continue
|
|
|
|
# Calculate overall risk
|
|
indicator_count = len(analysis.email_indicators)
|
|
has_suspicious_qr = any(f.is_suspicious for f in analysis.findings)
|
|
|
|
if has_suspicious_qr and indicator_count >= 2:
|
|
analysis.overall_risk = "critical"
|
|
analysis.recommended_action = "BLOCK and alert SOC"
|
|
elif has_suspicious_qr or indicator_count >= 3:
|
|
analysis.overall_risk = "high"
|
|
analysis.recommended_action = "QUARANTINE for manual review"
|
|
elif indicator_count >= 1:
|
|
analysis.overall_risk = "medium"
|
|
analysis.recommended_action = "TAG with QR phishing warning banner"
|
|
else:
|
|
analysis.overall_risk = "low"
|
|
analysis.recommended_action = "DELIVER normally"
|
|
|
|
return analysis
|
|
|
|
|
|
def format_report(analysis: QuishingAnalysis) -> str:
|
|
"""Format analysis as readable report."""
|
|
lines = []
|
|
lines.append("=" * 60)
|
|
lines.append(" QR CODE PHISHING (QUISHING) ANALYSIS REPORT")
|
|
lines.append("=" * 60)
|
|
lines.append(f" QR Codes Found: {analysis.qr_codes_found}")
|
|
lines.append(f" Overall Risk: {analysis.overall_risk.upper()}")
|
|
lines.append(f" Action: {analysis.recommended_action}")
|
|
|
|
if analysis.email_indicators:
|
|
lines.append(f"\n [EMAIL INDICATORS] ({len(analysis.email_indicators)})")
|
|
for i, ind in enumerate(analysis.email_indicators, 1):
|
|
lines.append(f" {i}. {ind}")
|
|
|
|
if analysis.findings:
|
|
lines.append(f"\n [QR CODE FINDINGS] ({len(analysis.findings)})")
|
|
for i, finding in enumerate(analysis.findings, 1):
|
|
lines.append(f" {i}. URL: {finding.decoded_url}")
|
|
lines.append(f" Domain: {finding.domain}")
|
|
lines.append(f" Risk Score: {finding.risk_score}/100")
|
|
lines.append(f" Suspicious: {'YES' if finding.is_suspicious else 'No'}")
|
|
for ind in finding.indicators:
|
|
lines.append(f" - {ind}")
|
|
|
|
lines.append("=" * 60)
|
|
return "\n".join(lines)
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description="QR Code Phishing Detection")
|
|
subparsers = parser.add_subparsers(dest="command")
|
|
|
|
img_parser = subparsers.add_parser("scan-image", help="Scan image for QR codes")
|
|
img_parser.add_argument("--image", required=True)
|
|
|
|
eml_parser = subparsers.add_parser("scan-email", help="Scan email for quishing")
|
|
eml_parser.add_argument("--eml-file", required=True)
|
|
|
|
url_parser = subparsers.add_parser("check-url", help="Check URL from QR code")
|
|
url_parser.add_argument("--url", required=True)
|
|
|
|
parser.add_argument("--json", action="store_true")
|
|
args = parser.parse_args()
|
|
|
|
if args.command == "scan-image":
|
|
findings = scan_image_for_qr(args.image)
|
|
analysis = QuishingAnalysis(
|
|
source_file=args.image,
|
|
qr_codes_found=len(findings),
|
|
findings=findings
|
|
)
|
|
if args.json:
|
|
print(json.dumps(asdict(analysis), indent=2))
|
|
else:
|
|
print(format_report(analysis))
|
|
|
|
elif args.command == "scan-email":
|
|
with open(args.eml_file, 'r', errors='replace') as f:
|
|
content = f.read()
|
|
analysis = scan_email_content(content)
|
|
analysis.source_file = args.eml_file
|
|
if args.json:
|
|
print(json.dumps(asdict(analysis), indent=2))
|
|
else:
|
|
print(format_report(analysis))
|
|
|
|
elif args.command == "check-url":
|
|
finding = analyze_url(args.url)
|
|
if args.json:
|
|
print(json.dumps(asdict(finding), indent=2))
|
|
else:
|
|
print(f"URL: {finding.decoded_url}")
|
|
print(f"Domain: {finding.domain}")
|
|
print(f"Risk Score: {finding.risk_score}/100")
|
|
print(f"Suspicious: {'YES' if finding.is_suspicious else 'No'}")
|
|
for ind in finding.indicators:
|
|
print(f" - {ind}")
|
|
|
|
else:
|
|
parser.print_help()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|