mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-10 21:24:56 +03:00
567 lines
20 KiB
Python
567 lines
20 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Phishing Email Header Analyzer
|
|
|
|
Parses raw email headers to extract authentication results, routing information,
|
|
and phishing indicators. Optionally performs IP geolocation and reverse DNS lookups, and
|
|
generates a risk assessment report.
|
|
|
|
Usage:
|
|
python process.py --file email_headers.txt
|
|
python process.py --eml suspicious_email.eml
|
|
python process.py --stdin < headers.txt
|
|
"""
|
|
|
|
import argparse
|
|
import email
|
|
import re
|
|
import json
|
|
import sys
|
|
import socket
|
|
import hashlib
|
|
from datetime import datetime, timezone
|
|
from email import policy
|
|
from email.parser import HeaderParser, BytesParser
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
from dataclasses import dataclass, field, asdict
|
|
|
|
try:
|
|
import requests
|
|
HAS_REQUESTS = True
|
|
except ImportError:
|
|
HAS_REQUESTS = False
|
|
|
|
|
|
@dataclass
class ReceivedHop:
    """One relay step described by a single ``Received`` header.

    Every field has an empty default, so a hop can be created with only its
    position and filled in as the individual parts of the header are
    recognised.
    """

    # Host name that follows the "from" keyword (the sending side).
    server_from: str = ""
    # Host name that follows the "by" keyword (the receiving side).
    server_by: str = ""
    # Address extracted from the header text; "" when none was found.
    ip_address: str = ""
    # Text after the semicolon of the header, stripped of outer whitespace.
    timestamp: str = ""
    # Transfer protocol named after "with", upper-cased.
    protocol: str = ""
    # Position of this hop in the routing chain.
    hop_number: int = 0
    # Geolocation text for ip_address; stays "" unless enrichment ran.
    geo_location: str = ""
    # Reverse-DNS name for ip_address; stays "" unless enrichment ran.
    reverse_dns: str = ""
|
|
|
|
|
|
@dataclass
class AuthenticationResult:
    """Verdicts of the sender-authentication checks reported for a message.

    The three verdict fields start as the string "none", meaning that no
    result was found for that mechanism; the remaining fields start empty.
    """

    # SPF verdict, e.g. "pass", "fail", "softfail".
    spf: str = "none"
    # Value reported as smtp.mailfrom alongside the SPF verdict.
    spf_domain: str = ""
    # DKIM verdict.
    dkim: str = "none"
    # Value reported as header.d / header.i alongside the DKIM verdict.
    dkim_domain: str = ""
    # DMARC verdict.
    dmarc: str = "none"
    # Value reported as header.from alongside the DMARC verdict.
    dmarc_domain: str = ""
    # Raw value of the "compauth=" token when one is present.
    compauth: str = ""
|
|
|
|
|
|
@dataclass
class PhishingIndicator:
    """A single suspicious finding derived from the message headers."""

    # Short machine-friendly label grouping related findings.
    category: str = ""
    # Human-readable explanation of the finding.
    description: str = ""
    # One of: low, medium, high, critical.
    severity: str = "low"
    # Header text or value that triggered the finding, when there is one.
    raw_value: str = ""
|
|
|
|
|
|
@dataclass
class HeaderAnalysis:
    """Everything learned about one message: parsed fields, findings, score.

    Container-typed fields use ``default_factory`` so each instance owns its
    own list / authentication object instead of sharing one between
    instances.
    """

    # --- Identity and sender headers -------------------------------------
    message_id: str = ""
    from_address: str = ""
    # Domain part of from_address.
    from_domain: str = ""
    return_path: str = ""
    # Domain part of return_path.
    return_path_domain: str = ""
    reply_to: str = ""
    # Domain part of reply_to.
    reply_to_domain: str = ""
    subject: str = ""
    date: str = ""
    # IPv4 address taken from the X-Originating-IP header, if present.
    x_originating_ip: str = ""
    # Sending software as reported by X-Mailer (or User-Agent).
    x_mailer: str = ""

    # --- Routing and authentication --------------------------------------
    # ReceivedHop entries describing the routing chain.
    received_hops: list = field(default_factory=list)
    authentication: AuthenticationResult = field(default_factory=AuthenticationResult)

    # --- Assessment --------------------------------------------------------
    # PhishingIndicator entries found for this message.
    indicators: list = field(default_factory=list)
    risk_score: int = 0
    risk_level: str = "unknown"

    # --- Evidence ----------------------------------------------------------
    # URLs found anywhere in the analysed header text.
    urls_in_headers: list = field(default_factory=list)
    # Hex digest of the raw analysed input, for evidence tracking.
    file_hash: str = ""
|
|
|
|
|
|
def extract_ip_from_received(received_value: str) -> str:
    """Return the first public IPv4 address found in a Received header value.

    Three textual forms are tried in order of preference: an address in
    square brackets, an address alone in parentheses, and the first address
    inside the parenthesised comment that follows ``from <host>``.  For each
    form only its first occurrence in the header is considered.

    A candidate is skipped when any octet exceeds 255, or when the address is
    loopback (127.0.0.0/8) or RFC 1918 private space (10.0.0.0/8,
    172.16.0.0/12, 192.168.0.0/16).

    Args:
        received_value: Text of one ``Received`` header.

    Returns:
        The address as a dotted-quad string, or "" when no candidate
        qualifies.
    """
    ip_patterns = (
        r'\[(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\]',
        r'\((\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\)',
        r'from\s+\S+\s+\(.*?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})',
    )
    for pattern in ip_patterns:
        match = re.search(pattern, received_value)
        if not match:
            continue

        candidate = match.group(1)
        octets = [int(part) for part in candidate.split(".")]
        if any(octet > 255 for octet in octets):
            # Looks like a dotted quad but is not a valid IPv4 address.
            continue

        # Compare numerically: a textual prefix test such as "172.2" would
        # also discard public ranges like 172.2.x.x and 172.200.x.x.
        first, second = octets[0], octets[1]
        is_internal = (
            first in (10, 127)
            or (first == 172 and 16 <= second <= 31)
            or (first == 192 and second == 168)
        )
        if not is_internal:
            return candidate
    return ""
|
|
|
|
|
|
def extract_domain(email_address: str) -> str:
    """Return the lower-cased domain of the address in a header value.

    The value is first parsed as a mailbox (``Display Name <local@domain>``)
    so that the domain comes from the real address rather than from an
    ``@`` that merely appears inside the display name, e.g.
    ``"ceo@bank.com" <x@evil.example>`` yields ``evil.example``.  When the
    parser cannot isolate an address, the first ``@domain`` found anywhere in
    the raw value is used instead.

    Args:
        email_address: Raw header value such as a From, Reply-To or
            Return-Path field.  May be empty.

    Returns:
        The domain without a trailing root dot, or "" when none is found.
    """
    if not email_address:
        return ""

    # Local import keeps this function self-contained.
    from email.utils import parseaddr

    _display_name, addr_spec = parseaddr(email_address)
    # Fall back to the raw text when the parser yields nothing usable.
    searchable = addr_spec if "@" in addr_spec else email_address

    match = re.search(r'@([\w.-]+)', searchable)
    if not match:
        return ""
    # "example.com." and "example.com" name the same domain.
    return match.group(1).lower().rstrip(".")
|
|
|
|
|
|
def parse_received_header(received_value: str, hop_num: int) -> ReceivedHop:
    """Turn the text of one ``Received`` header into a ``ReceivedHop``.

    Args:
        received_value: Text of the header.
        hop_num: Position to record on the hop.

    Returns:
        A hop whose fields are filled for every part that could be
        recognised; unrecognised parts keep their defaults.
    """

    def capture(pattern, flags=0):
        """Return group 1 of the first match in the header, or None."""
        found = re.search(pattern, received_value, flags)
        return found.group(1) if found else None

    sending_host = capture(r'from\s+([\w.\-]+)', re.IGNORECASE)
    receiving_host = capture(r'by\s+([\w.\-]+)', re.IGNORECASE)
    # The timestamp is whatever follows the semicolon up to the end.
    stamp = capture(r';\s*(.+)$')
    transport = capture(r'with\s+(ESMTP[SA]*|SMTP[SA]*|HTTP[S]?|LMTP)',
                        re.IGNORECASE)

    hop = ReceivedHop(hop_number=hop_num)
    if sending_host is not None:
        hop.server_from = sending_host
    if receiving_host is not None:
        hop.server_by = receiving_host
    hop.ip_address = extract_ip_from_received(received_value)
    if stamp is not None:
        hop.timestamp = stamp.strip()
    if transport is not None:
        hop.protocol = transport.upper()
    return hop
|
|
|
|
|
|
def parse_authentication_results(auth_header: str) -> AuthenticationResult:
    """Extract SPF, DKIM, DMARC and compauth data from a results header.

    Each piece is looked up independently and case-insensitively; anything
    not present in the header keeps its ``AuthenticationResult`` default.

    Args:
        auth_header: Text of an Authentication-Results style header.

    Returns:
        The populated ``AuthenticationResult``.
    """
    # (attribute, pattern, lower-case the captured value?)
    extraction_rules = (
        ("spf",
         r'spf=(pass|fail|softfail|neutral|none|temperror|permerror)', True),
        ("spf_domain", r'smtp\.mailfrom=([\w.\-@]+)', False),
        ("dkim",
         r'dkim=(pass|fail|none|neutral|temperror|permerror)', True),
        ("dkim_domain", r'header\.[di]=([\w.\-]+)', False),
        ("dmarc",
         r'dmarc=(pass|fail|none|bestguesspass|temperror|permerror)', True),
        ("dmarc_domain", r'header\.from=([\w.\-]+)', False),
        ("compauth", r'compauth=(\w+)', False),
    )

    result = AuthenticationResult()
    for attribute, pattern, lowercase in extraction_rules:
        hit = re.search(pattern, auth_header, re.IGNORECASE)
        if hit is None:
            continue
        captured = hit.group(1)
        setattr(result, attribute, captured.lower() if lowercase else captured)
    return result
|
|
|
|
|
|
def geolocate_ip(ip_address: str) -> str:
    """Look up a location description for an address via ip-api.com.

    This is a best-effort lookup: any failure (library missing, empty input,
    request error, non-200 reply, unsuccessful status) yields "unknown".

    Args:
        ip_address: Address to look up.

    Returns:
        Text of the form "city, country (org)", or "unknown".
    """
    if not (HAS_REQUESTS and ip_address):
        return "unknown"

    try:
        response = requests.get(f"http://ip-api.com/json/{ip_address}",
                                timeout=5,
                                params={"fields": "country,city,org,status"})
        if response.status_code != 200:
            return "unknown"
        payload = response.json()
        if payload.get("status") != "success":
            return "unknown"
        city = payload.get('city', '')
        country = payload.get('country', '')
        organisation = payload.get('org', '')
        return f"{city}, {country} ({organisation})"
    except Exception:
        # Enrichment is optional; never let it abort the analysis.
        return "unknown"
|
|
|
|
|
|
def reverse_dns_lookup(ip_address: str) -> str:
    """Resolve an address to its primary host name.

    Args:
        ip_address: Address to resolve.

    Returns:
        The host name, or "" when the input is empty or resolution fails.
    """
    if ip_address:
        try:
            # gethostbyaddr returns (hostname, aliases, addresses).
            return socket.gethostbyaddr(ip_address)[0]
        except (socket.herror, socket.gaierror, OSError):
            pass
    return ""
|
|
|
|
|
|
def check_abuseipdb(ip_address: str, api_key: str = "") -> dict:
    """Query AbuseIPDB for reports about an address from the last 90 days.

    This is a best-effort lookup: a missing library, missing key, empty
    address, request error or non-200 reply all yield an empty dict.

    Args:
        ip_address: Address to check.
        api_key: AbuseIPDB API key.

    Returns:
        The "data" object of the API reply, or {}.
    """
    if not (HAS_REQUESTS and api_key and ip_address):
        return {}

    try:
        response = requests.get(
            "https://api.abuseipdb.com/api/v2/check",
            headers={"Key": api_key, "Accept": "application/json"},
            params={"ipAddress": ip_address, "maxAgeInDays": "90"},
            timeout=10,
        )
        if response.status_code != 200:
            return {}
        return response.json().get("data", {})
    except Exception:
        # Threat-intel enrichment is optional; failures are not fatal.
        return {}
|
|
|
|
|
|
def analyze_indicators(analysis: HeaderAnalysis) -> list:
    """Derive phishing indicators from already-parsed header data.

    The checks run in a fixed order: sender/Return-Path mismatch,
    sender/Reply-To mismatch, SPF, DKIM and DMARC failures, missing
    Message-ID, suspicious mailer software, too few Received hops, and
    complete absence of authentication results.

    Args:
        analysis: Parsed header data; it is only read, never modified.

    Returns:
        A list of ``PhishingIndicator`` objects in check order.
    """
    findings = []

    def report(category, description, severity, raw_value=""):
        """Record one finding."""
        findings.append(PhishingIndicator(
            category=category,
            description=description,
            severity=severity,
            raw_value=raw_value,
        ))

    sender = analysis.from_domain
    bounce = analysis.return_path_domain
    reply = analysis.reply_to_domain
    auth = analysis.authentication

    # Envelope sender differs from the visible sender.
    if sender and bounce and sender != bounce:
        report(
            "sender_mismatch",
            f"From domain ({sender}) differs from "
            f"Return-Path domain ({bounce})",
            "high",
            f"From: {sender}, Return-Path: {bounce}",
        )

    # Replies would go somewhere other than the visible sender.
    if sender and reply and sender != reply:
        report(
            "reply_to_mismatch",
            f"From domain ({sender}) differs from "
            f"Reply-To domain ({reply})",
            "high",
            f"From: {sender}, Reply-To: {reply}",
        )

    # SPF: a hard fail weighs more than a soft fail.
    if auth.spf in ("fail", "softfail"):
        report(
            "authentication_failure",
            f"SPF check returned {auth.spf}",
            "high" if auth.spf == "fail" else "medium",
            f"spf={auth.spf}",
        )

    if auth.dkim == "fail":
        report(
            "authentication_failure",
            "DKIM signature verification failed",
            "high",
            "dkim=fail",
        )

    if auth.dmarc == "fail":
        report(
            "authentication_failure",
            "DMARC policy check failed",
            "critical",
            "dmarc=fail",
        )

    if not analysis.message_id:
        report("missing_header", "Message-ID header is missing", "medium")

    # Mailer software matched case-insensitively; one finding at most.
    suspicious_mailers = [
        "PHPMailer", "King Phisher", "GoPhish", "Swaks",
        "Sendinblue", "Mass Mailer", "Bulk Mailer"
    ]
    if analysis.x_mailer:
        mailer_lowered = analysis.x_mailer.lower()
        if any(name.lower() in mailer_lowered for name in suspicious_mailers):
            report(
                "suspicious_mailer",
                f"Suspicious X-Mailer detected: {analysis.x_mailer}",
                "high",
                analysis.x_mailer,
            )

    # Zero or one hop suggests the message was injected directly.
    hop_count = len(analysis.received_hops)
    if hop_count <= 1:
        report(
            "routing_anomaly",
            "Very few Received hops - possible direct SMTP injection",
            "medium",
            f"Hop count: {hop_count}",
        )

    if (auth.spf, auth.dkim, auth.dmarc) == ("none", "none", "none"):
        report(
            "no_authentication",
            "No email authentication results found (SPF, DKIM, DMARC all absent)",
            "high",
        )

    return findings
|
|
|
|
|
|
def calculate_risk_score(indicators: list) -> tuple:
    """Convert a list of indicators into a capped score and a risk label.

    Each indicator contributes a weight chosen by its ``severity``
    (critical 30, high 20, medium 10, low 5, anything else 0).  The sum is
    capped at 100 and mapped to a label by descending thresholds.

    Args:
        indicators: Objects exposing a ``severity`` attribute.

    Returns:
        ``(score, level)`` where level is one of CRITICAL, HIGH, MEDIUM,
        LOW or CLEAN.
    """
    weight_by_severity = {"critical": 30, "high": 20, "medium": 10, "low": 5}
    total = sum(weight_by_severity.get(item.severity, 0) for item in indicators)
    capped = min(total, 100)

    # Highest threshold first so the first hit is the strongest label.
    bands = ((70, "CRITICAL"), (50, "HIGH"), (30, "MEDIUM"), (10, "LOW"))
    for minimum, label in bands:
        if capped >= minimum:
            return capped, label
    return capped, "CLEAN"
|
|
|
|
|
|
def analyze_headers(raw_headers: str, enrich: bool = False,
                    abuseipdb_key: str = "") -> HeaderAnalysis:
    """Parse raw email headers and produce a complete ``HeaderAnalysis``.

    Steps: hash the input, parse the headers, extract sender fields, build
    the routing chain from the Received headers, read the authentication
    results, collect URLs, derive phishing indicators and compute the risk
    score.

    Args:
        raw_headers: Header text of one message.
        enrich: When true, geolocate and reverse-resolve each hop address
            and, if ``abuseipdb_key`` is also given, check the
            X-Originating-IP address against AbuseIPDB.
        abuseipdb_key: AbuseIPDB API key; ignored unless ``enrich`` is true.

    Returns:
        The populated ``HeaderAnalysis``.
    """
    analysis = HeaderAnalysis()

    # Hash of the raw input for evidence tracking.
    analysis.file_hash = hashlib.sha256(raw_headers.encode()).hexdigest()

    msg = HeaderParser().parsestr(raw_headers)

    # Basic sender / identity fields.
    analysis.from_address = msg.get("From", "")
    analysis.from_domain = extract_domain(analysis.from_address)
    analysis.return_path = msg.get("Return-Path", "")
    analysis.return_path_domain = extract_domain(analysis.return_path)
    analysis.reply_to = msg.get("Reply-To", "")
    analysis.reply_to_domain = extract_domain(analysis.reply_to)
    analysis.message_id = msg.get("Message-ID", "")
    analysis.subject = msg.get("Subject", "")
    analysis.date = msg.get("Date", "")
    analysis.x_mailer = msg.get("X-Mailer", "") or msg.get("User-Agent", "")

    x_orig = msg.get("X-Originating-IP", "")
    if x_orig:
        ip_match = re.search(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', x_orig)
        if ip_match:
            analysis.x_originating_ip = ip_match.group(1)

    # Received headers are listed newest first, so the topmost header gets
    # the highest hop number and the list is reversed afterwards to read
    # chronologically (hop 1 first).
    received_headers = msg.get_all("Received", [])
    hop_total = len(received_headers)
    for index, received in enumerate(received_headers):
        hop = parse_received_header(received, hop_total - index)
        if enrich and hop.ip_address:
            hop.geo_location = geolocate_ip(hop.ip_address)
            hop.reverse_dns = reverse_dns_lookup(hop.ip_address)
        analysis.received_hops.append(hop)
    analysis.received_hops.reverse()

    auth_results = msg.get("Authentication-Results", "")
    if auth_results:
        analysis.authentication = parse_authentication_results(auth_results)

    # ARC results are consulted only when no SPF verdict was found, and then
    # only to fill gaps.  Replacing the whole result would discard DKIM/DMARC
    # verdicts (including failures) already taken from
    # Authentication-Results.
    arc_auth = msg.get("ARC-Authentication-Results", "")
    if arc_auth and analysis.authentication.spf == "none":
        current = analysis.authentication
        arc = parse_authentication_results(arc_auth)
        for verdict_attr, domain_attr in (("spf", "spf_domain"),
                                          ("dkim", "dkim_domain"),
                                          ("dmarc", "dmarc_domain")):
            if getattr(current, verdict_attr) == "none":
                setattr(current, verdict_attr, getattr(arc, verdict_attr))
                if not getattr(current, domain_attr):
                    setattr(current, domain_attr, getattr(arc, domain_attr))
        if not current.compauth:
            current.compauth = arc.compauth

    # Unique URLs anywhere in the header text, sorted so repeated runs on
    # the same input give the same output.
    url_pattern = r'https?://[^\s<>"\')\]>]+'
    analysis.urls_in_headers = sorted(set(re.findall(url_pattern, raw_headers)))

    analysis.indicators = analyze_indicators(analysis)
    analysis.risk_score, analysis.risk_level = calculate_risk_score(analysis.indicators)

    # Optional threat-intelligence enrichment of the originating address.
    if enrich and analysis.x_originating_ip and abuseipdb_key:
        abuse_data = check_abuseipdb(analysis.x_originating_ip, abuseipdb_key)
        if abuse_data and abuse_data.get("abuseConfidenceScore", 0) > 50:
            analysis.indicators.append(PhishingIndicator(
                category="threat_intelligence",
                description=f"IP {analysis.x_originating_ip} has abuse confidence "
                            f"score of {abuse_data['abuseConfidenceScore']}%",
                severity="critical",
                raw_value=json.dumps(abuse_data)
            ))
            # The new indicator changes the total, so score again.
            analysis.risk_score, analysis.risk_level = calculate_risk_score(analysis.indicators)

    return analysis
|
|
|
|
|
|
def format_report(analysis: HeaderAnalysis) -> str:
|
|
"""Format analysis results as a human-readable report."""
|
|
lines = []
|
|
lines.append("=" * 70)
|
|
lines.append(" PHISHING EMAIL HEADER ANALYSIS REPORT")
|
|
lines.append("=" * 70)
|
|
lines.append(f" Generated: {datetime.now(timezone.utc).isoformat()}")
|
|
lines.append(f" Evidence Hash: {analysis.file_hash[:16]}...")
|
|
lines.append("")
|
|
|
|
# Risk Assessment
|
|
lines.append(f" RISK LEVEL: {analysis.risk_level} (Score: {analysis.risk_score}/100)")
|
|
lines.append("-" * 70)
|
|
|
|
# Sender Information
|
|
lines.append("\n[SENDER INFORMATION]")
|
|
lines.append(f" From: {analysis.from_address}")
|
|
lines.append(f" Return-Path: {analysis.return_path}")
|
|
lines.append(f" Reply-To: {analysis.reply_to}")
|
|
lines.append(f" Subject: {analysis.subject}")
|
|
lines.append(f" Date: {analysis.date}")
|
|
lines.append(f" Message-ID: {analysis.message_id}")
|
|
lines.append(f" X-Mailer: {analysis.x_mailer}")
|
|
if analysis.x_originating_ip:
|
|
lines.append(f" Origin IP: {analysis.x_originating_ip}")
|
|
|
|
# Authentication Results
|
|
lines.append("\n[AUTHENTICATION RESULTS]")
|
|
auth = analysis.authentication
|
|
spf_icon = "PASS" if auth.spf == "pass" else "FAIL" if auth.spf in ("fail", "softfail") else "NONE"
|
|
dkim_icon = "PASS" if auth.dkim == "pass" else "FAIL" if auth.dkim == "fail" else "NONE"
|
|
dmarc_icon = "PASS" if auth.dmarc == "pass" else "FAIL" if auth.dmarc == "fail" else "NONE"
|
|
lines.append(f" SPF: {spf_icon} ({auth.spf}) domain={auth.spf_domain}")
|
|
lines.append(f" DKIM: {dkim_icon} ({auth.dkim}) domain={auth.dkim_domain}")
|
|
lines.append(f" DMARC: {dmarc_icon} ({auth.dmarc}) domain={auth.dmarc_domain}")
|
|
|
|
# Routing Path
|
|
lines.append(f"\n[ROUTING PATH] ({len(analysis.received_hops)} hops)")
|
|
for hop in analysis.received_hops:
|
|
lines.append(f" Hop {hop.hop_number}: {hop.server_from} -> {hop.server_by}")
|
|
if hop.ip_address:
|
|
lines.append(f" IP: {hop.ip_address}")
|
|
if hop.geo_location and hop.geo_location != "unknown":
|
|
lines.append(f" Location: {hop.geo_location}")
|
|
if hop.protocol:
|
|
lines.append(f" Protocol: {hop.protocol}")
|
|
if hop.timestamp:
|
|
lines.append(f" Time: {hop.timestamp}")
|
|
|
|
# Phishing Indicators
|
|
if analysis.indicators:
|
|
lines.append(f"\n[PHISHING INDICATORS] ({len(analysis.indicators)} found)")
|
|
for i, ind in enumerate(analysis.indicators, 1):
|
|
lines.append(f" {i}. [{ind.severity.upper()}] {ind.description}")
|
|
if ind.raw_value:
|
|
lines.append(f" Value: {ind.raw_value}")
|
|
else:
|
|
lines.append("\n[PHISHING INDICATORS] None detected")
|
|
|
|
# URLs in Headers
|
|
if analysis.urls_in_headers:
|
|
lines.append(f"\n[URLS IN HEADERS] ({len(analysis.urls_in_headers)} found)")
|
|
for url in analysis.urls_in_headers[:10]:
|
|
lines.append(f" - {url}")
|
|
|
|
lines.append("\n" + "=" * 70)
|
|
lines.append(" END OF REPORT")
|
|
lines.append("=" * 70)
|
|
|
|
return "\n".join(lines)
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Reads headers from exactly one of ``--file``, ``--eml`` or ``--stdin``,
    analyses them, and prints the result (text report, or JSON with
    ``--json``) to stdout or to the ``--output`` file.

    For ``--eml`` only the header block of the message (everything before
    the first blank line) is analysed, so that URLs in the body are not
    reported as header URLs and the evidence hash covers the header bytes as
    stored in the file.

    The process exits with status 2 for CRITICAL/HIGH risk, 1 for MEDIUM and
    0 otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Analyze email headers for phishing indicators"
    )
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument("--file", "-f", help="Path to file containing raw headers")
    input_group.add_argument("--eml", "-e", help="Path to .eml file")
    input_group.add_argument("--stdin", action="store_true", help="Read headers from stdin")

    parser.add_argument("--enrich", action="store_true",
                        help="Enrich with IP geolocation and reverse DNS")
    parser.add_argument("--abuseipdb-key", default="",
                        help="AbuseIPDB API key for threat intelligence")
    parser.add_argument("--json", action="store_true",
                        help="Output results as JSON")
    parser.add_argument("--output", "-o", help="Write report to file")

    args = parser.parse_args()

    # Read input
    if args.stdin:
        raw_headers = sys.stdin.read()
    elif args.eml:
        with open(args.eml, "rb") as f:
            message_bytes = f.read()
        # The header block ends at the first empty line (LF or CRLF).
        header_bytes = re.split(rb"\r?\n\r?\n",
                                message_bytes.lstrip(b"\r\n"),
                                maxsplit=1)[0]
        # Same decoding as the --file branch.
        raw_headers = header_bytes.decode("utf-8", errors="replace")
    else:
        with open(args.file, "r", encoding="utf-8", errors="replace") as f:
            raw_headers = f.read()

    # Analyze
    analysis = analyze_headers(
        raw_headers,
        enrich=args.enrich,
        abuseipdb_key=args.abuseipdb_key
    )

    # Output
    if args.json:
        output = json.dumps(asdict(analysis), indent=2, default=str)
    else:
        output = format_report(analysis)

    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(output)
        print(f"Report written to {args.output}")
    else:
        print(output)

    # Exit code based on risk, so callers can branch on the result.
    if analysis.risk_level in ("CRITICAL", "HIGH"):
        sys.exit(2)
    elif analysis.risk_level == "MEDIUM":
        sys.exit(1)
    else:
        sys.exit(0)
|
|
|
|
|
|
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
|