#!/usr/bin/env python3
"""BEC detection agent - analyzes email headers and content for Business Email Compromise indicators.

Parses email headers for spoofing signals, checks DMARC/SPF/DKIM alignment,
detects urgency language patterns, and flags financial request anomalies.
"""

import argparse
import email
import json
import re
from email import policy
from pathlib import Path

# Regular expressions (matched case-insensitively) for wording that commonly
# appears in BEC lures: urgency, secrecy, payment requests, and so on.
BEC_URGENCY_PATTERNS = [
    r"\b(urgent|immediately|asap|right away|time.?sensitive)\b",
    r"\b(confidential|do not share|keep this between us|don't tell)\b",
    r"\b(wire transfer|bank transfer|payment|invoice|routing number)\b",
    r"\b(gift card|bitcoin|crypto|western union|moneygram)\b",
    r"\b(ceo|cfo|president|director) (asked|requested|needs|wants)\b",
    r"\b(change.*(bank|account|payment)|new.*(bank|account|routing))\b",
    r"\b(act now|deadline today|end of day|before close)\b",
]

EXECUTIVE_TITLES = ["ceo", "cfo", "coo", "cto", "president", "chairman",
                    "managing director", "vice president", "vp", "director"]


def parse_email_file(filepath):
    """Parse the ``.eml`` file at *filepath* into an ``EmailMessage``.

    The file is read in binary mode so that each MIME part is decoded with
    its own declared charset; reading it as UTF-8 text would corrupt 8-bit
    content written in any other encoding.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(filepath, "rb") as f:
        return email.message_from_binary_file(f, policy=policy.default)


def check_spf_dkim_dmarc(msg):
    """Extract SPF/DKIM/DMARC verdicts from the Authentication-Results header.

    Only the first ``Authentication-Results`` header is inspected. Each
    mechanism maps to ``"pass"``, ``"fail"`` or ``"none"``; ``"pass"`` wins
    when both tokens appear, and any other verdict is reported as ``"none"``.

    Returns:
        dict with the keys ``"spf"``, ``"dkim"`` and ``"dmarc"``.
    """
    auth_results = str(msg.get("Authentication-Results", "")).lower()
    results = {}
    for mechanism in ("spf", "dkim", "dmarc"):
        if f"{mechanism}=pass" in auth_results:
            results[mechanism] = "pass"
        elif f"{mechanism}=fail" in auth_results:
            results[mechanism] = "fail"
        else:
            results[mechanism] = "none"
    return results


def check_display_name_spoofing(msg, vip_names):
    """Report whether the From display name contains one of *vip_names*.

    The comparison is a case-insensitive substring test against the display
    name only; the sender's domain is returned for the caller to judge but is
    not itself validated here.

    Returns:
        A dict describing the first matching VIP, or ``None`` when the From
        header has no ``Name <address>`` form or no VIP name matches.
    """
    from_header = str(msg.get("From", ""))
    match = re.match(r'"?([^"<]+)"?\s*<([^>]+)>', from_header)
    if not match:
        return None
    display_name = match.group(1).strip().lower()
    email_addr = match.group(2).strip().lower()
    for vip in vip_names or ():
        # An empty string is a substring of everything; skip it so a blank
        # VIP entry cannot flag every message.
        if vip and vip.lower() in display_name:
            domain = email_addr.split("@")[-1] if "@" in email_addr else ""
            return {
                "display_name": display_name,
                "email": email_addr,
                "matched_vip": vip,
                "domain": domain,
                "indicator": "Display name matches VIP but email may be external",
            }
    return None


def _extract_address(header_value):
    """Return the lower-cased address found in *header_value*, or ``None``.

    Prefers the ``<...>`` form and falls back to the first token containing
    an ``@``.
    """
    match = (re.search(r'<([^>]+)>', header_value)
             or re.search(r'(\S+@\S+)', header_value))
    return match.group(1).lower() if match else None


def check_reply_to_mismatch(msg):
    """Report a Reply-To address whose domain differs from the From domain.

    Returns:
        A dict with the ``"from"``, ``"reply_to"`` and ``"indicator"`` keys
        when the domains differ; ``None`` when there is no Reply-To header,
        an address cannot be extracted, or the domains are equal.
    """
    reply_to = str(msg.get("Reply-To", ""))
    if not reply_to:
        return None
    from_email = _extract_address(str(msg.get("From", "")))
    reply_email = _extract_address(reply_to)
    if from_email and reply_email:
        from_domain = from_email.split("@")[-1]
        reply_domain = reply_email.split("@")[-1]
        if from_domain != reply_domain:
            return {
                "from": from_email,
                "reply_to": reply_email,
                "indicator": "Reply-To domain differs from From domain",
            }
    return None


def detect_urgency_language(body):
    """Return every substring of *body* matching a BEC urgency pattern.

    Results are grouped by pattern, in ``BEC_URGENCY_PATTERNS`` order, and
    each entry is the full matched text as it appears in *body*.
    """
    if not body:
        return []
    matches = []
    for pattern in BEC_URGENCY_PATTERNS:
        # group(0) is used instead of re.findall because findall yields
        # tuples of sub-groups for the patterns with several capture groups.
        matches.extend(
            m.group(0) for m in re.finditer(pattern, body, re.IGNORECASE)
        )
    return matches


def calculate_bec_score(auth, spoofing, reply_mismatch, urgency_matches):
    """Combine the individual indicators into a 0-100 BEC risk score.

    Weights: SPF fail 25, DKIM fail 20, DMARC fail 30, display-name spoofing
    35, Reply-To mismatch 25, and 10 per urgency match (capped at 40). The
    total is capped at 100.
    """
    score = 0
    if auth.get("spf") == "fail":
        score += 25
    if auth.get("dkim") == "fail":
        score += 20
    if auth.get("dmarc") == "fail":
        score += 30
    if spoofing:
        score += 35
    if reply_mismatch:
        score += 25
    score += min(len(urgency_matches) * 10, 40)
    return min(score, 100)


def _extract_body_text(msg):
    """Return the text of the preferred (plain, then HTML) body part, or ``""``."""
    body = msg.get_body(preferencelist=("plain", "html"))
    # Compare with None explicitly: a message object's truthiness is its
    # header count, so a header-less part would otherwise look "missing".
    if body is None:
        return ""
    try:
        return body.get_content()
    except LookupError:
        # The part declares a charset Python does not know; decode leniently
        # rather than abort the whole analysis.
        raw = body.get_payload(decode=True) or b""
        return raw.decode("utf-8", errors="replace")


def analyze_email(filepath, vip_names):
    """Run every BEC check against the email stored at *filepath*.

    Args:
        filepath: path to an ``.eml`` file.
        vip_names: display names to test the From header against.

    Returns:
        A JSON-serializable dict containing the basic headers, each check's
        result, the numeric ``bec_score`` and the derived ``risk_level``
        (CRITICAL >= 70, HIGH >= 50, MEDIUM >= 30, otherwise LOW).

    Raises:
        OSError: if the file cannot be read.
    """
    msg = parse_email_file(filepath)
    body_text = _extract_body_text(msg)

    auth = check_spf_dkim_dmarc(msg)
    spoofing = check_display_name_spoofing(msg, vip_names)
    reply_mismatch = check_reply_to_mismatch(msg)
    urgency = detect_urgency_language(body_text)
    score = calculate_bec_score(auth, spoofing, reply_mismatch, urgency)

    if score >= 70:
        risk = "CRITICAL"
    elif score >= 50:
        risk = "HIGH"
    elif score >= 30:
        risk = "MEDIUM"
    else:
        risk = "LOW"

    return {
        "file": str(filepath),
        "from": str(msg.get("From", "")),
        "to": str(msg.get("To", "")),
        "subject": str(msg.get("Subject", "")),
        "date": str(msg.get("Date", "")),
        "authentication": auth,
        "display_name_spoofing": spoofing,
        "reply_to_mismatch": reply_mismatch,
        "urgency_indicators": urgency,
        "bec_score": score,
        "risk_level": risk,
    }


def main(argv=None):
    """Command-line entry point: analyze one file or a directory of ``.eml`` files.

    Args:
        argv: argument list to parse; defaults to ``sys.argv[1:]``.

    Prints the list of per-file reports as JSON on standard output. When both
    ``--scan-dir`` and ``--email-file`` are supplied, ``--scan-dir`` wins.
    """
    parser = argparse.ArgumentParser(description="BEC Email Analyzer")
    # Not marked required: --scan-dir must be usable on its own.
    parser.add_argument("--email-file", help="Path to .eml file")
    parser.add_argument("--vip-names", nargs="+", default=[],
                        help="VIP display names to check")
    parser.add_argument("--scan-dir", help="Scan all .eml files in directory")
    args = parser.parse_args(argv)

    if not args.scan_dir and not args.email_file:
        parser.error("one of --email-file or --scan-dir is required")

    if args.scan_dir:
        # Sorted so the report order does not depend on directory listing order.
        targets = [str(p) for p in sorted(Path(args.scan_dir).glob("*.eml"))]
    else:
        targets = [args.email_file]

    results = [analyze_email(target, args.vip_names) for target in targets]
    print(json.dumps(results, indent=2))


if __name__ == "__main__":
    main()