Files
Anthropic-Cybersecurity-Skills/skills/implementing-dmarc-dkim-spf-email-security/scripts/process.py
T

598 lines
20 KiB
Python

#!/usr/bin/env python3
"""
DMARC/DKIM/SPF Validator and DMARC Report Parser
Validates email authentication DNS records and parses DMARC aggregate
XML reports to identify unauthorized senders and authentication failures.
Usage:
python process.py --check-domain example.com
python process.py --parse-report dmarc_report.xml
python process.py --parse-report-dir /path/to/reports/
"""
import argparse
import json
import sys
import xml.etree.ElementTree as ET
import gzip
import zipfile
import io
import os
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
from collections import defaultdict
try:
import dns.resolver
HAS_DNSPYTHON = True
except ImportError:
HAS_DNSPYTHON = False
try:
import requests
HAS_REQUESTS = True
except ImportError:
HAS_REQUESTS = False
@dataclass
class SPFRecord:
"""Parsed SPF record details."""
raw: str = ""
version: str = ""
mechanisms: list = field(default_factory=list)
includes: list = field(default_factory=list)
ip4_ranges: list = field(default_factory=list)
ip6_ranges: list = field(default_factory=list)
qualifier: str = ""
dns_lookup_count: int = 0
valid: bool = False
errors: list = field(default_factory=list)
@dataclass
class DKIMRecord:
"""Parsed DKIM record details."""
selector: str = ""
raw: str = ""
version: str = ""
key_type: str = ""
public_key: str = ""
key_length: int = 0
valid: bool = False
errors: list = field(default_factory=list)
@dataclass
class DMARCRecord:
"""Parsed DMARC record details."""
raw: str = ""
version: str = ""
policy: str = ""
subdomain_policy: str = ""
pct: int = 100
rua: list = field(default_factory=list)
ruf: list = field(default_factory=list)
adkim: str = "r"
aspf: str = "r"
fo: str = "0"
valid: bool = False
errors: list = field(default_factory=list)
@dataclass
class DMARCReportRecord:
"""Single record from a DMARC aggregate report."""
source_ip: str = ""
count: int = 0
disposition: str = ""
dkim_result: str = ""
dkim_domain: str = ""
spf_result: str = ""
spf_domain: str = ""
header_from: str = ""
envelope_from: str = ""
dkim_aligned: bool = False
spf_aligned: bool = False
@dataclass
class DMARCReportSummary:
"""Summary of a parsed DMARC aggregate report."""
org_name: str = ""
report_id: str = ""
date_begin: str = ""
date_end: str = ""
domain: str = ""
total_messages: int = 0
pass_count: int = 0
fail_count: int = 0
records: list = field(default_factory=list)
top_failing_ips: list = field(default_factory=list)
unauthorized_senders: list = field(default_factory=list)
def query_dns_txt(domain: str) -> list:
"""Query DNS TXT records for a domain."""
if HAS_DNSPYTHON:
try:
answers = dns.resolver.resolve(domain, "TXT")
results = []
for rdata in answers:
txt = b"".join(rdata.strings).decode("utf-8", errors="replace")
results.append(txt)
return results
except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer,
dns.resolver.NoNameservers, dns.resolver.Timeout):
return []
elif HAS_REQUESTS:
try:
resp = requests.get(
f"https://dns.google/resolve?name={domain}&type=TXT",
timeout=10
)
if resp.status_code == 200:
data = resp.json()
results = []
for answer in data.get("Answer", []):
txt = answer.get("data", "").strip('"')
results.append(txt)
return results
except Exception:
pass
return []
def check_spf(domain: str) -> SPFRecord:
"""Check and validate SPF record for a domain."""
record = SPFRecord()
txt_records = query_dns_txt(domain)
spf_records = [r for r in txt_records if r.startswith("v=spf1")]
if not spf_records:
record.errors.append("No SPF record found")
return record
if len(spf_records) > 1:
record.errors.append(f"Multiple SPF records found ({len(spf_records)}) - RFC violation")
record.raw = spf_records[0]
record.version = "spf1"
parts = record.raw.split()
lookup_count = 0
for part in parts[1:]:
if part.startswith("include:"):
domain_ref = part.split(":", 1)[1]
record.includes.append(domain_ref)
record.mechanisms.append(part)
lookup_count += 1
elif part.startswith("ip4:"):
record.ip4_ranges.append(part.split(":", 1)[1])
record.mechanisms.append(part)
elif part.startswith("ip6:"):
record.ip6_ranges.append(part.split(":", 1)[1])
record.mechanisms.append(part)
elif part.startswith(("a:", "a")):
record.mechanisms.append(part)
lookup_count += 1
elif part.startswith(("mx:", "mx")):
record.mechanisms.append(part)
lookup_count += 1
elif part.startswith("redirect="):
record.mechanisms.append(part)
lookup_count += 1
elif part.startswith("exists:"):
record.mechanisms.append(part)
lookup_count += 1
elif part in ("-all", "~all", "?all", "+all"):
record.qualifier = part
record.dns_lookup_count = lookup_count
if lookup_count > 10:
record.errors.append(f"SPF exceeds 10 DNS lookup limit ({lookup_count} lookups)")
if record.qualifier == "+all":
record.errors.append("SPF uses +all which allows any sender (insecure)")
elif record.qualifier == "?all":
record.errors.append("SPF uses ?all (neutral) - provides no protection")
if not record.qualifier:
record.errors.append("SPF record has no terminating mechanism (-all/~all)")
record.valid = len(record.errors) == 0
return record
def check_dkim(domain: str, selectors: list = None) -> list:
"""Check DKIM records for common selectors."""
if selectors is None:
selectors = [
"selector1", "selector2", # Microsoft 365
"google", "default", # Google Workspace
"s1", "s2", # Generic
"dkim", "mail", # Common
"k1", "k2", # Mailchimp
"sm1", "sm2", # SendGrid
]
results = []
for selector in selectors:
record = DKIMRecord(selector=selector)
dkim_domain = f"{selector}._domainkey.{domain}"
txt_records = query_dns_txt(dkim_domain)
dkim_records = [r for r in txt_records if "DKIM1" in r or "p=" in r]
if dkim_records:
record.raw = dkim_records[0]
if "v=DKIM1" in record.raw:
record.version = "DKIM1"
import re
key_match = re.search(r'k=(\w+)', record.raw)
if key_match:
record.key_type = key_match.group(1)
else:
record.key_type = "rsa" # default
pub_match = re.search(r'p=([A-Za-z0-9+/=]+)', record.raw)
if pub_match:
record.public_key = pub_match.group(1)
import base64
try:
key_bytes = base64.b64decode(record.public_key)
record.key_length = len(key_bytes) * 8
except Exception:
pass
if record.key_length and record.key_length < 2048:
record.errors.append(
f"DKIM key is {record.key_length}-bit (2048-bit minimum recommended per RFC 8301)"
)
if not record.public_key:
record.errors.append("DKIM record has empty public key (revoked)")
record.valid = len(record.errors) == 0
results.append(record)
return results
def check_dmarc(domain: str) -> DMARCRecord:
"""Check and validate DMARC record for a domain."""
record = DMARCRecord()
dmarc_domain = f"_dmarc.{domain}"
txt_records = query_dns_txt(dmarc_domain)
dmarc_records = [r for r in txt_records if r.startswith("v=DMARC1")]
if not dmarc_records:
record.errors.append("No DMARC record found")
return record
record.raw = dmarc_records[0]
record.version = "DMARC1"
import re
tags = {}
for tag_match in re.finditer(r'(\w+)\s*=\s*([^;]+)', record.raw):
tags[tag_match.group(1).strip()] = tag_match.group(2).strip()
record.policy = tags.get("p", "")
record.subdomain_policy = tags.get("sp", record.policy)
record.adkim = tags.get("adkim", "r")
record.aspf = tags.get("aspf", "r")
record.fo = tags.get("fo", "0")
if "pct" in tags:
try:
record.pct = int(tags["pct"])
except ValueError:
record.errors.append(f"Invalid pct value: {tags['pct']}")
if "rua" in tags:
record.rua = [uri.strip() for uri in tags["rua"].split(",")]
if "ruf" in tags:
record.ruf = [uri.strip() for uri in tags["ruf"].split(",")]
if not record.policy:
record.errors.append("DMARC record missing required p= tag")
elif record.policy not in ("none", "quarantine", "reject"):
record.errors.append(f"Invalid DMARC policy: {record.policy}")
if record.policy == "none":
record.errors.append("DMARC policy is 'none' (monitor only) - not enforcing")
if not record.rua:
record.errors.append("No aggregate report URI (rua) configured")
record.valid = len([e for e in record.errors if "monitor only" not in e and "rua" not in e]) == 0
return record
def parse_dmarc_report(xml_content: str) -> DMARCReportSummary:
"""Parse a DMARC aggregate XML report."""
summary = DMARCReportSummary()
try:
root = ET.fromstring(xml_content)
except ET.ParseError as e:
print(f"Error parsing XML: {e}", file=sys.stderr)
return summary
# Report metadata
metadata = root.find("report_metadata")
if metadata is not None:
summary.org_name = metadata.findtext("org_name", "")
summary.report_id = metadata.findtext("report_id", "")
date_range = metadata.find("date_range")
if date_range is not None:
begin = date_range.findtext("begin", "")
end = date_range.findtext("end", "")
if begin:
summary.date_begin = datetime.fromtimestamp(
int(begin), tz=timezone.utc
).strftime("%Y-%m-%d")
if end:
summary.date_end = datetime.fromtimestamp(
int(end), tz=timezone.utc
).strftime("%Y-%m-%d")
# Policy published
policy = root.find("policy_published")
if policy is not None:
summary.domain = policy.findtext("domain", "")
# Records
failing_ips = defaultdict(int)
for record_el in root.findall("record"):
rec = DMARCReportRecord()
row = record_el.find("row")
if row is not None:
rec.source_ip = row.findtext("source_ip", "")
rec.count = int(row.findtext("count", "0"))
policy_evaluated = row.find("policy_evaluated")
if policy_evaluated is not None:
rec.disposition = policy_evaluated.findtext("disposition", "")
dkim_el = policy_evaluated.findtext("dkim", "")
spf_el = policy_evaluated.findtext("spf", "")
rec.dkim_aligned = dkim_el == "pass"
rec.spf_aligned = spf_el == "pass"
identifiers = record_el.find("identifiers")
if identifiers is not None:
rec.header_from = identifiers.findtext("header_from", "")
rec.envelope_from = identifiers.findtext("envelope_from", "")
auth_results = record_el.find("auth_results")
if auth_results is not None:
dkim_el = auth_results.find("dkim")
if dkim_el is not None:
rec.dkim_domain = dkim_el.findtext("domain", "")
rec.dkim_result = dkim_el.findtext("result", "")
spf_el = auth_results.find("spf")
if spf_el is not None:
rec.spf_domain = spf_el.findtext("domain", "")
rec.spf_result = spf_el.findtext("result", "")
summary.total_messages += rec.count
if rec.dkim_aligned or rec.spf_aligned:
summary.pass_count += rec.count
else:
summary.fail_count += rec.count
failing_ips[rec.source_ip] += rec.count
summary.records.append(rec)
# Top failing IPs
summary.top_failing_ips = sorted(
failing_ips.items(), key=lambda x: x[1], reverse=True
)[:20]
return summary
def load_report_file(filepath: str) -> str:
"""Load a DMARC report file (handles .xml, .xml.gz, .zip)."""
path = Path(filepath)
if path.suffix == ".gz":
with gzip.open(path, "rt", encoding="utf-8", errors="replace") as f:
return f.read()
elif path.suffix == ".zip":
with zipfile.ZipFile(path) as zf:
for name in zf.namelist():
if name.endswith(".xml"):
with zf.open(name) as xf:
return xf.read().decode("utf-8", errors="replace")
else:
with open(path, "r", encoding="utf-8", errors="replace") as f:
return f.read()
return ""
def format_domain_check(domain: str, spf: SPFRecord, dkim_records: list,
dmarc: DMARCRecord) -> str:
"""Format domain authentication check results."""
lines = []
lines.append("=" * 70)
lines.append(f" EMAIL AUTHENTICATION CHECK: {domain}")
lines.append("=" * 70)
lines.append(f" Date: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}")
lines.append("")
# SPF
status = "PASS" if spf.valid else "ISSUES"
lines.append(f"[SPF] {status}")
lines.append(f" Record: {spf.raw}")
lines.append(f" IP4 Ranges: {', '.join(spf.ip4_ranges) or 'none'}")
lines.append(f" Includes: {', '.join(spf.includes) or 'none'}")
lines.append(f" Qualifier: {spf.qualifier}")
lines.append(f" DNS Lookups: {spf.dns_lookup_count}/10")
for err in spf.errors:
lines.append(f" WARNING: {err}")
lines.append("")
# DKIM
if dkim_records:
for dkim in dkim_records:
status = "PASS" if dkim.valid else "ISSUES"
lines.append(f"[DKIM] {status} (selector: {dkim.selector})")
lines.append(f" Key Type: {dkim.key_type}")
lines.append(f" Key Length: {dkim.key_length} bits")
for err in dkim.errors:
lines.append(f" WARNING: {err}")
else:
lines.append("[DKIM] NO RECORDS FOUND")
lines.append(" Checked selectors: selector1, selector2, google, default, s1, s2, dkim, mail")
lines.append("")
# DMARC
status = "PASS" if dmarc.valid else "ISSUES"
lines.append(f"[DMARC] {status}")
lines.append(f" Record: {dmarc.raw}")
lines.append(f" Policy: {dmarc.policy}")
lines.append(f" Subdomain Policy: {dmarc.subdomain_policy}")
lines.append(f" Percentage: {dmarc.pct}%")
lines.append(f" DKIM Alignment: {dmarc.adkim} ({'relaxed' if dmarc.adkim == 'r' else 'strict'})")
lines.append(f" SPF Alignment: {dmarc.aspf} ({'relaxed' if dmarc.aspf == 'r' else 'strict'})")
lines.append(f" Aggregate Reports: {', '.join(dmarc.rua) or 'not configured'}")
lines.append(f" Forensic Reports: {', '.join(dmarc.ruf) or 'not configured'}")
for err in dmarc.errors:
lines.append(f" WARNING: {err}")
lines.append("")
# Overall assessment
lines.append("-" * 70)
all_valid = spf.valid and dmarc.valid and any(d.valid for d in dkim_records)
if all_valid and dmarc.policy == "reject":
lines.append(" OVERALL: STRONG - Full email authentication with reject policy")
elif all_valid and dmarc.policy == "quarantine":
lines.append(" OVERALL: GOOD - Full authentication, consider upgrading to reject")
elif all_valid:
lines.append(" OVERALL: MONITORING - Authentication configured but DMARC not enforcing")
else:
lines.append(" OVERALL: WEAK - Email authentication has gaps")
lines.append("=" * 70)
return "\n".join(lines)
def format_report_summary(summary: DMARCReportSummary) -> str:
"""Format DMARC report summary."""
lines = []
lines.append("=" * 70)
lines.append(" DMARC AGGREGATE REPORT SUMMARY")
lines.append("=" * 70)
lines.append(f" Reporting Org: {summary.org_name}")
lines.append(f" Report ID: {summary.report_id}")
lines.append(f" Period: {summary.date_begin} to {summary.date_end}")
lines.append(f" Domain: {summary.domain}")
lines.append("")
lines.append(f" Total Messages: {summary.total_messages}")
lines.append(f" Passed: {summary.pass_count} ({summary.pass_count*100//max(summary.total_messages,1)}%)")
lines.append(f" Failed: {summary.fail_count} ({summary.fail_count*100//max(summary.total_messages,1)}%)")
lines.append("")
if summary.top_failing_ips:
lines.append("[TOP FAILING SOURCE IPs]")
for ip, count in summary.top_failing_ips[:10]:
lines.append(f" {ip}: {count} messages")
lines.append("")
lines.append("[DETAILED RECORDS]")
for rec in summary.records[:50]:
status = "PASS" if (rec.dkim_aligned or rec.spf_aligned) else "FAIL"
lines.append(f" {rec.source_ip} ({rec.count} msgs) - {status}")
lines.append(f" Disposition: {rec.disposition} | "
f"DKIM: {rec.dkim_result} ({rec.dkim_domain}) | "
f"SPF: {rec.spf_result} ({rec.spf_domain})")
lines.append("=" * 70)
return "\n".join(lines)
def main():
parser = argparse.ArgumentParser(
description="DMARC/DKIM/SPF validator and DMARC report parser"
)
subparsers = parser.add_subparsers(dest="command")
check_parser = subparsers.add_parser("check", help="Check domain authentication records")
check_parser.add_argument("domain", help="Domain to check")
check_parser.add_argument("--selectors", nargs="+", help="DKIM selectors to check")
check_parser.add_argument("--json", action="store_true", help="Output as JSON")
report_parser = subparsers.add_parser("report", help="Parse DMARC aggregate report")
report_parser.add_argument("path", help="Path to XML report file or directory")
report_parser.add_argument("--json", action="store_true", help="Output as JSON")
# Support legacy --check-domain and --parse-report flags
parser.add_argument("--check-domain", help="Domain to check (legacy)")
parser.add_argument("--parse-report", help="Report file to parse (legacy)")
parser.add_argument("--parse-report-dir", help="Directory of reports (legacy)")
parser.add_argument("--json", action="store_true", help="Output as JSON")
args = parser.parse_args()
domain = getattr(args, "domain", None) or args.check_domain
report_path = getattr(args, "path", None) or args.parse_report or args.parse_report_dir
if domain:
spf = check_spf(domain)
dkim_records = check_dkim(domain, getattr(args, "selectors", None))
dmarc = check_dmarc(domain)
if args.json:
result = {
"domain": domain,
"spf": asdict(spf),
"dkim": [asdict(d) for d in dkim_records],
"dmarc": asdict(dmarc),
}
print(json.dumps(result, indent=2))
else:
print(format_domain_check(domain, spf, dkim_records, dmarc))
elif report_path:
path = Path(report_path)
if path.is_dir():
for f in sorted(path.glob("*")):
if f.suffix in (".xml", ".gz", ".zip"):
xml_content = load_report_file(str(f))
if xml_content:
summary = parse_dmarc_report(xml_content)
if args.json:
print(json.dumps(asdict(summary), indent=2, default=str))
else:
print(format_report_summary(summary))
print()
else:
xml_content = load_report_file(str(path))
if xml_content:
summary = parse_dmarc_report(xml_content)
if args.json:
print(json.dumps(asdict(summary), indent=2, default=str))
else:
print(format_report_summary(summary))
else:
parser.print_help()
sys.exit(1)
if __name__ == "__main__":
main()