#!/usr/bin/env python3
"""
DMARC/DKIM/SPF Validator and DMARC Report Parser

Validates email authentication DNS records and parses DMARC aggregate
XML reports to identify unauthorized senders and authentication failures.

Usage:
    python process.py check example.com [--selectors s1 s2] [--json]
    python process.py report dmarc_report.xml [--json]
    python process.py --check-domain example.com
    python process.py --parse-report dmarc_report.xml
    python process.py --parse-report-dir /path/to/reports/
"""

import argparse
import base64
import gzip
import io
import json
import os
import re
import sys
import xml.etree.ElementTree as ET
import zipfile
from collections import defaultdict
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

try:
    import dns.exception
    import dns.resolver
    HAS_DNSPYTHON = True
except ImportError:
    HAS_DNSPYTHON = False

try:
    import requests
    HAS_REQUESTS = True
except ImportError:
    HAS_REQUESTS = False


# Selectors probed by check_dkim() when the caller does not supply any.
DEFAULT_DKIM_SELECTORS = (
    "selector1", "selector2",  # Microsoft 365
    "google", "default",       # Google Workspace
    "s1", "s2",                # Generic
    "dkim", "mail",            # Common
    "k1", "k2",                # Mailchimp
    "sm1", "sm2",              # SendGrid
)

SPF_LOOKUP_LIMIT = 10
MIN_RSA_KEY_BITS = 2048
DOH_ENDPOINT = "https://dns.google/resolve"
DNS_TYPE_TXT = 16

_SPF_QUALIFIERS = "+-~?"
# Matches one double-quoted character-string, allowing backslash escapes.
_QUOTED_SEGMENT_RE = re.compile(r'"((?:[^"\\]|\\.)*)"')


@dataclass
class SPFRecord:
    """Parsed SPF record details."""
    raw: str = ""                # record text as published
    version: str = ""            # "spf1" once a record has been found
    mechanisms: list = field(default_factory=list)   # terms as written ("all" excluded)
    includes: list = field(default_factory=list)     # domains named by include:
    ip4_ranges: list = field(default_factory=list)
    ip6_ranges: list = field(default_factory=list)
    qualifier: str = ""          # the "all" term, e.g. "-all"; bare "all" is stored as "+all"
    dns_lookup_count: int = 0    # terms that cost a DNS lookup during evaluation
    valid: bool = False          # True when no errors were recorded
    errors: list = field(default_factory=list)


@dataclass
class DKIMRecord:
    """Parsed DKIM record details."""
    selector: str = ""
    raw: str = ""
    version: str = ""
    key_type: str = ""
    public_key: str = ""         # base64 text of the p= tag, whitespace removed
    key_length: int = 0          # key size in bits (0 if the key could not be decoded)
    valid: bool = False
    errors: list = field(default_factory=list)


@dataclass
class DMARCRecord:
    """Parsed DMARC record details."""
    raw: str = ""
    version: str = ""
    policy: str = ""
    subdomain_policy: str = ""   # falls back to `policy` when sp= is absent
    pct: int = 100
    rua: list = field(default_factory=list)
    ruf: list = field(default_factory=list)
    adkim: str = "r"
    aspf: str = "r"
    fo: str = "0"
    valid: bool = False          # advisory warnings (p=none, missing rua) do not clear this
    errors: list = field(default_factory=list)


@dataclass
class DMARCReportRecord:
    """Single record from a DMARC aggregate report."""
    source_ip: str = ""
    count: int = 0
    disposition: str = ""
    dkim_result: str = ""        # auth_results/dkim/result (first dkim element only)
    dkim_domain: str = ""
    spf_result: str = ""         # auth_results/spf/result
    spf_domain: str = ""
    header_from: str = ""
    envelope_from: str = ""
    dkim_aligned: bool = False   # policy_evaluated/dkim == "pass"
    spf_aligned: bool = False    # policy_evaluated/spf == "pass"


@dataclass
class DMARCReportSummary:
    """Summary of a parsed DMARC aggregate report."""
    org_name: str = ""
    report_id: str = ""
    date_begin: str = ""         # YYYY-MM-DD (UTC)
    date_end: str = ""           # YYYY-MM-DD (UTC)
    domain: str = ""
    total_messages: int = 0
    pass_count: int = 0
    fail_count: int = 0
    records: list = field(default_factory=list)           # DMARCReportRecord items
    top_failing_ips: list = field(default_factory=list)   # (ip, count) pairs, highest first
    # NOTE: nothing in this module populates unauthorized_senders.
    unauthorized_senders: list = field(default_factory=list)


def _unquote_txt(data: str) -> str:
    """Join the quoted character-strings of a presentation-format TXT value.

    A TXT value may arrive as ``"part1" "part2"``; the segments are
    concatenated without a separator. Unquoted values are returned with any
    stray surrounding quotes removed.
    """
    if len(data) >= 2 and data.startswith('"') and data.endswith('"'):
        segments = _QUOTED_SEGMENT_RE.findall(data)
        if segments:
            return "".join(segments)
    return data.strip('"')


def query_dns_txt(domain: str) -> list:
    """Query DNS TXT records for a domain.

    Uses dnspython when it is installed, otherwise the Google DNS-over-HTTPS
    JSON endpoint via ``requests``. Lookups are best-effort: any resolver or
    HTTP failure yields an empty list rather than an exception.

    Args:
        domain: Fully qualified name to query.

    Returns:
        A list of TXT record strings (possibly empty).
    """
    if HAS_DNSPYTHON:
        try:
            answers = dns.resolver.resolve(domain, "TXT")
        except dns.exception.DNSException:
            # Covers NXDOMAIN / NoAnswer / NoNameservers / Timeout and also
            # malformed names, which previously escaped as a traceback.
            return []
        return [
            b"".join(rdata.strings).decode("utf-8", errors="replace")
            for rdata in answers
        ]

    if HAS_REQUESTS:
        try:
            # `params` lets requests URL-encode the name instead of splicing
            # caller-supplied text straight into the query string.
            resp = requests.get(
                DOH_ENDPOINT,
                params={"name": domain, "type": "TXT"},
                timeout=10,
            )
            if resp.status_code != 200:
                return []
            payload = resp.json()
        except (requests.RequestException, ValueError):
            return []
        return [
            _unquote_txt(answer.get("data", ""))
            for answer in payload.get("Answer", [])
            # The answer section can also carry CNAME records from the chain.
            if answer.get("type", DNS_TYPE_TXT) == DNS_TYPE_TXT
        ]

    return []


def _parse_tag_list(raw: str) -> dict:
    """Parse a ``tag=value; tag=value`` list (DKIM/DMARC) into a dict.

    Tags with an empty value are omitted; a repeated tag keeps its last value.
    """
    tags = {}
    for item in raw.split(";"):
        name, sep, value = item.partition("=")
        name, value = name.strip(), value.strip()
        if sep and name and value:
            tags[name] = value
    return tags


def _is_spf_record(txt: str) -> bool:
    """Return True if *txt* is exactly ``v=spf1`` or starts with ``v=spf1 ``."""
    lowered = txt.lower()
    return lowered == "v=spf1" or lowered.startswith("v=spf1 ")


def _parse_spf_record(raw: str) -> SPFRecord:
    """Parse one SPF record string into an SPFRecord (``valid`` is left unset)."""
    record = SPFRecord(raw=raw, version="spf1")
    lookup_count = 0
    has_redirect = False

    for term in raw.split()[1:]:
        lowered = term.lower()
        if lowered.startswith("redirect="):
            has_redirect = True
            record.mechanisms.append(term)
            lookup_count += 1
            continue

        # A mechanism may carry a leading qualifier (+ - ~ ?); default is "+".
        qualifier = lowered[0] if lowered[0] in _SPF_QUALIFIERS else ""
        name = re.split(r"[:/]", lowered[len(qualifier):], maxsplit=1)[0]
        value = term.partition(":")[2]

        if name == "all":
            record.qualifier = f"{qualifier or '+'}all"
        elif name == "include" and value:
            record.includes.append(value)
            record.mechanisms.append(term)
            lookup_count += 1
        elif name == "ip4" and value:
            record.ip4_ranges.append(value)
            record.mechanisms.append(term)
        elif name == "ip6" and value:
            record.ip6_ranges.append(value)
            record.mechanisms.append(term)
        elif name in ("a", "mx", "ptr") or (name == "exists" and value):
            # Exact-name match: a prefix test would also swallow "all".
            record.mechanisms.append(term)
            lookup_count += 1

    record.dns_lookup_count = lookup_count
    if lookup_count > SPF_LOOKUP_LIMIT:
        record.errors.append(f"SPF exceeds 10 DNS lookup limit ({lookup_count} lookups)")
    if record.qualifier == "+all":
        record.errors.append("SPF uses +all which allows any sender (insecure)")
    elif record.qualifier == "?all":
        record.errors.append("SPF uses ?all (neutral) - provides no protection")
    # A record that delegates via redirect= legitimately has no "all" term.
    if not record.qualifier and not has_redirect:
        record.errors.append("SPF record has no terminating mechanism (-all/~all)")
    return record


def check_spf(domain: str) -> SPFRecord:
    """Check and validate SPF record for a domain.

    Args:
        domain: Domain whose TXT records are searched for an SPF policy.

    Returns:
        An SPFRecord; ``errors`` lists every problem found and ``valid`` is
        True only when that list is empty. Only the first SPF record is
        parsed when several are published.
    """
    spf_records = [r for r in query_dns_txt(domain) if _is_spf_record(r)]
    if not spf_records:
        record = SPFRecord()
        record.errors.append("No SPF record found")
        return record

    record = _parse_spf_record(spf_records[0])
    if len(spf_records) > 1:
        record.errors.insert(
            0, f"Multiple SPF records found ({len(spf_records)}) - RFC violation"
        )
    record.valid = not record.errors
    return record


def _read_der_tlv(data: bytes, offset: int) -> tuple:
    """Return ``(tag, content_start, content_end)`` of the DER element at *offset*.

    Raises:
        ValueError, IndexError: if the element is malformed or truncated.
    """
    tag = data[offset]
    length = data[offset + 1]
    offset += 2
    if length & 0x80:  # long form: low 7 bits give the number of length bytes
        size = length & 0x7F
        if not 1 <= size <= 4:
            raise ValueError("unsupported DER length encoding")
        length = int.from_bytes(data[offset:offset + size], "big")
        offset += size
    end = offset + length
    if end > len(data):
        raise ValueError("truncated DER element")
    return tag, offset, end


def _rsa_modulus_bits(der: bytes) -> int:
    """Return the RSA modulus size in bits from a DER-encoded public key.

    Accepts either a SubjectPublicKeyInfo wrapper or a bare RSAPublicKey.

    Raises:
        ValueError, IndexError: if the structure is not recognised.
    """
    tag, start, _ = _read_der_tlv(der, 0)
    if tag != 0x30:
        raise ValueError("expected SEQUENCE")
    tag, inner_start, inner_end = _read_der_tlv(der, start)
    if tag == 0x30:
        # SubjectPublicKeyInfo: skip AlgorithmIdentifier, then open the BIT STRING.
        tag, bits_start, _ = _read_der_tlv(der, inner_end)
        if tag != 0x03:
            raise ValueError("expected BIT STRING")
        # +1 skips the "unused bits" octet that prefixes BIT STRING content.
        tag, start, _ = _read_der_tlv(der, bits_start + 1)
        if tag != 0x30:
            raise ValueError("expected RSAPublicKey SEQUENCE")
        tag, inner_start, inner_end = _read_der_tlv(der, start)
    if tag != 0x02:
        raise ValueError("expected INTEGER modulus")
    return int.from_bytes(der[inner_start:inner_end], "big").bit_length()


def _parse_dkim_record(selector: str, raw: str) -> DKIMRecord:
    """Parse one DKIM key record string into a DKIMRecord."""
    record = DKIMRecord(selector=selector, raw=raw)
    tags = _parse_tag_list(raw)

    # v= is optional in a key record; a record without it is still usable.
    version = tags.get("v", "DKIM1")
    if version == "DKIM1":
        record.version = "DKIM1"
    else:
        record.errors.append(f"Unsupported DKIM version: {version}")

    record.key_type = tags.get("k", "rsa")  # default
    record.public_key = "".join(tags.get("p", "").split())

    if not record.public_key:
        record.errors.append("DKIM record has empty public key (revoked)")
    else:
        is_rsa = record.key_type.lower() == "rsa"
        try:
            key_bytes = base64.b64decode(record.public_key)
        except ValueError:  # binascii.Error is a ValueError subclass
            key_bytes = b""
        if key_bytes:
            record.key_length = len(key_bytes) * 8
            if is_rsa:
                try:
                    # The decoded blob is a DER wrapper, not the key itself;
                    # report the modulus size rather than the blob size.
                    record.key_length = _rsa_modulus_bits(key_bytes)
                except (ValueError, IndexError):
                    pass  # keep the size-of-blob estimate
        # The 2048-bit floor applies to RSA only (Ed25519 keys are 256-bit).
        if is_rsa and record.key_length and record.key_length < MIN_RSA_KEY_BITS:
            record.errors.append(
                f"DKIM key is {record.key_length}-bit (2048-bit minimum recommended per RFC 8301)"
            )

    record.valid = not record.errors
    return record


def check_dkim(domain: str, selectors: Optional[list] = None) -> list:
    """Check DKIM records for common selectors.

    Args:
        domain: Domain to probe.
        selectors: Selector names to try; defaults to DEFAULT_DKIM_SELECTORS.

    Returns:
        One DKIMRecord per selector that has a published key record.
        Selectors with no record are omitted.
    """
    if selectors is None:
        selectors = DEFAULT_DKIM_SELECTORS

    results = []
    for selector in selectors:
        txt_records = query_dns_txt(f"{selector}._domainkey.{domain}")
        dkim_records = [r for r in txt_records if "DKIM1" in r or "p=" in r]
        if dkim_records:
            results.append(_parse_dkim_record(selector, dkim_records[0]))
    return results


def check_dmarc(domain: str) -> DMARCRecord:
    """Check and validate DMARC record for a domain.

    Args:
        domain: Domain whose ``_dmarc`` TXT record is inspected.

    Returns:
        A DMARCRecord. ``errors`` holds both hard errors and advisory
        warnings; only hard errors make ``valid`` False. A policy of
        ``none`` and a missing ``rua`` are advisory.
    """
    record = DMARCRecord()
    txt_records = query_dns_txt(f"_dmarc.{domain}")
    dmarc_records = [r for r in txt_records if r.startswith("v=DMARC1")]
    if not dmarc_records:
        record.errors.append("No DMARC record found")
        return record

    record.raw = dmarc_records[0]
    record.version = "DMARC1"
    tags = _parse_tag_list(record.raw)

    record.policy = tags.get("p", "")
    record.subdomain_policy = tags.get("sp", record.policy)
    record.adkim = tags.get("adkim", "r")
    record.aspf = tags.get("aspf", "r")
    record.fo = tags.get("fo", "0")

    # Count hard errors explicitly instead of classifying by message text.
    blocking = 0

    if "pct" in tags:
        try:
            pct = int(tags["pct"])
        except ValueError:
            pct = None
        if pct is None or not 0 <= pct <= 100:
            record.errors.append(f"Invalid pct value: {tags['pct']}")
            blocking += 1
        else:
            record.pct = pct

    if "rua" in tags:
        record.rua = [uri.strip() for uri in tags["rua"].split(",") if uri.strip()]
    if "ruf" in tags:
        record.ruf = [uri.strip() for uri in tags["ruf"].split(",") if uri.strip()]

    if not record.policy:
        record.errors.append("DMARC record missing required p= tag")
        blocking += 1
    elif record.policy not in ("none", "quarantine", "reject"):
        record.errors.append(f"Invalid DMARC policy: {record.policy}")
        blocking += 1

    if record.policy == "none":
        record.errors.append("DMARC policy is 'none' (monitor only) - not enforcing")
    if not record.rua:
        record.errors.append("No aggregate report URI (rua) configured")

    record.valid = blocking == 0
    return record


def _strip_namespaces(root) -> None:
    """Drop ``{namespace}`` prefixes from every element tag under *root*."""
    for element in root.iter():
        tag = element.tag
        if isinstance(tag, str) and tag.startswith("{"):
            element.tag = tag.split("}", 1)[1]


def _to_int(text, default: int = 0) -> int:
    """Convert *text* to int, returning *default* when it is not a number."""
    try:
        return int(text)
    except (TypeError, ValueError):
        return default


def _epoch_to_date(text: str) -> str:
    """Format a Unix-epoch string as ``YYYY-MM-DD`` (UTC); "" if unparsable."""
    try:
        moment = datetime.fromtimestamp(int(text), tz=timezone.utc)
    except (ValueError, OverflowError, OSError):
        return ""
    return moment.strftime("%Y-%m-%d")


def parse_dmarc_report(xml_content: str) -> DMARCReportSummary:
    """Parse a DMARC aggregate XML report.

    A record counts as passing when its policy_evaluated DKIM or SPF result
    is ``pass``; otherwise its message count is added to the failures and to
    the per-IP failure tally.

    Args:
        xml_content: The report XML as text.

    Returns:
        A DMARCReportSummary. Malformed XML is reported on stderr and yields
        an empty summary; non-numeric counts/timestamps are treated as
        0 / "" instead of raising.
    """
    summary = DMARCReportSummary()

    # SECURITY: aggregate reports arrive from third parties. ElementTree is
    # not hardened against maliciously constructed XML; consider a hardened
    # parser if reports are processed from untrusted mailboxes.
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError as e:
        print(f"Error parsing XML: {e}", file=sys.stderr)
        return summary

    # Some reporters put the whole document in an XML namespace, which would
    # make every plain-name find() below miss.
    _strip_namespaces(root)

    # Report metadata
    metadata = root.find("report_metadata")
    if metadata is not None:
        summary.org_name = metadata.findtext("org_name", "")
        summary.report_id = metadata.findtext("report_id", "")
        date_range = metadata.find("date_range")
        if date_range is not None:
            summary.date_begin = _epoch_to_date(date_range.findtext("begin", ""))
            summary.date_end = _epoch_to_date(date_range.findtext("end", ""))

    # Policy published
    policy = root.find("policy_published")
    if policy is not None:
        summary.domain = policy.findtext("domain", "")

    # Records
    failing_ips = defaultdict(int)
    for record_el in root.findall("record"):
        rec = DMARCReportRecord()

        row = record_el.find("row")
        if row is not None:
            rec.source_ip = row.findtext("source_ip", "")
            rec.count = _to_int(row.findtext("count", "0"))
            policy_evaluated = row.find("policy_evaluated")
            if policy_evaluated is not None:
                rec.disposition = policy_evaluated.findtext("disposition", "")
                rec.dkim_aligned = policy_evaluated.findtext("dkim", "") == "pass"
                rec.spf_aligned = policy_evaluated.findtext("spf", "") == "pass"

        identifiers = record_el.find("identifiers")
        if identifiers is not None:
            rec.header_from = identifiers.findtext("header_from", "")
            rec.envelope_from = identifiers.findtext("envelope_from", "")

        auth_results = record_el.find("auth_results")
        if auth_results is not None:
            dkim_el = auth_results.find("dkim")
            if dkim_el is not None:
                rec.dkim_domain = dkim_el.findtext("domain", "")
                rec.dkim_result = dkim_el.findtext("result", "")
            spf_el = auth_results.find("spf")
            if spf_el is not None:
                rec.spf_domain = spf_el.findtext("domain", "")
                rec.spf_result = spf_el.findtext("result", "")

        summary.total_messages += rec.count
        if rec.dkim_aligned or rec.spf_aligned:
            summary.pass_count += rec.count
        else:
            summary.fail_count += rec.count
            failing_ips[rec.source_ip] += rec.count
        summary.records.append(rec)

    # Top failing IPs
    summary.top_failing_ips = sorted(
        failing_ips.items(), key=lambda item: item[1], reverse=True
    )[:20]
    return summary


def load_report_file(filepath: str) -> str:
    """Load a DMARC report file (handles .xml, .xml.gz, .zip).

    Args:
        filepath: Path to the report. The suffix (case-insensitive) selects
            the decoder; anything that is not .gz or .zip is read as text.

    Returns:
        The XML text, or "" when a zip archive has no ``.xml`` member.

    Raises:
        OSError, EOFError, zipfile.BadZipFile: if the file is missing,
            unreadable or corrupt.
    """
    path = Path(filepath)
    suffix = path.suffix.lower()

    if suffix == ".gz":
        with gzip.open(path, "rt", encoding="utf-8", errors="replace") as f:
            return f.read()

    if suffix == ".zip":
        with zipfile.ZipFile(path) as zf:
            for name in zf.namelist():
                if name.lower().endswith(".xml"):
                    with zf.open(name) as xf:
                        return xf.read().decode("utf-8", errors="replace")
        return ""

    with open(path, "r", encoding="utf-8", errors="replace") as f:
        return f.read()


def format_domain_check(domain: str, spf: SPFRecord, dkim_records: list,
                        dmarc: DMARCRecord,
                        checked_selectors: Optional[list] = None) -> str:
    """Format domain authentication check results.

    Args:
        domain: Domain that was checked.
        spf: Result of check_spf().
        dkim_records: Result of check_dkim() (may be empty).
        dmarc: Result of check_dmarc().
        checked_selectors: Selectors that were probed, shown when no DKIM
            record was found. Defaults to DEFAULT_DKIM_SELECTORS.

    Returns:
        A multi-line, human-readable report.
    """
    if checked_selectors is None:
        checked_selectors = DEFAULT_DKIM_SELECTORS

    lines = [
        "=" * 70,
        f" EMAIL AUTHENTICATION CHECK: {domain}",
        "=" * 70,
        f" Date: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}",
        "",
    ]

    # SPF
    status = "PASS" if spf.valid else "ISSUES"
    lines.append(f"[SPF] {status}")
    lines.append(f" Record: {spf.raw}")
    lines.append(f" IP4 Ranges: {', '.join(spf.ip4_ranges) or 'none'}")
    lines.append(f" Includes: {', '.join(spf.includes) or 'none'}")
    lines.append(f" Qualifier: {spf.qualifier}")
    lines.append(f" DNS Lookups: {spf.dns_lookup_count}/10")
    lines.extend(f" WARNING: {err}" for err in spf.errors)
    lines.append("")

    # DKIM
    if dkim_records:
        for dkim in dkim_records:
            status = "PASS" if dkim.valid else "ISSUES"
            lines.append(f"[DKIM] {status} (selector: {dkim.selector})")
            lines.append(f" Key Type: {dkim.key_type}")
            lines.append(f" Key Length: {dkim.key_length} bits")
            lines.extend(f" WARNING: {err}" for err in dkim.errors)
    else:
        lines.append("[DKIM] NO RECORDS FOUND")
        # List what was actually probed rather than a fixed subset.
        lines.append(f" Checked selectors: {', '.join(checked_selectors)}")
    lines.append("")

    # DMARC
    status = "PASS" if dmarc.valid else "ISSUES"
    lines.append(f"[DMARC] {status}")
    lines.append(f" Record: {dmarc.raw}")
    lines.append(f" Policy: {dmarc.policy}")
    lines.append(f" Subdomain Policy: {dmarc.subdomain_policy}")
    lines.append(f" Percentage: {dmarc.pct}%")
    lines.append(f" DKIM Alignment: {dmarc.adkim} ({'relaxed' if dmarc.adkim == 'r' else 'strict'})")
    lines.append(f" SPF Alignment: {dmarc.aspf} ({'relaxed' if dmarc.aspf == 'r' else 'strict'})")
    lines.append(f" Aggregate Reports: {', '.join(dmarc.rua) or 'not configured'}")
    lines.append(f" Forensic Reports: {', '.join(dmarc.ruf) or 'not configured'}")
    lines.extend(f" WARNING: {err}" for err in dmarc.errors)
    lines.append("")

    # Overall assessment
    lines.append("-" * 70)
    all_valid = spf.valid and dmarc.valid and any(d.valid for d in dkim_records)
    if all_valid and dmarc.policy == "reject":
        lines.append(" OVERALL: STRONG - Full email authentication with reject policy")
    elif all_valid and dmarc.policy == "quarantine":
        lines.append(" OVERALL: GOOD - Full authentication, consider upgrading to reject")
    elif all_valid:
        lines.append(" OVERALL: MONITORING - Authentication configured but DMARC not enforcing")
    else:
        lines.append(" OVERALL: WEAK - Email authentication has gaps")
    lines.append("=" * 70)

    return "\n".join(lines)


def format_report_summary(summary: DMARCReportSummary) -> str:
    """Format DMARC report summary.

    Shows at most the 10 top failing IPs and the first 50 records.

    Args:
        summary: Result of parse_dmarc_report().

    Returns:
        A multi-line, human-readable report.
    """
    # max(..., 1) avoids dividing by zero for an empty report.
    denominator = max(summary.total_messages, 1)
    pass_pct = summary.pass_count * 100 // denominator
    fail_pct = summary.fail_count * 100 // denominator

    lines = [
        "=" * 70,
        " DMARC AGGREGATE REPORT SUMMARY",
        "=" * 70,
        f" Reporting Org: {summary.org_name}",
        f" Report ID: {summary.report_id}",
        f" Period: {summary.date_begin} to {summary.date_end}",
        f" Domain: {summary.domain}",
        "",
        f" Total Messages: {summary.total_messages}",
        f" Passed: {summary.pass_count} ({pass_pct}%)",
        f" Failed: {summary.fail_count} ({fail_pct}%)",
        "",
    ]

    if summary.top_failing_ips:
        lines.append("[TOP FAILING SOURCE IPs]")
        lines.extend(
            f" {ip}: {count} messages" for ip, count in summary.top_failing_ips[:10]
        )
        lines.append("")

    lines.append("[DETAILED RECORDS]")
    for rec in summary.records[:50]:
        status = "PASS" if (rec.dkim_aligned or rec.spf_aligned) else "FAIL"
        lines.append(f" {rec.source_ip} ({rec.count} msgs) - {status}")
        lines.append(f" Disposition: {rec.disposition} | "
                     f"DKIM: {rec.dkim_result} ({rec.dkim_domain}) | "
                     f"SPF: {rec.spf_result} ({rec.spf_domain})")
    lines.append("=" * 70)

    return "\n".join(lines)


def _load_report_or_warn(path: Path) -> Optional[str]:
    """Load a report file, printing a message to stderr instead of raising.

    Returns the XML text, or None when the file is unreadable or holds no XML.
    """
    try:
        xml_content = load_report_file(str(path))
    except (OSError, EOFError, zipfile.BadZipFile) as exc:
        print(f"Error reading {path}: {exc}", file=sys.stderr)
        return None
    if not xml_content:
        print(f"No XML content found in {path}", file=sys.stderr)
        return None
    return xml_content


def _print_report(xml_content: str, as_json: bool) -> None:
    """Parse one report and print it as JSON or formatted text."""
    summary = parse_dmarc_report(xml_content)
    if as_json:
        print(json.dumps(asdict(summary), indent=2, default=str))
    else:
        print(format_report_summary(summary))


def main():
    """Command-line entry point.

    Supports the ``check`` and ``report`` subcommands as well as the legacy
    ``--check-domain`` / ``--parse-report`` / ``--parse-report-dir`` flags.
    Exits with status 1 when no action is requested or a single report file
    cannot be read.
    """
    parser = argparse.ArgumentParser(
        description="DMARC/DKIM/SPF validator and DMARC report parser"
    )
    subparsers = parser.add_subparsers(dest="command")

    # default=SUPPRESS on the subcommand copies of --json: otherwise the
    # subparser's default (False) overwrites a --json given before the
    # subcommand name.
    check_parser = subparsers.add_parser("check", help="Check domain authentication records")
    check_parser.add_argument("domain", help="Domain to check")
    check_parser.add_argument("--selectors", nargs="+", help="DKIM selectors to check")
    check_parser.add_argument("--json", action="store_true",
                              default=argparse.SUPPRESS, help="Output as JSON")

    report_parser = subparsers.add_parser("report", help="Parse DMARC aggregate report")
    report_parser.add_argument("path", help="Path to XML report file or directory")
    report_parser.add_argument("--json", action="store_true",
                               default=argparse.SUPPRESS, help="Output as JSON")

    # Support legacy --check-domain and --parse-report flags
    parser.add_argument("--check-domain", help="Domain to check (legacy)")
    parser.add_argument("--parse-report", help="Report file to parse (legacy)")
    parser.add_argument("--parse-report-dir", help="Directory of reports (legacy)")
    parser.add_argument("--json", action="store_true", help="Output as JSON")

    args = parser.parse_args()

    domain = getattr(args, "domain", None) or args.check_domain
    report_path = getattr(args, "path", None) or args.parse_report or args.parse_report_dir

    if domain:
        selectors = getattr(args, "selectors", None)
        spf = check_spf(domain)
        dkim_records = check_dkim(domain, selectors)
        dmarc = check_dmarc(domain)

        if args.json:
            result = {
                "domain": domain,
                "spf": asdict(spf),
                "dkim": [asdict(d) for d in dkim_records],
                "dmarc": asdict(dmarc),
            }
            print(json.dumps(result, indent=2))
        else:
            print(format_domain_check(domain, spf, dkim_records, dmarc,
                                      checked_selectors=selectors))

    elif report_path:
        path = Path(report_path)
        if path.is_dir():
            for f in sorted(path.glob("*")):
                if f.suffix in (".xml", ".gz", ".zip"):
                    # One bad file must not abort the rest of the batch.
                    xml_content = _load_report_or_warn(f)
                    if xml_content:
                        _print_report(xml_content, args.json)
                        print()
        else:
            xml_content = _load_report_or_warn(path)
            if xml_content is None:
                sys.exit(1)
            _print_report(xml_content, args.json)
    else:
        parser.print_help()
        sys.exit(1)


if __name__ == "__main__":
    main()