Initial commit - 611 cybersecurity skills across all subdomains

2026-08-03 01:10:18 +03:00 · 2026-02-25 10:47:44 +01:00
commit 22a7ab1462
1765 changed files with 280648 additions and 0 deletions
@@ -0,0 +1,304 @@
+#!/usr/bin/env python3
+"""
+Phishing Report Triage Engine
+
+Processes user-reported phishing emails, extracts IOCs,
+performs automated analysis, and classifies the report.
+
+Usage:
+    python process.py triage --eml-file reported_email.eml
+    python process.py metrics --reports-file reports.json
+    python process.py extract-iocs --eml-file reported_email.eml
+"""
+
+import argparse
+import json
+import re
+import hashlib
+import sys
+from dataclasses import dataclass, field, asdict
+from collections import Counter
+from datetime import datetime
+
+
+@dataclass
+class ExtractedIOCs:
+    """Indicators of compromise pulled out of one reported email.
+
+    Every field defaults to an empty string or an empty list, so an
+    instance can be created first and filled in piece by piece.
+    """
+    sender_address: str = ""  # address captured from the From: header
+    sender_domain: str = ""  # part of sender_address after the "@"
+    reply_to: str = ""  # address captured from the Reply-To: header
+    urls: list = field(default_factory=list)  # http(s) URLs found in the message
+    domains: list = field(default_factory=list)  # lower-cased hosts of those URLs
+    attachment_names: list = field(default_factory=list)  # values of filename= parameters
+    attachment_hashes: list = field(default_factory=list)  # attachment digests; extract_iocs leaves this empty
+    ip_addresses: list = field(default_factory=list)  # dotted-quad strings found in the message
+    subject: str = ""  # text of the Subject: header
+
+
+@dataclass
+class TriageResult:
+    """Outcome of triaging one reported email.
+
+    All fields have neutral defaults; triage_report fills in the
+    classification, confidence, IOCs, indicators and recommended action.
+    """
+    report_id: str = ""  # identifier of the report (not set by triage_report)
+    reporter: str = ""  # who submitted the report (not set by triage_report)
+    classification: str = ""  # verdict label, e.g. "confirmed_phishing" or "clean"
+    confidence: float = 0.0  # fraction between 0 and 1
+    iocs: dict = field(default_factory=dict)  # asdict() form of the extracted IOCs
+    indicators: list = field(default_factory=list)  # human-readable reasons behind the score
+    recommended_action: str = ""  # suggested follow-up for the verdict
+    auto_actionable: bool = False  # True when the action needs no analyst review
+
+
+@dataclass
+class ReportingMetrics:
+    """Aggregate figures for a phishing reporting program.
+
+    Counters start at zero and are filled in by calculate_metrics.
+    """
+    total_reports: int = 0  # number of reports processed
+    confirmed_phishing: int = 0  # reports classified "confirmed_phishing"
+    confirmed_spam: int = 0  # reports classified "spam"
+    simulation_reports: int = 0  # reports classified "simulation"
+    false_positives: int = 0  # reports classified "clean"
+    mean_triage_time_min: float = 0.0  # average of the positive triage times, in minutes
+    top_reporters: list = field(default_factory=list)  # [{"reporter": ..., "count": ...}, ...]
+    report_rate: float = 0.0  # percentage of reports that were phishing or simulations
+
+
+# Phrase heuristics used when scoring a reported message.
+# Each entry is (regex, indicator label, score weight); triage_report runs
+# every regex against the lower-cased message text and sums the weights of
+# the ones that match.
+PHISHING_INDICATORS = [
+    (
+        r'\burgent\b.*\b(action|response|attention)\b',
+        "Urgency language",
+        15,
+    ),
+    (
+        r'\b(verify|confirm|validate)\s+your\s+(account|identity|password)\b',
+        "Credential request",
+        20,
+    ),
+    (
+        r'\b(click|follow)\s+(here|this|the)\s+(link|button)\b',
+        "Click-bait language",
+        10,
+    ),
+    (
+        r'\b(suspended|locked|disabled|compromised)\s+(account|access)\b',
+        "Fear language",
+        15,
+    ),
+    (
+        r'\b(wire\s+transfer|payment|invoice|bank)\b',
+        "Financial language",
+        10,
+    ),
+    (
+        r'\bgift\s+card\b',
+        "Gift card request",
+        20,
+    ),
+    (
+        r'\bdo\s+not\s+(share|tell|discuss)\b',
+        "Secrecy language",
+        15,
+    ),
+]
+
+
+def _trim_url(url: str) -> str:
+    """Strip sentence punctuation that the URL regex picked up after *url*."""
+    while url:
+        last = url[-1]
+        if last in '.,;:!?':
+            url = url[:-1]
+        elif last == ')' and url.count(')') > url.count('('):
+            # Unbalanced ")" closes surrounding prose, not the URL itself.
+            url = url[:-1]
+        else:
+            break
+    return url
+
+
+def extract_iocs(eml_content: str) -> ExtractedIOCs:
+    """Extract IOCs from raw email text.
+
+    The message is scanned with regular expressions only; no MIME parsing
+    or transfer decoding is performed, so encoded parts are searched as
+    they appear in the text.
+
+    Args:
+        eml_content: Full text of the reported message (headers and body).
+
+    Returns:
+        An ExtractedIOCs holding the first From/Reply-To address and
+        Subject found, plus de-duplicated URLs, URL hosts, IPv4 addresses
+        and attachment file names, each in order of first appearance.
+        attachment_hashes is left empty.
+    """
+    # Function-scope import: nothing else in this module needs urllib.
+    from urllib.parse import urlsplit
+
+    iocs = ExtractedIOCs()
+    header_flags = re.MULTILINE | re.IGNORECASE
+
+    # Extract From (bare address, or the part inside <...>)
+    from_match = re.search(r'^From:\s*(?:.*<)?([^>\s]+@[^>\s]+)', eml_content,
+                           header_flags)
+    if from_match:
+        iocs.sender_address = from_match.group(1).strip()
+        domain_match = re.search(r'@([\w.-]+)', iocs.sender_address)
+        if domain_match:
+            iocs.sender_domain = domain_match.group(1)
+
+    # Extract Reply-To
+    reply_match = re.search(r'^Reply-To:\s*(?:.*<)?([^>\s]+@[^>\s]+)', eml_content,
+                            header_flags)
+    if reply_match:
+        iocs.reply_to = reply_match.group(1).strip()
+
+    # Extract Subject
+    subj_match = re.search(r'^Subject:\s*(.+)$', eml_content, header_flags)
+    if subj_match:
+        iocs.subject = subj_match.group(1).strip()
+
+    # Extract URLs. dict.fromkeys de-duplicates while keeping first-seen
+    # order, so the output is stable from run to run (a set is not).
+    raw_urls = re.findall(r'https?://[^\s<>"\']+', eml_content)
+    iocs.urls = list(dict.fromkeys(_trim_url(url) for url in raw_urls))
+
+    # Extract hosts from URLs. urlsplit() skips any "user:password@" prefix,
+    # so "https://paypal.com:x@evil.example/" yields "evil.example" rather
+    # than the decoy name in the userinfo part.
+    hosts = []
+    for url in iocs.urls:
+        try:
+            host = urlsplit(url).hostname
+        except ValueError:
+            # Malformed authority (e.g. unbalanced IPv6 brackets): keep the
+            # URL in iocs.urls but record no host for it.
+            continue
+        if host:
+            hosts.append(host)
+    iocs.domains = list(dict.fromkeys(hosts))
+
+    # Extract IPv4 addresses from the whole message, dropping dotted
+    # quads whose octets exceed 255.
+    candidates = re.findall(r'\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b', eml_content)
+    iocs.ip_addresses = list(dict.fromkeys(
+        ip for ip in candidates
+        if all(int(octet) <= 255 for octet in ip.split('.'))
+    ))
+
+    # Extract attachment filenames (quoted or bare filename= parameters)
+    attachments = re.findall(
+        r'filename[*]?=(?:"([^"]+)"|([^\s;]+))',
+        eml_content, re.IGNORECASE
+    )
+    names = (quoted or bare for quoted, bare in attachments)
+    iocs.attachment_names = list(dict.fromkeys(name for name in names if name))
+
+    return iocs
+
+
+def triage_report(eml_content: str, simulation_subjects: list = None) -> TriageResult:
+    """Classify a reported email with additive heuristics.
+
+    A subject that contains any known simulation subject ends triage
+    immediately with the "simulation" verdict. Otherwise points are added
+    for each matching phrase pattern, a failed SPF/DKIM/DMARC result, a
+    Reply-To domain that differs from the From domain, and each attachment
+    with a risky extension. The total maps to a verdict:
+    >= 50 "confirmed_phishing", >= 25 "suspicious", >= 10 "spam",
+    otherwise "clean".
+
+    Args:
+        eml_content: Full text of the reported message (headers and body).
+        simulation_subjects: Subjects of known phishing simulations, matched
+            case-insensitively as substrings of the Subject header. Blank
+            entries are ignored. None or empty disables the check.
+
+    Returns:
+        A TriageResult with classification, confidence, extracted IOCs,
+        the indicators that fired and the recommended action.
+    """
+    result = TriageResult()
+    iocs = extract_iocs(eml_content)
+    result.iocs = asdict(iocs)
+
+    # Check if it's a known simulation. Blank entries must be skipped:
+    # "" is a substring of every subject and would otherwise turn every
+    # report into an auto-actioned simulation hit.
+    subject_lower = iocs.subject.lower()
+    for sim_subj in simulation_subjects or ():
+        if sim_subj.strip() and sim_subj.lower() in subject_lower:
+            result.classification = "simulation"
+            result.confidence = 0.95
+            result.recommended_action = "Credit reporter in training platform"
+            result.auto_actionable = True
+            return result
+
+    score = 0
+    body_lower = eml_content.lower()
+
+    # Check phishing indicators
+    for pattern, desc, weight in PHISHING_INDICATORS:
+        if re.search(pattern, body_lower):
+            result.indicators.append(desc)
+            score += weight
+
+    # Check for authentication failures. Only the header value and its
+    # folded continuation lines are searched, so "spf=fail" quoted in the
+    # message body is not mistaken for a real result.
+    auth_headers = re.finditer(r'Authentication-Results:(.*(?:\r?\n[ \t]+.*)*)',
+                               eml_content, re.IGNORECASE)
+    for header in auth_headers:
+        failure = re.search(r'spf=fail|dkim=fail|dmarc=fail',
+                            header.group(1), re.IGNORECASE)
+        if failure:
+            result.indicators.append(f"Authentication failure: {failure.group(0)}")
+            score += 20
+            break  # counted once, however many results failed
+
+    # Check Reply-To mismatch (domain names compare case-insensitively)
+    if iocs.reply_to and iocs.sender_address:
+        reply_domain = re.search(r'@([\w.-]+)', iocs.reply_to)
+        sender_domain = re.search(r'@([\w.-]+)', iocs.sender_address)
+        if reply_domain and sender_domain:
+            if reply_domain.group(1).lower() != sender_domain.group(1).lower():
+                result.indicators.append("Reply-To domain mismatch")
+                score += 15
+
+    # Check for suspicious attachment types; each risky attachment adds
+    # to the score.
+    risky_extensions = ('.exe', '.scr', '.bat', '.cmd', '.ps1', '.vbs',
+                        '.js', '.wsf', '.hta', '.iso', '.img')
+    for att in iocs.attachment_names:
+        if att.lower().endswith(risky_extensions):
+            result.indicators.append(f"Risky attachment: {att}")
+            score += 25
+
+    # Classify
+    if score >= 50:
+        result.classification = "confirmed_phishing"
+        result.confidence = min(score / 100, 0.95)
+        result.recommended_action = "Retract from all inboxes, block sender domain"
+        result.auto_actionable = True
+    elif score >= 25:
+        result.classification = "suspicious"
+        result.confidence = score / 100
+        result.recommended_action = "Escalate to SOC analyst for manual review"
+        result.auto_actionable = False
+    elif score >= 10:
+        result.classification = "spam"
+        result.confidence = 0.6
+        result.recommended_action = "Move to junk for all recipients"
+        result.auto_actionable = True
+    else:
+        result.classification = "clean"
+        result.confidence = 0.7
+        result.recommended_action = "Return to inbox, notify reporter"
+        result.auto_actionable = True
+
+    return result
+
+
+def calculate_metrics(reports: list) -> ReportingMetrics:
+    """Calculate phishing reporting program metrics.
+
+    Args:
+        reports: List of report dicts. The keys read are "classification",
+            "reporter" and "triage_time_minutes"; all are optional.
+
+    Returns:
+        A ReportingMetrics with per-classification counts, the mean of the
+        positive numeric triage times, the ten most frequent reporters,
+        and report_rate: the percentage of reports classified as
+        "confirmed_phishing" or "simulation".
+    """
+    metrics = ReportingMetrics()
+    metrics.total_reports = len(reports)
+
+    reporter_counts = Counter()
+    triage_times = []
+
+    for report in reports:
+        classification = report.get("classification", "")
+        if classification == "confirmed_phishing":
+            metrics.confirmed_phishing += 1
+        elif classification == "spam":
+            metrics.confirmed_spam += 1
+        elif classification == "simulation":
+            metrics.simulation_reports += 1
+        elif classification == "clean":
+            metrics.false_positives += 1
+
+        reporter = report.get("reporter", "")
+        if reporter:
+            reporter_counts[reporter] += 1
+
+        # JSON input may carry null or a string here; comparing those with
+        # 0 raises TypeError, so only real numbers are averaged.
+        triage_time = report.get("triage_time_minutes")
+        is_number = (isinstance(triage_time, (int, float))
+                     and not isinstance(triage_time, bool))
+        if is_number and triage_time > 0:
+            triage_times.append(triage_time)
+
+    if triage_times:
+        metrics.mean_triage_time_min = sum(triage_times) / len(triage_times)
+
+    metrics.top_reporters = [
+        {"reporter": r, "count": c}
+        for r, c in reporter_counts.most_common(10)
+    ]
+
+    if metrics.total_reports > 0:
+        metrics.report_rate = (
+            (metrics.confirmed_phishing + metrics.simulation_reports) /
+            metrics.total_reports * 100
+        )
+
+    return metrics
+
+
+def main():
+    """Command-line entry point: parse arguments and run one sub-command.
+
+    Sub-commands are "triage", "metrics" and "extract-iocs". "--json" may be
+    given before or after the sub-command name. Unreadable input files and
+    malformed report JSON end the program through parser.error() (usage
+    message, exit status 2) instead of a traceback.
+    """
+    parser = argparse.ArgumentParser(description="Phishing Report Triage Engine")
+    parser.add_argument("--json", action="store_true")
+
+    # Shared by every sub-command so "--json" is also accepted after the
+    # sub-command name. default=SUPPRESS stops a sub-parser from resetting
+    # a "--json" that was given before the sub-command.
+    common = argparse.ArgumentParser(add_help=False)
+    common.add_argument("--json", action="store_true", default=argparse.SUPPRESS)
+
+    subparsers = parser.add_subparsers(dest="command")
+
+    triage_parser = subparsers.add_parser("triage", parents=[common],
+                                          help="Triage reported email")
+    triage_parser.add_argument("--eml-file", required=True)
+    triage_parser.add_argument("--sim-subjects", nargs="*", default=[])
+
+    metrics_parser = subparsers.add_parser("metrics", parents=[common],
+                                           help="Calculate reporting metrics")
+    metrics_parser.add_argument("--reports-file", required=True)
+
+    ioc_parser = subparsers.add_parser("extract-iocs", parents=[common],
+                                       help="Extract IOCs from email")
+    ioc_parser.add_argument("--eml-file", required=True)
+
+    args = parser.parse_args()
+
+    def read_email(path):
+        # Explicit UTF-8 keeps decoding independent of the machine's locale;
+        # undecodable bytes are replaced rather than aborting the run.
+        try:
+            with open(path, 'r', encoding='utf-8', errors='replace') as f:
+                return f.read()
+        except OSError as exc:
+            parser.error(f"cannot read {path}: {exc}")
+
+    if args.command == "triage":
+        content = read_email(args.eml_file)
+        result = triage_report(content, args.sim_subjects)
+        if args.json:
+            print(json.dumps(asdict(result), indent=2))
+        else:
+            print(f"Classification: {result.classification}")
+            print(f"Confidence: {result.confidence:.0%}")
+            print(f"Action: {result.recommended_action}")
+            print(f"Auto-actionable: {'Yes' if result.auto_actionable else 'No'}")
+            if result.indicators:
+                print("Indicators:")
+                for ind in result.indicators:
+                    print(f"  - {ind}")
+
+    elif args.command == "metrics":
+        try:
+            with open(args.reports_file, encoding='utf-8') as f:
+                reports = json.load(f)
+        except OSError as exc:
+            parser.error(f"cannot read {args.reports_file}: {exc}")
+        except ValueError as exc:
+            # Covers json.JSONDecodeError and UnicodeDecodeError.
+            parser.error(f"{args.reports_file} is not valid JSON: {exc}")
+        if not isinstance(reports, list) or not all(isinstance(r, dict) for r in reports):
+            parser.error(f"{args.reports_file} must contain a JSON array of report objects")
+        result = calculate_metrics(reports)
+        if args.json:
+            print(json.dumps(asdict(result), indent=2))
+        else:
+            print(f"Total reports: {result.total_reports}")
+            print(f"Confirmed phishing: {result.confirmed_phishing}")
+            print(f"Spam: {result.confirmed_spam}")
+            print(f"Simulations reported: {result.simulation_reports}")
+            print(f"False positives: {result.false_positives}")
+            print(f"Mean triage time: {result.mean_triage_time_min:.1f} min")
+
+    elif args.command == "extract-iocs":
+        content = read_email(args.eml_file)
+        iocs = extract_iocs(content)
+        # IOC output is always JSON, with or without --json.
+        print(json.dumps(asdict(iocs), indent=2))
+
+    else:
+        parser.print_help()
+
+
+if __name__ == "__main__":
+    main()