Anthropic-Cybersecurity-Skills/skills/analyzing-macro-malware-in-office-documents/scripts/agent.py

#!/usr/bin/env python3
"""Office macro malware analysis agent using oletools for VBA extraction and deobfuscation."""

import re
import os
import sys
import hashlib
import json
import zipfile

try:
    from oletools.olevba import VBA_Parser
    from oletools import oleid
    HAS_OLETOOLS = True
except ImportError:
    HAS_OLETOOLS = False


def compute_hash(filepath):
    """Return the hex-encoded SHA-256 digest of the file at ``filepath``.

    The file is opened in binary mode and consumed in 64 KiB blocks, so the
    whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    with open(filepath, "rb") as handle:
        while True:
            block = handle.read(65536)
            if not block:
                # An empty read marks end-of-file.
                break
            digest.update(block)
    return digest.hexdigest()


def triage_document(filepath):
    """Run oleid over ``filepath`` and summarise every indicator it reports.

    Returns a dict keyed by indicator name; each value holds the indicator's
    stringified ``value``, its ``risk`` and its ``description``. When oletools
    could not be imported, a single-entry ``{"error": ...}`` dict is returned
    instead.
    """
    if not HAS_OLETOOLS:
        return {"error": "oletools not installed: pip install oletools"}

    checker = oleid.OleID(filepath)
    return {
        found.name: {
            "value": str(found.value),
            "risk": found.risk,
            "description": found.description,
        }
        for found in checker.check()
    }


def extract_vba_macros(filepath):
    """Extract VBA macro code from an Office document.

    Returns a list with one dict per macro yielded by
    ``VBA_Parser.extract_macros()``, holding ``filename``, ``stream_path``,
    ``vba_filename``, ``code`` and ``code_length``. The list is empty when the
    parser detects no macros. When oletools could not be imported, an
    ``{"error": ...}`` dict is returned instead of a list.
    """
    if not HAS_OLETOOLS:
        return {"error": "oletools not installed"}

    vba_parser = VBA_Parser(filepath)
    try:
        if not vba_parser.detect_vba_macros():
            return []
        return [
            {
                "filename": filename,
                "stream_path": stream_path,
                "vba_filename": vba_filename,
                "code": vba_code,
                "code_length": len(vba_code),
            }
            for (filename, stream_path, vba_filename, vba_code)
            in vba_parser.extract_macros()
        ]
    finally:
        # Always release the parser, even if detection or extraction raises
        # on a malformed sample.
        vba_parser.close()


def analyze_vba_suspicious(filepath):
    """Analyze VBA macros for suspicious keywords and patterns.

    Runs ``VBA_Parser.analyze_macros()`` and sorts its
    ``(type, keyword, description)`` results into four lists keyed
    ``auto_exec``, ``suspicious``, ``iocs`` and ``hex_strings``. Results of
    any other type are ignored. When oletools could not be imported, an
    ``{"error": ...}`` dict is returned instead.
    """
    if not HAS_OLETOOLS:
        return {"error": "oletools not installed"}

    # Maps the result type reported by the parser to the output bucket.
    bucket_for_type = {
        "AutoExec": "auto_exec",
        "Suspicious": "suspicious",
        "IOC": "iocs",
        "Hex String": "hex_strings",
    }
    analysis = {"auto_exec": [], "suspicious": [], "iocs": [], "hex_strings": []}

    vba_parser = VBA_Parser(filepath)
    try:
        if vba_parser.detect_vba_macros():
            for (kw_type, keyword, description) in vba_parser.analyze_macros():
                bucket = bucket_for_type.get(kw_type)
                if bucket is None:
                    continue
                analysis[bucket].append(
                    {"type": kw_type, "keyword": keyword, "description": description}
                )
    finally:
        # Always release the parser, even if analysis raises on a malformed
        # sample.
        vba_parser.close()
    return analysis


def deobfuscate_chr_calls(vba_code):
    """Resolve ``Chr()``, ``Chr$()``, ``ChrW()`` and ``ChrW$()`` calls in VBA code.

    Each call with a decimal literal argument is replaced by the equivalent
    quoted VBA string literal, e.g. ``Chr(104)`` becomes ``"h"``. Emitting a
    quoted literal (rather than the bare character) keeps the code well-formed
    and lets a later concatenation pass merge ``"a" & Chr(98)`` into ``"ab"``.
    A resolved double quote is doubled, as VBA string literals require.

    Matching is case-insensitive because VBA identifiers are, and tolerates
    whitespace around the parentheses. Calls whose argument is not a valid
    code point are left untouched.
    """
    def resolve_chr(match):
        try:
            char = chr(int(match.group(1)))
        except (ValueError, OverflowError):
            # Out-of-range code point: keep the original call text.
            return match.group(0)
        return '"' + char.replace('"', '""') + '"'

    # \b stops the pattern from firing inside longer identifiers (e.g. MyChr).
    return re.sub(
        r'\bChrW?\$?\s*\(\s*(\d+)\s*\)',
        resolve_chr,
        vba_code,
        flags=re.IGNORECASE,
    )


def deobfuscate_concatenation(vba_code):
    """Merge adjacent string literals: ``"abc" & "def"`` -> ``"abcdef"``.

    Handles both VBA concatenation operators (``&`` and ``+``) and an optional
    ``_`` line continuation between the operator and the next literal. Only
    literal-to-literal joins are folded; ``"a" & x & "b"`` is left unchanged
    because nothing but whitespace may sit between the quotes and the
    operator.
    """
    return re.sub(r'"\s*[&+]\s*(?:_[ \t]*\r?\n\s*)?"', '', vba_code)


def deobfuscate_strreverse(vba_code):
    """Resolve ``StrReverse("literal")`` calls to the reversed string literal.

    Only calls whose single argument is a non-empty quoted literal are
    rewritten; calls on variables or expressions are left as they are.
    Matching is case-insensitive because VBA identifiers are, and tolerates
    whitespace around the parentheses and the argument.
    """
    def resolve_reverse(match):
        return '"' + match.group(1)[::-1] + '"'

    return re.sub(
        r'\bStrReverse\s*\(\s*"([^"]+)"\s*\)',
        resolve_reverse,
        vba_code,
        flags=re.IGNORECASE,
    )


def deobfuscate_replace(vba_code):
    """Resolve ``Replace("text", "find", "with")`` calls on string literals.

    A call is rewritten only when all three arguments are quoted literals;
    the first two must be non-empty, while the replacement may be empty. The
    call is replaced by the resulting quoted string. Matching of the function
    name is case-insensitive because VBA identifiers are, and whitespace
    around the parentheses, commas and arguments is tolerated. The
    substitution itself stays case-sensitive (``str.replace``).
    """
    def resolve_replace(match):
        original, find, replace_with = match.group(1, 2, 3)
        return '"' + original.replace(find, replace_with) + '"'

    return re.sub(
        r'\bReplace\s*\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*,\s*"([^"]*)"\s*\)',
        resolve_replace,
        vba_code,
        flags=re.IGNORECASE,
    )


def full_deobfuscation(vba_code, max_passes=10):
    """Apply all deobfuscation techniques to VBA code until nothing changes.

    Each pass runs the Chr, concatenation, StrReverse and Replace resolvers in
    that order. One pass is not always enough: a string literal produced by
    ``StrReverse`` or ``Replace`` may sit next to another literal and only
    become foldable on the following pass, and layered obfuscation may nest
    the calls. Passes repeat until the code stops changing or ``max_passes``
    is reached, which bounds the work on adversarial input.

    Args:
        vba_code: VBA source text to deobfuscate.
        max_passes: Upper bound on the number of passes (default 10).

    Returns:
        The deobfuscated source text.
    """
    code = vba_code
    for _ in range(max_passes):
        before = code
        code = deobfuscate_chr_calls(code)
        code = deobfuscate_concatenation(code)
        code = deobfuscate_strreverse(code)
        code = deobfuscate_replace(code)
        if code == before:
            # Fixed point reached: another pass would change nothing.
            break
    return code


def extract_urls_from_code(code):
    """Extract unique http(s) URLs from deobfuscated VBA code.

    A URL runs from the scheme up to the first whitespace, quote or angle
    bracket. The scheme is matched case-insensitively so ``HTTP://...`` is
    found too. Duplicates are dropped and the result keeps first-seen order,
    which makes the output deterministic across runs.
    """
    matches = re.findall(r'https?://[^\s"\'<>]+', code, flags=re.IGNORECASE)
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(matches))


def check_dde(filepath):
    """Check for DDE (Dynamic Data Exchange) attacks in OOXML documents.

    Opens ``filepath`` as a ZIP archive and scans every ``.xml`` and ``.rels``
    member:

    * A member containing ``DDEAUTO`` or ``DDE `` yields one ``"DDE"`` finding
      whose ``commands`` are the first quoted string after each ``DDEAUTO``.
      The list is therefore empty when only a plain ``DDE `` marker matched.
    * Every ``Target="http(s)://..."`` attribute yields a ``"Remote Template"``
      finding carrying the URL.

    Returns a list of finding dicts. Files that are not valid ZIP archives
    (for example legacy OLE documents) produce an empty list.
    """
    findings = []
    try:
        # The context manager guarantees the archive handle is closed even if
        # reading a member fails part-way through.
        with zipfile.ZipFile(filepath) as z:
            for name in z.namelist():
                if not name.endswith((".xml", ".rels")):
                    continue
                content = z.read(name).decode("utf-8", errors="ignore")
                if "DDEAUTO" in content or "DDE " in content:
                    dde_cmds = re.findall(r'DDEAUTO[^"]*"([^"]+)"', content)
                    findings.append({
                        "type": "DDE",
                        "file": name,
                        "commands": dde_cmds,
                    })
                if "attachedTemplate" in content or "Target=" in content:
                    for url in re.findall(r'Target="(https?://[^"]+)"', content):
                        findings.append({
                            "type": "Remote Template",
                            "file": name,
                            "url": url,
                        })
    except (zipfile.BadZipFile, KeyError):
        # Not an OOXML container or an unreadable member: best-effort check,
        # so return whatever was collected so far.
        pass
    return findings


def check_external_relationships(filepath):
    """Check OOXML relationships for external references.

    Opens ``filepath`` as a ZIP archive and, for every member whose name
    contains ``.rels``, collects each ``Target="http(s)://..."`` URL.

    Returns a list of ``{"file": member_name, "url": url}`` dicts. Files that
    are not valid ZIP archives produce an empty list.
    """
    externals = []
    try:
        # The context manager guarantees the archive handle is closed even if
        # reading a member fails part-way through.
        with zipfile.ZipFile(filepath) as z:
            for name in z.namelist():
                if ".rels" not in name:
                    continue
                content = z.read(name).decode("utf-8", errors="ignore")
                externals.extend(
                    {"file": name, "url": url}
                    for url in re.findall(r'Target="(https?://[^"]+)"', content)
                )
    except (zipfile.BadZipFile, KeyError):
        # Not an OOXML container or an unreadable member: best-effort check,
        # so return whatever was collected so far.
        pass
    return externals


def generate_report(filepath, triage, macros, analysis, deobfuscated_urls, dde_findings):
    """Assemble the analysis results for ``filepath`` into a single report dict.

    The report carries the file path, its SHA-256 and size on disk, the triage
    data, the number of extracted macros, the keywords of the auto-exec,
    suspicious and IOC entries found in ``analysis``, plus the extracted URLs
    and DDE findings passed in by the caller.
    """
    def keywords_of(bucket):
        # A bucket missing from the analysis dict counts as empty.
        return [item["keyword"] for item in analysis.get(bucket, [])]

    return {
        "file": filepath,
        "sha256": compute_hash(filepath),
        "size": os.path.getsize(filepath),
        "triage": triage,
        "macro_count": len(macros),
        "auto_exec_triggers": keywords_of("auto_exec"),
        "suspicious_functions": keywords_of("suspicious"),
        "iocs": keywords_of("iocs"),
        "extracted_urls": deobfuscated_urls,
        "dde_findings": dde_findings,
    }


if __name__ == "__main__":
    # Command-line entry point: analyse the document given as the first
    # argument and print each stage's findings followed by a truncated JSON
    # report.
    print("=" * 60)
    print("Office Macro Malware Analysis Agent")
    print("oletools-based VBA extraction and deobfuscation")
    print("=" * 60)

    target = sys.argv[1] if len(sys.argv) > 1 else None

    if target and os.path.exists(target):
        print(f"\n[*] Analyzing: {target}")
        print(f"[*] SHA-256: {compute_hash(target)}")

        # Without oletools the three oletools-backed helpers return an
        # {"error": ...} dict, which the printing loops below cannot iterate
        # as results. Skip those stages with empty results instead of
        # crashing; the OOXML checks further down need only the stdlib.
        if not HAS_OLETOOLS:
            print("\n[!] oletools not installed: pip install oletools")
            print("[!] Skipping oleid triage and VBA analysis.")

        print("\n--- Document Triage (oleid) ---")
        triage = triage_document(target) if HAS_OLETOOLS else {}
        for name, info in triage.items():
            risk_tag = f" [{info['risk']}]" if info.get("risk") else ""
            print(f"  {name}: {info['value']}{risk_tag}")

        print("\n--- VBA Macro Extraction ---")
        macros = extract_vba_macros(target) if HAS_OLETOOLS else []
        print(f"  Macro streams found: {len(macros)}")
        for m in macros:
            print(f"  - {m['vba_filename']} ({m['code_length']} chars)")

        print("\n--- Suspicious Analysis ---")
        if HAS_OLETOOLS:
            analysis = analyze_vba_suspicious(target)
        else:
            analysis = {"auto_exec": [], "suspicious": [], "iocs": [], "hex_strings": []}
        for trigger in analysis["auto_exec"]:
            print(f"  [!] Auto-exec: {trigger['keyword']}")
        for sus in analysis["suspicious"]:
            print(f"  [!] Suspicious: {sus['keyword']} - {sus['description']}")
        for ioc in analysis["iocs"]:
            print(f"  [IOC] {ioc['keyword']}")

        print("\n--- Deobfuscation ---")
        all_urls = []
        for m in macros:
            deobfuscated = full_deobfuscation(m["code"])
            all_urls.extend(extract_urls_from_code(deobfuscated))
        for url in set(all_urls):
            print(f"  URL: {url}")

        print("\n--- DDE / Remote Template Check ---")
        dde = check_dde(target)
        for d in dde:
            print(f"  [{d['type']}] {d.get('url', d.get('commands', ''))}")

        report = generate_report(target, triage, macros, analysis, list(set(all_urls)), dde)
        # Only the first 500 characters of the JSON report are shown.
        print(f"\n[*] Report: {json.dumps(report, indent=2, default=str)[:500]}...")
    else:
        if target:
            # A path was supplied but does not exist; say so before the usage
            # text.
            print(f"\n[!] File not found: {target}")
        print("\n[DEMO] Usage: python agent.py <document.docm|xlsm>")
        print("[*] Provide an Office document for macro analysis.")