#!/usr/bin/env python3
"""Office macro malware analysis agent using oletools for VBA extraction and deobfuscation."""

import re
import os
import sys
import hashlib
import json
import zipfile

try:
    from oletools.olevba import VBA_Parser
    from oletools import oleid
    HAS_OLETOOLS = True
except ImportError:
    HAS_OLETOOLS = False


# Maps the keyword type reported by olevba's analyze_macros() to the bucket
# name used in the analysis dictionary.
_KEYWORD_BUCKETS = {
    "AutoExec": "auto_exec",
    "Suspicious": "suspicious",
    "IOC": "iocs",
    "Hex String": "hex_strings",
}

# VBA identifiers are case-insensitive, so every pattern below ignores case.
_CHR_CALL_RE = re.compile(r'\bChrW?\$?\(\s*(\d+)\s*\)', re.IGNORECASE)
_CONCAT_RE = re.compile(r'"\s*[&+]\s*"')
_STRREVERSE_RE = re.compile(r'StrReverse\(\s*"([^"]+)"\s*\)', re.IGNORECASE)
_REPLACE_RE = re.compile(
    r'Replace\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*,\s*"([^"]*)"\s*\)',
    re.IGNORECASE,
)
_URL_RE = re.compile(r'https?://[^\s"\'<>]+')
_DDE_COMMAND_RE = re.compile(r'(?:DDEAUTO|DDE\s)[^"]*"([^"]+)"')
_EXTERNAL_TARGET_RE = re.compile(r'Target="(https?://[^"]+)"')

# Upper bound on deobfuscation rounds so pathological input cannot loop forever.
_MAX_DEOBFUSCATION_PASSES = 10


def _error_message(result):
    """Return the message of an ``{"error": "..."}`` result, or None otherwise.

    The oletools-backed helpers return such a dictionary instead of their
    normal result when oletools is not installed.
    """
    if isinstance(result, dict) and isinstance(result.get("error"), str):
        return result["error"]
    return None


def compute_hash(filepath):
    """Compute SHA-256 hash of a file.

    The file is read in 64 KiB chunks so it is never loaded into memory whole.
    Returns the digest as a lowercase hex string.
    """
    sha256 = hashlib.sha256()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            sha256.update(chunk)
    return sha256.hexdigest()


def triage_document(filepath):
    """Quick triage using oleid to identify document capabilities.

    Returns a dictionary keyed by indicator name, each value holding the
    indicator's ``value`` (as a string), ``risk`` and ``description``.
    Returns ``{"error": ...}`` when oletools is not installed.
    """
    if not HAS_OLETOOLS:
        return {"error": "oletools not installed: pip install oletools"}
    indicators = oleid.OleID(filepath).check()
    return {
        indicator.name: {
            "value": str(indicator.value),
            "risk": indicator.risk,
            "description": indicator.description,
        }
        for indicator in indicators
    }


def extract_vba_macros(filepath):
    """Extract VBA macro code from an Office document.

    Returns a list with one dictionary per macro stream (``filename``,
    ``stream_path``, ``vba_filename``, ``code``, ``code_length``), or
    ``{"error": ...}`` when oletools is not installed.
    """
    if not HAS_OLETOOLS:
        return {"error": "oletools not installed"}
    vba_parser = VBA_Parser(filepath)
    try:
        if not vba_parser.detect_vba_macros():
            return []
        return [
            {
                "filename": filename,
                "stream_path": stream_path,
                "vba_filename": vba_filename,
                "code": vba_code,
                "code_length": len(vba_code),
            }
            for (filename, stream_path, vba_filename, vba_code)
            in vba_parser.extract_macros()
        ]
    finally:
        # Close the parser even when extraction raises on a malformed sample.
        vba_parser.close()


def analyze_vba_suspicious(filepath):
    """Analyze VBA macros for suspicious keywords and patterns.

    Returns a dictionary with the lists ``auto_exec``, ``suspicious``,
    ``iocs`` and ``hex_strings``; each entry has ``type``, ``keyword`` and
    ``description``. Keyword types outside those four are ignored.
    Returns ``{"error": ...}`` when oletools is not installed.
    """
    if not HAS_OLETOOLS:
        return {"error": "oletools not installed"}
    analysis = {"auto_exec": [], "suspicious": [], "iocs": [], "hex_strings": []}
    vba_parser = VBA_Parser(filepath)
    try:
        if vba_parser.detect_vba_macros():
            for (kw_type, keyword, description) in vba_parser.analyze_macros():
                bucket = _KEYWORD_BUCKETS.get(kw_type)
                if bucket is not None:
                    analysis[bucket].append({
                        "type": kw_type,
                        "keyword": keyword,
                        "description": description,
                    })
    finally:
        # Close the parser even when analysis raises on a malformed sample.
        vba_parser.close()
    return analysis


def deobfuscate_chr_calls(vba_code):
    """Resolve Chr() and ChrW() calls in VBA code.

    Each call with a numeric literal argument (``Chr``, ``Chr$``, ``ChrW``,
    ``ChrW$``, any letter case) is replaced by the equivalent quoted VBA
    string literal, e.g. ``Chr(65)`` becomes ``"A"``. Emitting a quoted
    literal lets the concatenation pass merge it with neighbouring strings.
    Calls whose argument is not a valid code point are left untouched.
    """
    def resolve_chr(match):
        try:
            char = chr(int(match.group(1)))
        except (ValueError, OverflowError):
            return match.group(0)
        # A double quote inside a VBA string literal is written as "".
        return '"' + char.replace('"', '""') + '"'

    return _CHR_CALL_RE.sub(resolve_chr, vba_code)


def deobfuscate_concatenation(vba_code):
    """Remove string concatenation: "abc" & "def" -> "abcdef".

    Both VBA concatenation operators (``&`` and ``+``) between two adjacent
    string literals are handled.
    """
    return _CONCAT_RE.sub('', vba_code)


def deobfuscate_strreverse(vba_code):
    """Resolve StrReverse() calls whose argument is a string literal."""
    def resolve_reverse(match):
        return '"' + match.group(1)[::-1] + '"'

    return _STRREVERSE_RE.sub(resolve_reverse, vba_code)


def deobfuscate_replace(vba_code):
    """Resolve Replace() function calls whose three arguments are string literals."""
    def resolve_replace(match):
        original, find, replace_with = match.groups()
        return '"' + original.replace(find, replace_with) + '"'

    return _REPLACE_RE.sub(resolve_replace, vba_code)


def full_deobfuscation(vba_code):
    """Apply all deobfuscation techniques to VBA code.

    The passes are repeated until the code stops changing, because one pass
    can expose work for another (e.g. StrReverse yields a literal that must
    then be merged with its neighbours). The loop is capped at
    ``_MAX_DEOBFUSCATION_PASSES`` rounds.
    """
    code = vba_code
    for _ in range(_MAX_DEOBFUSCATION_PASSES):
        previous = code
        code = deobfuscate_chr_calls(code)
        code = deobfuscate_concatenation(code)
        code = deobfuscate_strreverse(code)
        code = deobfuscate_replace(code)
        if code == previous:
            break
    return code


def extract_urls_from_code(code):
    """Extract URLs from deobfuscated VBA code.

    Returns the unique http/https URLs found, sorted so output is stable.
    """
    return sorted(set(_URL_RE.findall(code)))


def check_dde(filepath):
    """Check for DDE (Dynamic Data Exchange) attacks in OOXML documents.

    Scans every ``.xml`` and ``.rels`` member of the archive and returns a
    list of findings:

    * ``{"type": "DDE", "file": ..., "commands": [...]}`` for members that
      contain a ``DDEAUTO`` or ``DDE`` field. ``commands`` holds the first
      quoted string after each field keyword and may be empty.
    * ``{"type": "Remote Template", "file": ..., "url": ...}`` for each
      http/https ``Target`` attribute.

    Files that are not valid ZIP archives yield an empty list.
    """
    findings = []
    try:
        with zipfile.ZipFile(filepath) as archive:
            for name in archive.namelist():
                if not name.endswith((".xml", ".rels")):
                    continue
                content = archive.read(name).decode("utf-8", errors="ignore")
                if "DDEAUTO" in content or "DDE " in content:
                    findings.append({
                        "type": "DDE",
                        "file": name,
                        "commands": _DDE_COMMAND_RE.findall(content),
                    })
                if "attachedTemplate" in content or "Target=" in content:
                    for url in _EXTERNAL_TARGET_RE.findall(content):
                        findings.append({
                            "type": "Remote Template",
                            "file": name,
                            "url": url,
                        })
    except (zipfile.BadZipFile, KeyError):
        # Best effort: a non-ZIP or damaged input simply has nothing (more)
        # to report, so return whatever was collected.
        return findings
    return findings


def check_external_relationships(filepath):
    """Check OOXML relationships for external references.

    Returns one ``{"file": ..., "url": ...}`` entry per http/https ``Target``
    found in archive members whose name contains ``.rels``. Files that are
    not valid ZIP archives yield an empty list.
    """
    externals = []
    try:
        with zipfile.ZipFile(filepath) as archive:
            for name in archive.namelist():
                if ".rels" not in name:
                    continue
                content = archive.read(name).decode("utf-8", errors="ignore")
                externals.extend(
                    {"file": name, "url": url}
                    for url in _EXTERNAL_TARGET_RE.findall(content)
                )
    except (zipfile.BadZipFile, KeyError):
        # Best effort: a non-ZIP or damaged input has nothing (more) to report.
        return externals
    return externals


def generate_report(filepath, triage, macros, analysis, deobfuscated_urls, dde_findings):
    """Generate a comprehensive macro malware analysis report.

    ``macros`` and ``analysis`` may be the ``{"error": ...}`` dictionaries
    returned when oletools is missing; the macro count is then 0 and the
    keyword lists are empty.
    """
    macro_count = 0 if _error_message(macros) else len(macros)
    return {
        "file": filepath,
        "sha256": compute_hash(filepath),
        "size": os.path.getsize(filepath),
        "triage": triage,
        "macro_count": macro_count,
        "auto_exec_triggers": [e["keyword"] for e in analysis.get("auto_exec", [])],
        "suspicious_functions": [e["keyword"] for e in analysis.get("suspicious", [])],
        "iocs": [e["keyword"] for e in analysis.get("iocs", [])],
        "extracted_urls": deobfuscated_urls,
        "dde_findings": dde_findings,
    }


def main(argv=None):
    """Run the command-line analysis on the document named by the first argument.

    ``argv`` defaults to ``sys.argv[1:]``. Prints the results to stdout.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    print("=" * 60)
    print("Office Macro Malware Analysis Agent")
    print("oletools-based VBA extraction and deobfuscation")
    print("=" * 60)

    target = args[0] if args else None
    if not target or not os.path.exists(target):
        if target:
            print(f"\n[!] File not found: {target}")
        print("\n[DEMO] Usage: python agent.py <office_document>")
        print("[*] Provide an Office document for macro analysis.")
        return

    print(f"\n[*] Analyzing: {target}")
    print(f"[*] SHA-256: {compute_hash(target)}")

    print("\n--- Document Triage (oleid) ---")
    triage = triage_document(target)
    error = _error_message(triage)
    if error:
        print(f" [!] {error}")
    else:
        for name, info in triage.items():
            risk_tag = f" [{info['risk']}]" if info.get("risk") else ""
            print(f" {name}: {info['value']}{risk_tag}")

    print("\n--- VBA Macro Extraction ---")
    macros = extract_vba_macros(target)
    error = _error_message(macros)
    if error:
        print(f" [!] {error}")
        # Normalise so the rest of the run sees "no macros" instead of a dict.
        macros = []
    print(f" Macro streams found: {len(macros)}")
    for m in macros:
        print(f" - {m['vba_filename']} ({m['code_length']} chars)")

    print("\n--- Suspicious Analysis ---")
    analysis = analyze_vba_suspicious(target)
    error = _error_message(analysis)
    if error:
        print(f" [!] {error}")
    for trigger in analysis.get("auto_exec", []):
        print(f" [!] Auto-exec: {trigger['keyword']}")
    for sus in analysis.get("suspicious", []):
        print(f" [!] Suspicious: {sus['keyword']} - {sus['description']}")
    for ioc in analysis.get("iocs", []):
        print(f" [IOC] {ioc['keyword']}")

    print("\n--- Deobfuscation ---")
    unique_urls = set()
    for m in macros:
        unique_urls.update(extract_urls_from_code(full_deobfuscation(m["code"])))
    all_urls = sorted(unique_urls)
    for url in all_urls:
        print(f" URL: {url}")

    print("\n--- DDE / Remote Template Check ---")
    dde = check_dde(target)
    for d in dde:
        print(f" [{d['type']}] {d.get('url', d.get('commands', ''))}")

    report = generate_report(target, triage, macros, analysis, all_urls, dde)
    print(f"\n[*] Report: {json.dumps(report, indent=2, default=str)[:500]}...")


if __name__ == "__main__":
    main()