Files
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

247 lines
8.6 KiB
Python

#!/usr/bin/env python3
"""Office macro malware analysis agent using oletools for VBA extraction and deobfuscation."""
import re
import os
import sys
import hashlib
import json
import zipfile
try:
from oletools.olevba import VBA_Parser
from oletools import oleid
HAS_OLETOOLS = True
except ImportError:
HAS_OLETOOLS = False
def compute_hash(filepath):
    """Return the hex-encoded SHA-256 digest of the file at ``filepath``.

    The file is read in 64 KiB blocks so the whole sample never has to be
    held in memory at once.
    """
    digest = hashlib.sha256()
    with open(filepath, "rb") as handle:
        while True:
            block = handle.read(65536)
            if not block:
                # An empty read marks end-of-file.
                break
            digest.update(block)
    return digest.hexdigest()
def triage_document(filepath):
    """Run oleid over a document and summarise every indicator it reports.

    Returns a dict keyed by indicator name; each value holds the
    stringified indicator value, its risk and its description. When
    oletools could not be imported, a single-entry ``{"error": ...}`` dict
    is returned instead.
    """
    if not HAS_OLETOOLS:
        return {"error": "oletools not installed: pip install oletools"}
    checker = oleid.OleID(filepath)
    return {
        item.name: {
            "value": str(item.value),
            "risk": item.risk,
            "description": item.description,
        }
        for item in checker.check()
    }
def extract_vba_macros(filepath):
    """Extract VBA macro code from an Office document.

    Returns a list with one dict per macro stream (``filename``,
    ``stream_path``, ``vba_filename``, ``code``, ``code_length``); the list
    is empty when the document contains no macros. When oletools could not
    be imported, an ``{"error": ...}`` dict is returned instead.
    """
    if not HAS_OLETOOLS:
        return {"error": "oletools not installed"}
    vba_parser = VBA_Parser(filepath)
    try:
        if not vba_parser.detect_vba_macros():
            return []
        return [
            {
                "filename": filename,
                "stream_path": stream_path,
                "vba_filename": vba_filename,
                "code": vba_code,
                "code_length": len(vba_code),
            }
            for (filename, stream_path, vba_filename, vba_code)
            in vba_parser.extract_macros()
        ]
    finally:
        # Close the parser even if parsing raises, so a malformed sample
        # does not leave the underlying file handle open.
        vba_parser.close()
def analyze_vba_suspicious(filepath):
    """Analyze VBA macros for suspicious keywords and patterns.

    Returns a dict with the lists ``auto_exec``, ``suspicious``, ``iocs``
    and ``hex_strings``; each entry is a dict with ``type``, ``keyword`` and
    ``description``. Result types other than the four handled here are
    ignored. When oletools could not be imported, an ``{"error": ...}``
    dict is returned instead.
    """
    if not HAS_OLETOOLS:
        return {"error": "oletools not installed"}
    # Maps the result type reported by olevba to the bucket it is stored in.
    buckets = {
        "AutoExec": "auto_exec",
        "Suspicious": "suspicious",
        "IOC": "iocs",
        "Hex String": "hex_strings",
    }
    analysis = {"auto_exec": [], "suspicious": [], "iocs": [], "hex_strings": []}
    vba_parser = VBA_Parser(filepath)
    try:
        if vba_parser.detect_vba_macros():
            for (kw_type, keyword, description) in vba_parser.analyze_macros():
                bucket = buckets.get(kw_type)
                if bucket is not None:
                    analysis[bucket].append(
                        {"type": kw_type, "keyword": keyword, "description": description}
                    )
    finally:
        # Close the parser even if analysis raises on a malformed sample.
        vba_parser.close()
    return analysis
def deobfuscate_chr_calls(vba_code):
    """Resolve Chr(), Chr$(), ChrW() and ChrW$() calls with a decimal argument.

    Each call is replaced by the equivalent quoted VBA string literal, e.g.
    ``Chr(104)`` becomes ``"h"``. Emitting a literal rather than the bare
    character keeps the code well-formed and lets a later concatenation
    pass merge it with neighbouring literals. A resolved double quote is
    doubled, as VBA requires inside a string literal.

    Matching is case-insensitive because VBA identifiers are. Calls whose
    argument is not a valid code point are left untouched.
    """
    def resolve_chr(match):
        try:
            char = chr(int(match.group(1)))
        except (ValueError, OverflowError):
            # Out-of-range code point: keep the original call text.
            return match.group(0)
        return '"' + char.replace('"', '""') + '"'

    # \b stops the pattern from matching inside longer identifiers.
    return re.sub(
        r'\bChrW?\$?\s*\(\s*(\d+)\s*\)',
        resolve_chr,
        vba_code,
        flags=re.IGNORECASE,
    )
def deobfuscate_concatenation(vba_code):
    """Merge adjacent string literals: "abc" & "def" -> "abcdef".

    Both VBA concatenation operators (``&`` and ``+``) are handled, as is a
    ``_`` line continuation between the operator and the next literal.
    Concatenations involving anything other than two literals are left
    unchanged.
    """
    return re.sub(r'"\s*[&+]\s*(?:_[ \t]*\r?\n\s*)?"', '', vba_code)
def deobfuscate_strreverse(vba_code):
    """Resolve StrReverse() calls whose argument is a string literal.

    ``StrReverse("ptth")`` becomes ``"http"``. The function name is matched
    case-insensitively (VBA identifiers are case-insensitive) and optional
    whitespace inside the parentheses is tolerated. Calls with a
    non-literal argument are left unchanged.
    """
    def resolve_reverse(match):
        return '"' + match.group(1)[::-1] + '"'

    return re.sub(
        r'\bStrReverse\s*\(\s*"([^"]*)"\s*\)',
        resolve_reverse,
        vba_code,
        flags=re.IGNORECASE,
    )
def deobfuscate_replace(vba_code):
    """Resolve Replace() calls whose three arguments are string literals.

    ``Replace("hXXp", "XX", "tt")`` becomes ``"http"``. The function name is
    matched case-insensitively (VBA identifiers are case-insensitive) and
    optional whitespace around the arguments is tolerated. Calls with
    non-literal arguments, or with an empty source or search string, are
    left unchanged.
    """
    def resolve_replace(match):
        original = match.group(1)
        find = match.group(2)
        replace_with = match.group(3)
        return '"' + original.replace(find, replace_with) + '"'

    return re.sub(
        r'\bReplace\s*\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*,\s*"([^"]*)"\s*\)',
        resolve_replace,
        vba_code,
        flags=re.IGNORECASE,
    )
def full_deobfuscation(vba_code):
    """Apply all deobfuscation techniques to VBA code.

    The passes run in the order Chr resolution, literal concatenation,
    StrReverse resolution, Replace resolution. The sequence is repeated
    until the text stops changing, because a later pass can produce a
    string literal that an earlier pass can then merge with its neighbours.
    The number of rounds is capped so the function always terminates.
    """
    max_rounds = 10
    code = vba_code
    for _ in range(max_rounds):
        previous = code
        code = deobfuscate_chr_calls(code)
        code = deobfuscate_concatenation(code)
        code = deobfuscate_strreverse(code)
        code = deobfuscate_replace(code)
        if code == previous:
            break
    return code
def extract_urls_from_code(code):
    """Extract the distinct http(s) URLs found in deobfuscated VBA code.

    Returns a list of unique URLs in order of first appearance, so the
    result is the same from run to run. A URL ends at the first whitespace,
    quote or angle bracket.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(re.findall(r'https?://[^\s"\'<>]+', code)))
def check_dde(filepath):
    """Check an OOXML document for DDE fields and remote template references.

    Every ``.xml`` and ``.rels`` member of the archive is scanned. A member
    containing ``DDEAUTO`` or ``DDE `` yields a ``"DDE"`` finding listing the
    quoted commands that follow ``DDEAUTO``; every ``Target="http(s)://..."``
    attribute yields a ``"Remote Template"`` finding.

    Returns a list of finding dicts. A file that is not a valid zip archive
    yields the findings collected so far (normally an empty list).
    """
    findings = []
    try:
        # The context manager closes the archive handle on every path.
        with zipfile.ZipFile(filepath) as archive:
            for name in archive.namelist():
                if not name.endswith((".xml", ".rels")):
                    continue
                content = archive.read(name).decode("utf-8", errors="ignore")
                if "DDEAUTO" in content or "DDE " in content:
                    findings.append({
                        "type": "DDE",
                        "file": name,
                        "commands": re.findall(r'DDEAUTO[^"]*"([^"]+)"', content),
                    })
                # The pattern itself requires 'Target="', so no separate
                # substring pre-check is needed.
                for url in re.findall(r'Target="(https?://[^"]+)"', content):
                    findings.append({
                        "type": "Remote Template",
                        "file": name,
                        "url": url,
                    })
    except (zipfile.BadZipFile, KeyError):
        # Deliberate best-effort: a non-zip input simply has nothing to scan.
        pass
    return findings
def check_external_relationships(filepath):
    """Check OOXML relationship parts for external http(s) references.

    Every archive member whose name contains ``.rels`` is scanned for
    ``Target="http(s)://..."`` attributes.

    Returns a list of ``{"file": ..., "url": ...}`` dicts. A file that is not
    a valid zip archive yields the entries collected so far (normally an
    empty list).
    """
    externals = []
    try:
        # The context manager closes the archive handle on every path.
        with zipfile.ZipFile(filepath) as archive:
            for name in archive.namelist():
                if ".rels" not in name:
                    continue
                content = archive.read(name).decode("utf-8", errors="ignore")
                externals.extend(
                    {"file": name, "url": url}
                    for url in re.findall(r'Target="(https?://[^"]+)"', content)
                )
    except (zipfile.BadZipFile, KeyError):
        # Deliberate best-effort: a non-zip input simply has nothing to scan.
        pass
    return externals
def generate_report(filepath, triage, macros, analysis, deobfuscated_urls, dde_findings):
    """Generate a comprehensive macro malware analysis report.

    Combines the file's SHA-256 and size with the supplied triage result,
    macro list, keyword analysis, extracted URLs and DDE findings into one
    dict. Missing analysis buckets are reported as empty lists.
    """
    # extract_vba_macros returns an error dict instead of a list when
    # oletools is unavailable; that must not be counted as one macro.
    macro_count = 0 if isinstance(macros, dict) else len(macros)
    report = {
        "file": filepath,
        "sha256": compute_hash(filepath),
        "size": os.path.getsize(filepath),
        "triage": triage,
        "macro_count": macro_count,
        "auto_exec_triggers": [e["keyword"] for e in analysis.get("auto_exec", [])],
        "suspicious_functions": [e["keyword"] for e in analysis.get("suspicious", [])],
        "iocs": [e["keyword"] for e in analysis.get("iocs", [])],
        "extracted_urls": deobfuscated_urls,
        "dde_findings": dde_findings,
    }
    return report
if __name__ == "__main__":
    # Command-line entry point: analyse the document named by the first
    # argument and print each stage's findings plus a truncated JSON report.
    print("=" * 60)
    print("Office Macro Malware Analysis Agent")
    print("oletools-based VBA extraction and deobfuscation")
    print("=" * 60)
    target = sys.argv[1] if len(sys.argv) > 1 else None
    if target and os.path.exists(target):
        print(f"\n[*] Analyzing: {target}")
        print(f"[*] SHA-256: {compute_hash(target)}")

        print("\n--- Document Triage (oleid) ---")
        triage = triage_document(target)
        for name, info in triage.items():
            if not isinstance(info, dict):
                # An error result maps "error" to a plain message string
                # rather than to an indicator dict.
                print(f" [!] {name}: {info}")
                continue
            risk_tag = f" [{info['risk']}]" if info.get("risk") else ""
            print(f" {name}: {info['value']}{risk_tag}")

        print("\n--- VBA Macro Extraction ---")
        macros = extract_vba_macros(target)
        if isinstance(macros, dict):
            # An error dict is returned when oletools is unavailable;
            # continue with no macros instead of crashing below.
            print(f" [!] {macros.get('error', 'macro extraction failed')}")
            macros = []
        print(f" Macro streams found: {len(macros)}")
        for m in macros:
            print(f" - {m['vba_filename']} ({m['code_length']} chars)")

        print("\n--- Suspicious Analysis ---")
        analysis = analyze_vba_suspicious(target)
        if "error" in analysis:
            print(f" [!] {analysis['error']}")
        # .get() keeps the loops safe when the analysis is an error dict.
        for trigger in analysis.get("auto_exec", []):
            print(f" [!] Auto-exec: {trigger['keyword']}")
        for sus in analysis.get("suspicious", []):
            print(f" [!] Suspicious: {sus['keyword']} - {sus['description']}")
        for ioc in analysis.get("iocs", []):
            print(f" [IOC] {ioc['keyword']}")

        print("\n--- Deobfuscation ---")
        all_urls = []
        for m in macros:
            deobfuscated = full_deobfuscation(m["code"])
            urls = extract_urls_from_code(deobfuscated)
            all_urls.extend(urls)
        for url in set(all_urls):
            print(f" URL: {url}")

        print("\n--- DDE / Remote Template Check ---")
        dde = check_dde(target)
        for d in dde:
            print(f" [{d['type']}] {d.get('url', d.get('commands', ''))}")

        report = generate_report(target, triage, macros, analysis, list(set(all_urls)), dde)
        # Only the first 500 characters of the JSON report are printed.
        print(f"\n[*] Report: {json.dumps(report, indent=2, default=str)[:500]}...")
    else:
        print("\n[DEMO] Usage: python agent.py <document.docm|xlsm>")
        print("[*] Provide an Office document for macro analysis.")