Files
Anthropic-Cybersecurity-Skills/skills/reverse-engineering-malware-with-ghidra/scripts/agent.py
T
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

233 lines
8.2 KiB
Python

#!/usr/bin/env python3
"""Malware reverse engineering agent using Ghidra headless analyzer and r2pipe."""
import subprocess
import os
import sys
import json
import re
import hashlib
try:
import r2pipe
except ImportError:
r2pipe = None
def compute_hashes(filepath, chunk_size=1024 * 1024):
    """Compute MD5, SHA-1 and SHA-256 digests plus the byte size of a file.

    The file is streamed in fixed-size chunks so that a large sample is never
    held in memory in one piece.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes read per iteration (default 1 MiB).

    Returns:
        A dict with the keys "md5", "sha1", "sha256" (hex digest strings) and
        "size" (total number of bytes read).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digests = {
        "md5": hashlib.md5(),
        "sha1": hashlib.sha1(),
        "sha256": hashlib.sha256(),
    }
    total_bytes = 0
    with open(filepath, "rb") as f:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            total_bytes += len(chunk)
            for digest in digests.values():
                digest.update(chunk)
    result = {name: digest.hexdigest() for name, digest in digests.items()}
    result["size"] = total_bytes
    return result
def run_ghidra_headless(ghidra_path, project_dir, project_name, binary_path,
                        script=None, script_args=None):
    """Run Ghidra's headless analyzer on a binary and capture its output.

    The project directory is created if it does not exist. The launcher is
    looked up under ``<ghidra_path>/support``; on Windows the ``.bat`` variant
    is used because the extension-less launcher is a shell script.

    Args:
        ghidra_path: Root directory of the Ghidra installation.
        project_dir: Directory that holds the Ghidra project.
        project_name: Name of the Ghidra project.
        binary_path: File passed to ``-import``.
        script: Optional script name passed to ``-postScript``.
        script_args: Optional list of extra arguments appended right after the
            script name; ignored when ``script`` is not given.

    Returns:
        A dict with "command" (space-joined command line), "returncode",
        "stdout" (last 2000 characters) and "stderr" (last 1000 characters).

    Raises:
        subprocess.TimeoutExpired: If the analyzer runs longer than 600 s.
        OSError: If the project directory cannot be created or the launcher
            cannot be executed.
    """
    os.makedirs(project_dir, exist_ok=True)

    launcher = "analyzeHeadless.bat" if os.name == "nt" else "analyzeHeadless"
    cmd = [
        os.path.join(ghidra_path, "support", launcher),
        project_dir, project_name,
        "-import", binary_path,
        "-overwrite",
    ]
    if script:
        cmd.extend(["-postScript", script])
        if script_args:
            cmd.extend(script_args)

    result = subprocess.run(
        cmd, capture_output=True, text=True, timeout=600
    )
    return {
        "command": " ".join(cmd),
        "returncode": result.returncode,
        # Only the tail is kept: headless runs are verbose and the outcome
        # is reported at the end of the log.
        "stdout": result.stdout[-2000:] if result.stdout else "",
        "stderr": result.stderr[-1000:] if result.stderr else "",
    }
def export_functions_ghidra(ghidra_path, project_dir, project_name, binary_path,
                            output_file):
    """Export the function list of a binary to JSON via a Ghidra post-script.

    A small Python script is generated inside ``project_dir`` and handed to
    the headless analyzer. For every function Ghidra knows about, the script
    writes name, entry address, size, calling convention and thunk flag to
    ``output_file`` as a JSON array.

    Args:
        ghidra_path: Root directory of the Ghidra installation.
        project_dir: Ghidra project directory; also receives the generated
            ``export_functions.py`` script. Created if missing.
        project_name: Name of the Ghidra project.
        binary_path: Binary to import and analyze.
        output_file: Path of the JSON file the script writes.

    Returns:
        The result dict of ``run_ghidra_headless`` for this run.

    Raises:
        OSError: If the project directory or the script file cannot be
            created.
    """
    # Embed an absolute path so the result does not depend on the working
    # directory of the Ghidra process, and escape both backslashes and double
    # quotes so the path stays a valid string literal inside the script.
    embedded_path = (
        os.path.abspath(output_file).replace("\\", "\\\\").replace('"', '\\"')
    )
    script_content = """
import ghidra.program.model.listing.FunctionIterator
import json
output = []
fm = currentProgram.getFunctionManager()
funcs = fm.getFunctions(True)
for func in funcs:
    entry = {
        "name": func.getName(),
        "address": str(func.getEntryPoint()),
        "size": func.getBody().getNumAddresses(),
        "calling_convention": func.getCallingConventionName(),
        "is_thunk": func.isThunk(),
    }
    output.append(entry)
with open("{output}", "w") as f:
    json.dump(output, f, indent=2)
""".replace("{output}", embedded_path)

    # The script is written before the analyzer runs, so the directory has to
    # exist already at this point.
    os.makedirs(project_dir, exist_ok=True)
    script_path = os.path.join(project_dir, "export_functions.py")
    with open(script_path, "w") as f:
        f.write(script_content)

    # The script takes no arguments of its own, so the script_args slot
    # (appended directly after the script name) carries -scriptPath, which
    # points the analyzer at the directory holding the generated script.
    # NOTE(review): relies on analyzeHeadless treating a token that starts
    # with "-" as the next option rather than as a script argument -- confirm
    # against the Ghidra version in use.
    return run_ghidra_headless(
        ghidra_path, project_dir, project_name, binary_path,
        script="export_functions.py",
        script_args=["-scriptPath", project_dir],
    )
def analyze_with_radare2(filepath):
    """Run a quick radare2 triage of a binary through r2pipe.

    The binary is opened, analyzed with ``aaa`` and queried for file info,
    functions, imports, strings, sections and entry points. Imports are then
    matched against a fixed list of suspicious API names, strings are scanned
    for URLs and IPv4-looking values, and sections with an entropy above 7.0
    are flagged.

    Args:
        filepath: Path of the binary to analyze.

    Returns:
        ``{"error": ...}`` when r2pipe is not installed; otherwise a dict with
        "info", "function_count", "import_count", "string_count",
        "suspicious_imports", "network_indicators" (at most 20 entries, each
        truncated to 200 characters), "sections" and "entry_points".
    """
    if r2pipe is None:
        return {"error": "r2pipe not installed (pip install r2pipe)"}

    # "-2" is handed to radare2 as a command-line flag (per radare2's CLI
    # help it closes stderr, keeping warnings out of our output).
    r2 = r2pipe.open(filepath, flags=["-2"])
    try:
        r2.cmd("aaa")
        # Each JSON command may yield nothing; fall back to an empty
        # container so the processing below never sees None.
        info = r2.cmdj("ij") or {}
        functions = r2.cmdj("aflj") or []
        imports = r2.cmdj("iij") or []
        strings = r2.cmdj("izj") or []
        sections = r2.cmdj("iSj") or []
        entry_points = r2.cmdj("iej") or []
    finally:
        # Always terminate the radare2 process, even if a command failed.
        r2.quit()

    suspicious_imports = {
        "injection": ["VirtualAllocEx", "WriteProcessMemory", "CreateRemoteThread",
                      "NtCreateThreadEx"],
        "network": ["InternetOpenA", "HttpSendRequestA", "WSAStartup",
                    "URLDownloadToFileA"],
        "evasion": ["IsDebuggerPresent", "CheckRemoteDebuggerPresent",
                    "NtQueryInformationProcess"],
        "crypto": ["CryptEncrypt", "CryptDecrypt", "CryptAcquireContextA"],
        "persistence": ["RegSetValueExA", "CreateServiceA"],
    }
    import_findings = []
    for imp in imports:
        name = imp.get("name", "")
        for category, funcs in suspicious_imports.items():
            if name in funcs:
                import_findings.append({
                    "category": category,
                    "function": name,
                    # NOTE(review): radare2 appears to report the providing
                    # library under "libname"; "lib" is kept as a fallback --
                    # confirm against the installed radare2 version.
                    "library": imp.get("libname") or imp.get("lib", ""),
                })

    # Compiled once instead of on every string.
    url_re = re.compile(r"https?://")
    ipv4_re = re.compile(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b")
    network_strings = []
    for s in strings:
        val = s.get("string", "")
        if url_re.search(val) or ipv4_re.search(val):
            network_strings.append(val[:200])

    section_analysis = []
    for sec in sections:
        # NOTE(review): a section without an "entropy" field is reported with
        # entropy 0 and never flagged -- confirm that "iSj" emits that field.
        entropy = sec.get("entropy", 0)
        flags = []
        if entropy and entropy > 7.0:
            flags.append("HIGH_ENTROPY")
        section_analysis.append({
            "name": sec.get("name", ""),
            "size": sec.get("size", 0),
            "vsize": sec.get("vsize", 0),
            "entropy": entropy,
            "flags": flags,
        })

    bin_info = info.get("bin") or {}
    return {
        "info": {
            "arch": bin_info.get("arch", ""),
            "bits": bin_info.get("bits", 0),
            "os": bin_info.get("os", ""),
            "type": bin_info.get("bintype", ""),
            "compiler": bin_info.get("compiler", ""),
        },
        "function_count": len(functions),
        "import_count": len(imports),
        "string_count": len(strings),
        "suspicious_imports": import_findings,
        "network_indicators": network_strings[:20],
        "sections": section_analysis,
        "entry_points": [{"vaddr": e.get("vaddr"), "type": e.get("type")} for e in entry_points],
    }
def extract_crypto_constants(filepath):
    """Search a binary for byte patterns of well-known cryptographic constants.

    Looks for the first eight bytes of the AES S-box, the 256-byte identity
    table 0..255 (labelled "RC4_INIT"), the first four SHA-256 initial hash
    words and the ASCII marker "RSA1". The SHA-256 words are searched as
    written (big-endian) first and, if that fails, with every 32-bit word
    byte-swapped, which is how a word table is laid out in a little-endian
    image.

    Args:
        filepath: Path of the binary to scan.

    Returns:
        A list of ``{"constant": name, "offset": hex_string}`` dicts, one per
        constant found, giving the offset of its first occurrence.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filepath, "rb") as f:
        data = f.read()

    sha256_words = bytes.fromhex("6a09e667bb67ae853c6ef372a54ff53a")
    sha256_words_le = b"".join(
        sha256_words[i:i + 4][::-1] for i in range(0, len(sha256_words), 4)
    )
    # Each constant maps to its candidate encodings, tried in order; the
    # first encoding is always the historical one, so earlier results are
    # unchanged whenever it matches.
    constants = {
        "AES_SBOX": (bytes([0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5]),),
        "RC4_INIT": (bytes(range(256)),),
        "SHA256_INIT": (sha256_words, sha256_words_le),
        "RSA_MAGIC": (b"RSA1",),
    }

    found = []
    for name, patterns in constants.items():
        for pattern in patterns:
            offset = data.find(pattern)
            if offset >= 0:
                found.append({"constant": name, "offset": hex(offset)})
                break
    return found
def analyze_malware(filepath, ghidra_path=None, output_dir="/tmp/ghidra_analysis"):
    """Run the full analysis pipeline on one sample and collect a report.

    Stages, in order: file hashes, crypto-constant scan, radare2 triage (only
    when r2pipe was importable) and a Ghidra headless run (only when
    ``ghidra_path`` is given and exists).

    Args:
        filepath: Path of the sample to analyze.
        ghidra_path: Optional root directory of a Ghidra installation.
        output_dir: Directory used as the Ghidra project directory; created
            if missing.

    Returns:
        A dict with "file" (base name of the sample), "hashes" and
        "crypto_constants", plus "radare2" and "ghidra" when those stages
        ran. "ghidra" holds "analysis_complete" and "output" (last 500
        characters of stdout); if the run could not finish it additionally
        holds "error" and "analysis_complete" is False.

    Raises:
        OSError: If the sample cannot be read or ``output_dir`` cannot be
            created.
    """
    os.makedirs(output_dir, exist_ok=True)
    report = {
        "file": os.path.basename(filepath),
        "hashes": compute_hashes(filepath),
        "crypto_constants": extract_crypto_constants(filepath),
    }

    if r2pipe:
        report["radare2"] = analyze_with_radare2(filepath)

    if ghidra_path and os.path.exists(ghidra_path):
        try:
            ghidra_result = run_ghidra_headless(
                ghidra_path, output_dir, "malware_project", filepath
            )
        except (subprocess.TimeoutExpired, OSError) as exc:
            # A timed-out or unlaunchable Ghidra must not discard the results
            # of the stages that already completed.
            report["ghidra"] = {
                "analysis_complete": False,
                "output": "",
                "error": str(exc),
            }
        else:
            report["ghidra"] = {
                "analysis_complete": ghidra_result["returncode"] == 0,
                "output": ghidra_result["stdout"][-500:],
            }
    return report
def print_report(report):
    """Print a human-readable summary of an analysis report to stdout.

    Always prints the file name, SHA-256 and size. Crypto constants, the
    radare2 triage (skipped when absent, empty or carrying an "error" key)
    and the Ghidra completion line are printed only when present.

    Args:
        report: Report dict; must contain "file" and "hashes" (with "sha256"
            and "size"). "crypto_constants", "radare2" and "ghidra" are
            optional.
    """
    print("Malware Reverse Engineering Report")
    print("=" * 50)
    print(f"File: {report['file']}")
    print(f"SHA-256: {report['hashes']['sha256']}")
    print(f"Size: {report['hashes']['size']} bytes")

    if report.get("crypto_constants"):
        print("\nCrypto Constants Found:")
        for hit in report["crypto_constants"]:
            print(f" {hit['constant']} at {hit['offset']}")

    triage = report.get("radare2", {})
    if triage and "error" not in triage:
        bin_info = triage.get("info", {})
        arch = bin_info.get("arch", "?")
        bits = bin_info.get("bits", "?")
        os_name = bin_info.get("os", "?")
        bin_type = bin_info.get("type", "?")
        print(f"\nBinary Info: {arch}/{bits}bit ({os_name}) [{bin_type}]")
        print(f"Functions: {triage.get('function_count', 0)}")
        print(f"Imports: {triage.get('import_count', 0)}")

        if triage.get("suspicious_imports"):
            print("\nSuspicious Imports:")
            for finding in triage["suspicious_imports"]:
                print(f" [{finding['category']}] {finding['library']} -> {finding['function']}")

        if triage.get("network_indicators"):
            print("\nNetwork Indicators:")
            # Only the first ten indicators are shown.
            for indicator in triage["network_indicators"][:10]:
                print(f" {indicator}")

        print("\nSections:")
        for section in triage.get("sections", []):
            if section.get("flags"):
                suffix = f" [{', '.join(section['flags'])}]"
            else:
                suffix = ""
            print(f" {section['name']:10s} size={section['size']:>8} entropy={section.get('entropy', 0):.2f}{suffix}")

    ghidra_info = report.get("ghidra", {})
    if ghidra_info.get("analysis_complete"):
        print("\nGhidra: Analysis complete")
if __name__ == "__main__":
    # Command line: agent.py <binary> [ghidra_install_path]
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python agent.py <binary> [ghidra_install_path]")
        sys.exit(1)
    binary = cli_args[0]
    # The Ghidra stage is optional; without a second argument it is skipped.
    ghidra = None
    if len(cli_args) > 1:
        ghidra = cli_args[1]
    result = analyze_malware(binary, ghidra_path=ghidra)
    print_report(result)