#!/usr/bin/env python3
"""Malware reverse engineering agent using Ghidra headless analyzer and r2pipe."""

import subprocess
import os
import sys
import json
import re
import hashlib

try:
    import r2pipe
except ImportError:
    r2pipe = None


# Chunk size used when streaming a sample through the hash functions.
_HASH_CHUNK_SIZE = 1 << 20

# Import names that commonly indicate a capability of interest, by category.
SUSPICIOUS_IMPORTS = {
    "injection": ["VirtualAllocEx", "WriteProcessMemory", "CreateRemoteThread", "NtCreateThreadEx"],
    "network": ["InternetOpenA", "HttpSendRequestA", "WSAStartup", "URLDownloadToFileA"],
    "evasion": ["IsDebuggerPresent", "CheckRemoteDebuggerPresent", "NtQueryInformationProcess"],
    "crypto": ["CryptEncrypt", "CryptDecrypt", "CryptAcquireContextA"],
    "persistence": ["RegSetValueExA", "CreateServiceA"],
}

# Reverse lookup (import name -> category) so each import is classified in O(1).
_IMPORT_CATEGORY = {
    name: category
    for category, names in SUSPICIOUS_IMPORTS.items()
    for name in names
}

# Byte patterns searched for by extract_crypto_constants().
CRYPTO_CONSTANTS = {
    "AES_SBOX": bytes([0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5]),
    "RC4_INIT": bytes(range(256)),
    "SHA256_INIT": bytes.fromhex("6a09e667bb67ae853c6ef372a54ff53a"),
    "RSA_MAGIC": b"RSA1",
}

_URL_RE = re.compile(r"https?://")
_IPV4_RE = re.compile(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b")

# Sections whose entropy exceeds this value are flagged HIGH_ENTROPY.
_HIGH_ENTROPY_THRESHOLD = 7.0

# Ghidra (Jython) script that dumps the function list as JSON.
# "{output}" is substituted with the escaped output path before writing.
_EXPORT_FUNCTIONS_SCRIPT = """
import ghidra.program.model.listing.FunctionIterator
import json
output = []
fm = currentProgram.getFunctionManager()
funcs = fm.getFunctions(True)
for func in funcs:
    entry = {
        "name": func.getName(),
        "address": str(func.getEntryPoint()),
        "size": func.getBody().getNumAddresses(),
        "calling_convention": func.getCallingConventionName(),
        "is_thunk": func.isThunk(),
    }
    output.append(entry)
with open("{output}", "w") as f:
    json.dump(output, f, indent=2)
"""


def compute_hashes(filepath):
    """Compute file hashes for identification.

    The file is streamed in chunks so large samples are not loaded into
    memory in one piece.

    Returns:
        dict with hex digests under "md5", "sha1", "sha256" and the byte
        count under "size".
    """
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    size = 0
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(_HASH_CHUNK_SIZE), b""):
            size += len(chunk)
            md5.update(chunk)
            sha1.update(chunk)
            sha256.update(chunk)
    return {
        "md5": md5.hexdigest(),
        "sha1": sha1.hexdigest(),
        "sha256": sha256.hexdigest(),
        "size": size,
    }


def run_ghidra_headless(ghidra_path, project_dir, project_name, binary_path,
                        script=None, script_args=None, script_path=None):
    """Run Ghidra in headless mode for automated analysis.

    Args:
        ghidra_path: Ghidra installation directory (contains "support").
        project_dir: Directory for the Ghidra project; created if missing.
        project_name: Name of the Ghidra project.
        binary_path: Binary to import (an existing import is overwritten).
        script: Optional post-analysis script name passed to -postScript.
        script_args: Optional list of arguments appended after the script.
        script_path: Optional directory passed to -scriptPath so Ghidra can
            locate ``script``.

    Returns:
        dict with the joined "command", the "returncode", and the tails of
        "stdout" (last 2000 chars) and "stderr" (last 1000 chars).

    Raises:
        subprocess.TimeoutExpired: if analysis exceeds 600 seconds.
        OSError: if the analyzeHeadless launcher cannot be executed.
    """
    os.makedirs(project_dir, exist_ok=True)

    # Ghidra ships a .bat launcher on Windows and a shell script elsewhere.
    launcher = "analyzeHeadless.bat" if os.name == "nt" else "analyzeHeadless"
    cmd = [
        os.path.join(ghidra_path, "support", launcher),
        project_dir,
        project_name,
        "-import",
        binary_path,
        "-overwrite",
    ]
    if script_path:
        cmd.extend(["-scriptPath", script_path])
    if script:
        cmd.extend(["-postScript", script])
        if script_args:
            cmd.extend(script_args)

    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    return {
        "command": " ".join(cmd),
        "returncode": result.returncode,
        "stdout": result.stdout[-2000:] if result.stdout else "",
        "stderr": result.stderr[-1000:] if result.stderr else "",
    }


def export_functions_ghidra(ghidra_path, project_dir, project_name, binary_path, output_file):
    """Export function list using Ghidra headless with a script.

    Writes the export script into ``project_dir`` and runs it as a
    post-script; the script dumps one JSON entry per function to
    ``output_file``.

    Returns:
        The result dict of run_ghidra_headless().
    """
    # The path is embedded in a double-quoted Python literal inside the
    # script, so backslashes and double quotes must be escaped.
    escaped_output = output_file.replace("\\", "\\\\").replace('"', '\\"')
    script_content = _EXPORT_FUNCTIONS_SCRIPT.replace("{output}", escaped_output)

    os.makedirs(project_dir, exist_ok=True)
    script_file = os.path.join(project_dir, "export_functions.py")
    with open(script_file, "w") as f:
        f.write(script_content)

    # Point -scriptPath at the directory the script was just written to;
    # otherwise Ghidra only searches its default script directories.
    return run_ghidra_headless(
        ghidra_path, project_dir, project_name, binary_path,
        script="export_functions.py",
        script_path=project_dir,
    )


def _coerce_entropy(value):
    """Return ``value`` as a float, or 0.0 when it is missing or not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _classify_imports(imports):
    """Return a finding for every import whose name is in SUSPICIOUS_IMPORTS."""
    findings = []
    for imp in imports:
        name = imp.get("name", "")
        category = _IMPORT_CATEGORY.get(name)
        if category is None:
            continue
        findings.append({
            "category": category,
            "function": name,
            # Accept either key for the library name.
            "library": imp.get("libname") or imp.get("lib", ""),
        })
    return findings


def _find_network_indicators(strings):
    """Return strings (truncated to 200 chars) containing a URL or dotted quad."""
    indicators = []
    for entry in strings:
        value = entry.get("string", "")
        if _URL_RE.search(value) or _IPV4_RE.search(value):
            indicators.append(value[:200])
    return indicators


def _summarize_sections(sections):
    """Return per-section name/size/entropy with a HIGH_ENTROPY flag if warranted."""
    summary = []
    for sec in sections:
        entropy = _coerce_entropy(sec.get("entropy"))
        flags = ["HIGH_ENTROPY"] if entropy > _HIGH_ENTROPY_THRESHOLD else []
        summary.append({
            "name": sec.get("name", ""),
            "size": sec.get("size", 0),
            "vsize": sec.get("vsize", 0),
            "entropy": entropy,
            "flags": flags,
        })
    return summary


def analyze_with_radare2(filepath):
    """Analyze binary with radare2 via r2pipe for quick triage.

    Returns:
        dict with binary "info", counts of functions/imports/strings,
        "suspicious_imports", up to 20 "network_indicators", "sections"
        and "entry_points"; or {"error": ...} when r2pipe is not installed.
    """
    if r2pipe is None:
        return {"error": "r2pipe not installed (pip install r2pipe)"}

    r2 = r2pipe.open(filepath, flags=["-2"])
    try:
        r2.cmd("aaa")
        # cmdj() may return None when a command yields no JSON, so every
        # result gets an empty fallback.
        info = r2.cmdj("ij") or {}
        functions = r2.cmdj("aflj") or []
        imports = r2.cmdj("iij") or []
        strings = r2.cmdj("izj") or []
        # Ask r2 to compute per-section entropy; plain "iSj" omits it.
        sections = r2.cmdj("iSj entropy") or []
        entry_points = r2.cmdj("iej") or []
    finally:
        # Always terminate the radare2 session, even if a command failed.
        r2.quit()

    bin_info = info.get("bin") or {}
    return {
        "info": {
            "arch": bin_info.get("arch", ""),
            "bits": bin_info.get("bits", 0),
            "os": bin_info.get("os", ""),
            "type": bin_info.get("bintype", ""),
            "compiler": bin_info.get("compiler", ""),
        },
        "function_count": len(functions),
        "import_count": len(imports),
        "string_count": len(strings),
        "suspicious_imports": _classify_imports(imports),
        "network_indicators": _find_network_indicators(strings)[:20],
        "sections": _summarize_sections(sections),
        "entry_points": [
            {"vaddr": e.get("vaddr"), "type": e.get("type")} for e in entry_points
        ],
    }


def extract_crypto_constants(filepath):
    """Search binary for known cryptographic constants.

    Returns:
        list of {"constant": name, "offset": hex string} for the first
        occurrence of each pattern in CRYPTO_CONSTANTS found in the file.
    """
    with open(filepath, "rb") as f:
        data = f.read()

    found = []
    for name, pattern in CRYPTO_CONSTANTS.items():
        offset = data.find(pattern)
        if offset >= 0:
            found.append({"constant": name, "offset": hex(offset)})
    return found


def analyze_malware(filepath, ghidra_path=None, output_dir="/tmp/ghidra_analysis"):
    """Full malware analysis pipeline.

    Static hashing and constant scanning always run. The radare2 and
    Ghidra stages are best-effort: a failure in either is recorded in the
    report under an "error" key instead of discarding the results already
    gathered.

    Args:
        filepath: Path of the sample to analyze.
        ghidra_path: Optional Ghidra installation directory; the Ghidra
            stage is skipped when it is unset or does not exist.
        output_dir: Directory used for the Ghidra project.

    Returns:
        Report dict with "file", "hashes", "crypto_constants" and, when the
        corresponding stage ran, "radare2" and "ghidra".
    """
    os.makedirs(output_dir, exist_ok=True)
    report = {"file": os.path.basename(filepath)}
    report["hashes"] = compute_hashes(filepath)
    report["crypto_constants"] = extract_crypto_constants(filepath)

    if r2pipe:
        try:
            report["radare2"] = analyze_with_radare2(filepath)
        except Exception as exc:  # pipeline boundary: record, don't abort
            report["radare2"] = {"error": f"radare2 analysis failed: {exc}"}

    if ghidra_path and os.path.exists(ghidra_path):
        try:
            ghidra_result = run_ghidra_headless(
                ghidra_path, output_dir, "malware_project", filepath
            )
        except (subprocess.TimeoutExpired, OSError) as exc:
            report["ghidra"] = {
                "analysis_complete": False,
                "output": "",
                "error": str(exc),
            }
        else:
            report["ghidra"] = {
                "analysis_complete": ghidra_result["returncode"] == 0,
                "output": ghidra_result["stdout"][-500:],
            }
    return report


def print_report(report):
    """Print a human-readable summary of an analyze_malware() report."""
    print("Malware Reverse Engineering Report")
    print("=" * 50)
    print(f"File: {report['file']}")
    print(f"SHA-256: {report['hashes']['sha256']}")
    print(f"Size: {report['hashes']['size']} bytes")

    if report.get("crypto_constants"):
        print("\nCrypto Constants Found:")
        for c in report["crypto_constants"]:
            print(f" {c['constant']} at {c['offset']}")

    r2 = report.get("radare2", {})
    if r2 and "error" not in r2:
        info = r2.get("info", {})
        print(f"\nBinary Info: {info.get('arch', '?')}/{info.get('bits', '?')}bit "
              f"({info.get('os', '?')}) [{info.get('type', '?')}]")
        print(f"Functions: {r2.get('function_count', 0)}")
        print(f"Imports: {r2.get('import_count', 0)}")

        if r2.get("suspicious_imports"):
            print("\nSuspicious Imports:")
            for imp in r2["suspicious_imports"]:
                print(f" [{imp['category']}] {imp['library']} -> {imp['function']}")

        if r2.get("network_indicators"):
            print("\nNetwork Indicators:")
            for ni in r2["network_indicators"][:10]:
                print(f" {ni}")

        print("\nSections:")
        for sec in r2.get("sections", []):
            flags = f" [{', '.join(sec['flags'])}]" if sec.get("flags") else ""
            # Entropy may be absent, None or a string; normalize before formatting.
            entropy = _coerce_entropy(sec.get("entropy"))
            print(f" {sec['name']:10s} size={sec['size']:>8} entropy={entropy:.2f}{flags}")

    if report.get("ghidra", {}).get("analysis_complete"):
        print("\nGhidra: Analysis complete")


def main():
    """Command-line entry point: analyze the given binary and print the report."""
    if len(sys.argv) < 2:
        print("Usage: python agent.py <binary_path> [ghidra_install_path]")
        sys.exit(1)
    binary = sys.argv[1]
    ghidra = sys.argv[2] if len(sys.argv) > 2 else None
    result = analyze_malware(binary, ghidra_path=ghidra)
    print_report(result)


if __name__ == "__main__":
    main()