#!/usr/bin/env python3
"""Static malware analysis agent using pefile for PE binary inspection."""

import datetime
import hashlib
import math
import os
import re
import sys

import pefile

# Imported API names grouped by the behaviour they commonly indicate.
SUSPICIOUS_IMPORTS = {
    "Process Injection": [
        "VirtualAllocEx", "WriteProcessMemory", "CreateRemoteThread",
        "NtCreateThreadEx", "QueueUserAPC", "NtMapViewOfSection",
    ],
    "Keylogging": [
        "GetAsyncKeyState", "SetWindowsHookExA", "SetWindowsHookExW",
        "GetKeyState", "GetKeyboardState",
    ],
    "Persistence": [
        "RegSetValueExA", "RegSetValueExW", "CreateServiceA",
        "CreateServiceW", "RegCreateKeyExA",
    ],
    "Evasion": [
        "IsDebuggerPresent", "CheckRemoteDebuggerPresent",
        "NtQueryInformationProcess", "GetTickCount",
        "QueryPerformanceCounter",
    ],
    "Network": [
        "InternetOpenA", "InternetOpenW", "HttpSendRequestA",
        "URLDownloadToFileA", "URLDownloadToFileW", "WSAStartup",
        "InternetConnectA", "HttpOpenRequestA",
    ],
    "Crypto": [
        "CryptEncrypt", "CryptDecrypt", "CryptAcquireContextA",
        "CryptGenKey", "CryptImportKey",
    ],
}

# Lower-cased section names mapped to the packer they are attributed to.
PACKER_SECTIONS = {
    ".upx0": "UPX",
    ".upx1": "UPX",
    ".aspack": "ASPack",
    ".adata": "ASPack",
    ".nsp0": "NsPack",
    ".vmprotect": "VMProtect",
    ".themida": "Themida",
    ".enigma1": "Enigma",
    ".petite": "Petite",
}

# Read size used when hashing, so large samples are not held in memory twice.
_HASH_CHUNK_SIZE = 1 << 20

# Indicator patterns, compiled once because they run against every string.
_URL_RE = re.compile(r"https?://")
_IP_RE = re.compile(r"\b(\d{1,3}\.){3}\d{1,3}\b")
_EMAIL_RE = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
_REGISTRY_RE = re.compile(r"HKLM|HKCU|CurrentVersion\\Run", re.IGNORECASE)
_PATH_RE = re.compile(r"\.(exe|dll|bat|ps1|vbs|tmp)", re.IGNORECASE)

# Maximum number of unique strings kept per indicator category.
_MAX_INDICATORS = 20


def compute_hashes(filepath):
    """Return the MD5, SHA-1 and SHA-256 hex digests and byte size of a file.

    The file is read in fixed-size chunks rather than loaded whole.

    Args:
        filepath: Path of the file to hash.

    Returns:
        A dict with the keys ``md5``, ``sha1``, ``sha256`` and ``size``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    size = 0
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(_HASH_CHUNK_SIZE), b""):
            size += len(chunk)
            md5.update(chunk)
            sha1.update(chunk)
            sha256.update(chunk)
    return {
        "md5": md5.hexdigest(),
        "sha1": sha1.hexdigest(),
        "sha256": sha256.hexdigest(),
        "size": size,
    }


def analyze_sections(pe):
    """Summarise every section of a parsed PE and flag anomalies.

    Args:
        pe: A parsed ``pefile.PE`` object.

    Returns:
        A list of dicts, one per section, holding its name, rounded entropy,
        raw size, virtual size, rounded virtual/raw ratio and a list of
        flags. ``HIGH_ENTROPY`` is set above 7.0 and ``HIGH_VR_RATIO`` when
        the virtual size is more than ten times the raw size.
    """
    sections = []
    for section in pe.sections:
        name = section.Name.decode(errors="replace").rstrip("\x00")
        entropy = section.get_entropy()
        raw_size = section.SizeOfRawData
        virtual_size = section.Misc_VirtualSize
        # A section with no raw data reports a ratio of 0 instead of
        # dividing by zero.
        ratio = virtual_size / raw_size if raw_size > 0 else 0

        flags = []
        if entropy > 7.0:
            flags.append("HIGH_ENTROPY")
        if ratio > 10:
            flags.append("HIGH_VR_RATIO")

        sections.append({
            "name": name,
            "entropy": round(entropy, 2),
            "raw_size": raw_size,
            "virtual_size": virtual_size,
            "ratio": round(ratio, 2),
            "flags": flags,
        })
    return sections


def detect_packer(pe):
    """Return packer indicators derived from section names and import count.

    Args:
        pe: A parsed ``pefile.PE`` object.

    Returns:
        A list of strings: the packer name for each section name found in
        ``PACKER_SECTIONS`` (each packer listed once, in first-seen order),
        followed by a ``SUSPECTED_PACKED`` note when fewer than ten
        functions are imported in total.
    """
    detected = []
    for section in pe.sections:
        name = section.Name.decode(errors="replace").rstrip("\x00").lower()
        packer = PACKER_SECTIONS.get(name)
        # Several section names map to the same packer (e.g. .upx0/.upx1);
        # report it only once.
        if packer is not None and packer not in detected:
            detected.append(packer)

    import_count = 0
    if hasattr(pe, "DIRECTORY_ENTRY_IMPORT"):
        import_count = sum(len(e.imports) for e in pe.DIRECTORY_ENTRY_IMPORT)
    if import_count < 10:
        detected.append(f"SUSPECTED_PACKED (only {import_count} imports)")
    return detected


def analyze_imports(pe):
    """List imported functions that appear in ``SUSPICIOUS_IMPORTS``.

    Args:
        pe: A parsed ``pefile.PE`` object.

    Returns:
        A list of dicts with the keys ``category``, ``dll`` and ``function``.
        When the PE has no import directory, a single placeholder entry with
        category ``PACKED`` is returned instead.
    """
    if not hasattr(pe, "DIRECTORY_ENTRY_IMPORT"):
        return [{"category": "PACKED", "dll": "N/A", "function": "No imports found"}]

    findings = []
    for entry in pe.DIRECTORY_ENTRY_IMPORT:
        dll_name = entry.dll.decode(errors="replace")
        for imp in entry.imports:
            # Imports without a name are skipped.
            if not imp.name:
                continue
            func_name = imp.name.decode(errors="replace")
            for category, funcs in SUSPICIOUS_IMPORTS.items():
                if func_name in funcs:
                    findings.append({
                        "category": category,
                        "dll": dll_name,
                        "function": func_name,
                    })
    return findings


def extract_strings(filepath, min_length=6):
    """Extract printable ASCII strings and bucket them by indicator type.

    Args:
        filepath: Path of the file to scan.
        min_length: Minimum length of a printable run to be considered.

    Returns:
        A dict with the keys ``urls``, ``ips``, ``emails``, ``registry`` and
        ``paths``. Each value holds at most 20 unique matching strings, in
        the order they first appear in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filepath, "rb") as f:
        data = f.read()

    matchers = (
        ("urls", _URL_RE),
        ("ips", _IP_RE),
        ("emails", _EMAIL_RE),
        ("registry", _REGISTRY_RE),
        ("paths", _PATH_RE),
    )
    indicators = {key: [] for key, _ in matchers}

    ascii_strings = re.findall(rb"[\x20-\x7e]{%d,}" % min_length, data)
    for raw in ascii_strings:
        text = raw.decode("ascii", errors="ignore")
        # One string may belong to several categories.
        for key, pattern in matchers:
            if pattern.search(text):
                indicators[key].append(text)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # truncated result is the same on every run (a set is not ordered).
    for key, values in indicators.items():
        indicators[key] = list(dict.fromkeys(values))[:_MAX_INDICATORS]
    return indicators


def _shannon_entropy(data):
    """Return the Shannon entropy of *data* in bits per byte (0.0 if empty)."""
    total = len(data)
    if total == 0:
        return 0.0
    freq = [0] * 256
    for b in data:
        freq[b] += 1
    entropy = 0.0
    for count in freq:
        if count > 0:
            p = count / total
            entropy -= p * math.log2(p)
    return entropy


def analyze_resources(pe):
    """Inspect each leaf of the PE resource tree for suspicious payloads.

    Args:
        pe: A parsed ``pefile.PE`` object.

    Returns:
        A list of dicts with the keys ``type_id``, ``size``, ``entropy`` and
        ``flags``. ``HIGH_ENTROPY`` is set above 7.0 and ``EMBEDDED_PE`` when
        the data starts with ``MZ``. A resource whose data cannot be read is
        reported with its declared size, entropy 0.0 and the flag
        ``UNREADABLE``.
    """
    resources = []
    if not hasattr(pe, "DIRECTORY_ENTRY_RESOURCE"):
        return resources

    for rtype in pe.DIRECTORY_ENTRY_RESOURCE.entries:
        if not hasattr(rtype, "directory"):
            continue
        for rid in rtype.directory.entries:
            if not hasattr(rid, "directory"):
                continue
            for rlang in rid.directory.entries:
                # Entries without a data attribute cannot be read.
                if not hasattr(rlang, "data"):
                    continue
                declared_size = rlang.data.struct.Size
                try:
                    data = pe.get_data(
                        rlang.data.struct.OffsetToData,
                        declared_size,
                    )
                except pefile.PEFormatError:
                    # A corrupt resource should not abort the whole report.
                    resources.append({
                        "type_id": rtype.id,
                        "size": declared_size,
                        "entropy": 0.0,
                        "flags": ["UNREADABLE"],
                    })
                    continue

                entropy = _shannon_entropy(data)
                flags = []
                if entropy > 7.0:
                    flags.append("HIGH_ENTROPY")
                if data[:2] == b"MZ":
                    flags.append("EMBEDDED_PE")
                resources.append({
                    "type_id": rtype.id,
                    "size": len(data),
                    "entropy": round(entropy, 2),
                    "flags": flags,
                })
    return resources


def analyze_pe(filepath):
    """Run every static analysis step on a PE file and collect one report.

    Args:
        filepath: Path of the PE file to analyse.

    Returns:
        A dict with the keys ``file``, ``hashes``, ``compile_time``,
        ``sections``, ``packer_indicators``, ``suspicious_imports``,
        ``string_indicators`` and ``resources``.

    Raises:
        OSError: If the file cannot be read.
        pefile.PEFormatError: If the file is not a valid PE.
    """
    hashes = compute_hashes(filepath)
    pe = pefile.PE(filepath)
    try:
        timestamp = pe.FILE_HEADER.TimeDateStamp
        # Timezone-aware conversion replaces the deprecated
        # utcfromtimestamp(); tzinfo is dropped so the text stays
        # "YYYY-MM-DDTHH:MM:SSZ".
        compile_dt = datetime.datetime.fromtimestamp(
            timestamp, tz=datetime.timezone.utc
        )
        compile_time = compile_dt.replace(tzinfo=None).isoformat() + "Z"
        report = {
            "file": os.path.basename(filepath),
            "hashes": hashes,
            "compile_time": compile_time,
            "sections": analyze_sections(pe),
            "packer_indicators": detect_packer(pe),
            "suspicious_imports": analyze_imports(pe),
            "string_indicators": extract_strings(filepath),
            "resources": analyze_resources(pe),
        }
    finally:
        # Close the PE even when one of the analysis steps raises.
        pe.close()
    return report


def print_report(report):
    """Print a human-readable summary of a report from ``analyze_pe``.

    Args:
        report: The dict returned by ``analyze_pe``.
    """
    print("STATIC MALWARE ANALYSIS REPORT")
    print("=" * 40)
    print(f"Sample: {report['file']}")
    print(f"MD5: {report['hashes']['md5']}")
    print(f"SHA-256: {report['hashes']['sha256']}")
    print(f"Size: {report['hashes']['size']} bytes")
    print(f"Compile Time: {report['compile_time']}")

    if report["packer_indicators"]:
        print(f"\nPACKER: {', '.join(report['packer_indicators'])}")

    print("\nSECTIONS:")
    for s in report["sections"]:
        flags = f" [{', '.join(s['flags'])}]" if s["flags"] else ""
        print(f" {s['name']:8s} entropy={s['entropy']} raw={s['raw_size']}{flags}")

    print("\nSUSPICIOUS IMPORTS:")
    for imp in report["suspicious_imports"]:
        print(f" [{imp['category']}] {imp['dll']} -> {imp['function']}")

    indicators = report["string_indicators"]
    if any(indicators.values()):
        print("\nEXTRACTED INDICATORS:")
        for key, vals in indicators.items():
            if vals:
                # Only the first five values per category are shown.
                print(f" {key}: {', '.join(vals[:5])}")


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        The process exit status: 0 on success, 1 on a usage error or when
        the sample cannot be read or parsed.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print("Usage: python agent.py PE_FILE")
        return 1
    try:
        result = analyze_pe(args[0])
    except (OSError, pefile.PEFormatError) as exc:
        # Report unreadable or non-PE input without a traceback.
        print(f"error: cannot analyze {args[0]}: {exc}", file=sys.stderr)
        return 1
    print_report(result)
    return 0


if __name__ == "__main__":
    sys.exit(main())