#!/usr/bin/env python3
"""
Forensic Evidence Processor

Parses and correlates forensic artifacts from KAPE/EZ tool output
to generate consolidated timeline and IOC reports.
"""

import json
import csv
import sys
import os
from datetime import datetime, timezone
from collections import defaultdict

# Entry keys that may carry a timestamp, in priority order.
_TIMESTAMP_KEYS = ("timestamp", "timestamp_created", "timestamp_modified")

# Entry keys that may carry a file hash.
_HASH_KEYS = ("hash", "sha1", "md5")

# Lower-cased path fragments treated as suspicious staging locations.
_SUSPICIOUS_DIRS = (
    "\\temp\\",
    "\\tmp\\",
    "\\appdata\\local\\temp\\",
    "\\users\\public\\",
    "\\programdata\\",
    "\\recycle",
    # "\\recycle" alone does not match "\$Recycle.Bin\" because of the "$".
    "\\$recycle.bin\\",
    "\\windows\\debug\\",
)

# Lower-cased file extensions considered executable/script content.
_SUSPICIOUS_EXTENSIONS = (".exe", ".dll", ".ps1", ".bat", ".vbs", ".js")


def _read_rows(csv_path: str):
    """Yield each row of a CSV file as a dict of column name -> string.

    csv.DictReader fills the missing cells of a short row with None and
    stores surplus cells under the key None; both are normalized here so
    callers can rely on every value being a string.
    """
    # newline="" lets the csv module handle embedded newlines itself.
    with open(csv_path, "r", encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            yield {
                key: (value if value is not None else "")
                for key, value in row.items()
                if key is not None
            }


def _first_timestamp(entry: dict) -> str:
    """Return the first non-empty timestamp value of an entry, or ""."""
    for key in _TIMESTAMP_KEYS:
        value = entry.get(key)
        if value:
            return value
    return ""


def parse_prefetch_csv(csv_path: str) -> list:
    """Parse PECmd output for program execution history.

    Args:
        csv_path: Path to a PECmd CSV file.

    Returns:
        One dict per CSV row, tagged with artifact "prefetch".
    """
    return [
        {
            "artifact": "prefetch",
            "timestamp": row.get("LastRun", ""),
            "executable": row.get("ExecutableName", ""),
            "run_count": row.get("RunCount", ""),
            "path": row.get("SourceFilename", ""),
            "hash": row.get("Hash", ""),
            "volume": row.get("Volume0Name", ""),
        }
        for row in _read_rows(csv_path)
    ]


def parse_shimcache_csv(csv_path: str) -> list:
    """Parse AppCompatCacheParser output.

    Args:
        csv_path: Path to an AppCompatCacheParser CSV file.

    Returns:
        One dict per CSV row, tagged with artifact "shimcache".
    """
    return [
        {
            "artifact": "shimcache",
            "timestamp": row.get("LastModifiedTimeUTC", ""),
            "path": row.get("Path", ""),
            "executed": row.get("Executed", ""),
        }
        for row in _read_rows(csv_path)
    ]


def parse_amcache_csv(csv_path: str) -> list:
    """Parse AmcacheParser output for installed programs.

    Args:
        csv_path: Path to an AmcacheParser CSV file.

    Returns:
        One dict per CSV row, tagged with artifact "amcache". The path is
        taken from "FullPath", falling back to "Name" when that column is
        absent or empty.
    """
    return [
        {
            "artifact": "amcache",
            "timestamp": row.get("FileKeyLastWriteTimestamp", ""),
            "path": row.get("FullPath") or row.get("Name", ""),
            "sha1": row.get("SHA1", ""),
            "publisher": row.get("Publisher", ""),
            "product": row.get("ProductName", ""),
        }
        for row in _read_rows(csv_path)
    ]


def parse_mft_csv(csv_path: str) -> list:
    """Parse MFTECmd output for file system timeline.

    Args:
        csv_path: Path to an MFTECmd CSV file.

    Returns:
        One dict per CSV row, tagged with artifact "mft". The path is the
        "ParentPath" and "FileName" columns joined with a backslash.
    """
    return [
        {
            "artifact": "mft",
            "timestamp_created": row.get("Created0x10", ""),
            "timestamp_modified": row.get("LastModified0x10", ""),
            "path": row.get("ParentPath", "") + "\\" + row.get("FileName", ""),
            "size": row.get("FileSize", ""),
            "in_use": row.get("InUse", ""),
            "is_directory": row.get("IsDirectory", ""),
        }
        for row in _read_rows(csv_path)
    ]


def build_timeline(all_entries: list) -> list:
    """Build consolidated timeline from all artifact sources.

    Each entry contributes at most one event, keyed on its first non-empty
    timestamp ("timestamp", then "timestamp_created", then
    "timestamp_modified"). Entries without any timestamp are dropped.

    Args:
        all_entries: Entry dicts as produced by the parse_* functions.

    Returns:
        Event dicts sorted ascending by their timestamp string.
    """
    timeline = []
    for entry in all_entries:
        ts = _first_timestamp(entry)
        if not ts:
            continue
        timeline.append({
            "timestamp": ts,
            "artifact": entry.get("artifact", "unknown"),
            # Fall back to the executable name when the path is missing
            # or empty, not only when the key is absent.
            "description": entry.get("path") or entry.get("executable") or "",
            "details": {
                k: v for k, v in entry.items()
                if k not in ("timestamp", "artifact")
            },
        })
    timeline.sort(key=lambda event: event["timestamp"])
    return timeline


def extract_iocs(all_entries: list) -> dict:
    """Extract potential IOCs from forensic artifacts.

    Args:
        all_entries: Entry dicts as produced by the parse_* functions.

    Returns:
        A dict with:
          "file_hashes": sorted list of hash strings of length >= 32,
          "suspicious_paths": list of {"path", "artifact", "timestamp"}
              for executable/script files under suspicious directories,
          "executables": sorted list of distinct executable names.
    """
    file_hashes = set()
    executables = set()
    suspicious_paths = []

    for entry in all_entries:
        for hash_key in _HASH_KEYS:
            h = entry.get(hash_key) or ""
            # Shorter values (e.g. the 8-character prefetch hash) are not
            # file content hashes and are skipped.
            if len(h) >= 32:
                file_hashes.add(h)

        raw_path = entry.get("path") or ""
        path = raw_path.lower()
        if path.endswith(_SUSPICIOUS_EXTENSIONS) and any(
            d in path for d in _SUSPICIOUS_DIRS
        ):
            suspicious_paths.append({
                "path": raw_path,
                "artifact": entry.get("artifact", ""),
                # MFT entries carry created/modified timestamps rather
                # than a plain "timestamp" key.
                "timestamp": _first_timestamp(entry),
            })

        exe = entry.get("executable") or ""
        if exe:
            executables.add(exe)

    return {
        "file_hashes": sorted(file_hashes),
        "suspicious_paths": suspicious_paths,
        "executables": sorted(executables),
    }


def generate_report(timeline: list, iocs: dict, output_path: str) -> None:
    """Generate forensic analysis report.

    Writes a JSON document containing the generation time (UTC), the
    total timeline size, up to 100 file hashes, up to 50 suspicious
    files, the count of unique executables and the first 100 timeline
    events.

    Args:
        timeline: Events as returned by build_timeline.
        iocs: IOC dict as returned by extract_iocs.
        output_path: Destination path of the JSON report.
    """
    # Aware "now" converted back to naive so the text keeps the original
    # "<isoformat>Z" shape without calling the deprecated utcnow().
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    report = {
        "report_generated": now_utc.isoformat() + "Z",
        "timeline_entries": len(timeline),
        "iocs": {
            "file_hashes": iocs["file_hashes"][:100],
            "suspicious_files": iocs["suspicious_paths"][:50],
            "unique_executables": len(iocs["executables"]),
        },
        "timeline_sample": timeline[:100],
    }
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)


# Filename keywords -> parser, checked in order; first match wins.
_PARSERS = (
    (("prefetch", "pecmd"), parse_prefetch_csv),
    (("shimcache", "appcompat"), parse_shimcache_csv),
    (("amcache",), parse_amcache_csv),
    (("mft",), parse_mft_csv),
)


def _select_parser(filename_lower: str):
    """Return the parser matching a lower-cased file name, or None."""
    for keywords, parser in _PARSERS:
        if any(keyword in filename_lower for keyword in keywords):
            return parser
    return None


def main(argv=None) -> int:
    """Run the processor over a KAPE output directory.

    Args:
        argv: Command-line arguments without the program name; defaults
            to sys.argv[1:].

    Returns:
        Process exit code: 0 on success, 1 on usage or directory errors.
    """
    if argv is None:
        argv = sys.argv[1:]

    if len(argv) < 1:
        print("Usage: python process.py <kape_output_dir>")
        print()
        print("Parses KAPE/EZ tool CSV output and generates timeline + IOC report.")
        return 1

    kape_dir = argv[0]
    if not os.path.isdir(kape_dir):
        print(f"Error: Directory not found: {kape_dir}")
        return 1

    all_entries = []
    for root, _dirs, files in os.walk(kape_dir):
        for name in files:
            name_lower = name.lower()
            # Compare on the lower-cased name so "*.CSV" is not skipped.
            if not name_lower.endswith(".csv"):
                continue
            parser = _select_parser(name_lower)
            if parser is None:
                continue
            path = os.path.join(root, name)
            try:
                all_entries.extend(parser(path))
            except Exception as e:
                # Best effort: one unreadable file must not abort the run.
                print(f"Warning: Could not parse {path}: {e}")

    print(f"Parsed {len(all_entries)} artifact entries")

    timeline = build_timeline(all_entries)
    iocs = extract_iocs(all_entries)

    report_path = os.path.join(kape_dir, "forensic_analysis.json")
    generate_report(timeline, iocs, report_path)
    print(f"Forensic report: {report_path}")

    print("\n--- Forensic Summary ---")
    print(f"Timeline entries: {len(timeline)}")
    print(f"Unique file hashes: {len(iocs['file_hashes'])}")
    print(f"Suspicious file paths: {len(iocs['suspicious_paths'])}")
    print(f"Unique executables: {len(iocs['executables'])}")
    return 0


if __name__ == "__main__":
    sys.exit(main())