#!/usr/bin/env python3
"""Forensic timeline reconstruction agent using Plaso subprocess wrappers."""

import csv
import os
import subprocess
import sys
from collections import defaultdict
from datetime import datetime

# Timestamp layouts understood by analyze_timeline_csv().
# ISO 8601 prefix (first 19 characters), e.g. "2024-01-15T10:05:00".
_ISO_FORMAT = "%Y-%m-%dT%H:%M:%S"
# l2tcsv keeps the date ("MM/DD/YYYY") and the time ("HH:MM:SS") in two columns.
_L2TCSV_FORMAT = "%m/%d/%Y %H:%M:%S"


def verify_plaso_installed():
    """Check that log2timeline.py and psort.py are available.

    Returns:
        dict mapping each tool name to the stripped stdout of ``<tool> --version``,
        or to ``None`` when the tool exits non-zero, cannot be launched
        (e.g. not on PATH) or does not answer within 120 seconds.
    """
    tools = {}
    for tool in ["log2timeline.py", "psort.py"]:
        try:
            result = subprocess.run(
                [tool, "--version"],
                capture_output=True,
                text=True,
                timeout=120,
            )
        except (OSError, subprocess.TimeoutExpired):
            # A missing/unlaunchable binary is exactly what this check is meant
            # to report, so it must not propagate as an exception.
            tools[tool] = None
            continue
        tools[tool] = result.stdout.strip() if result.returncode == 0 else None
    return tools


def run_log2timeline(image_path, storage_file, parsers=None, filter_file=None):
    """Execute log2timeline.py to generate a Plaso storage file.

    Args:
        image_path: Evidence source passed as the last positional argument.
        storage_file: Path given to ``--storage-file``.
        parsers: Optional value for ``--parsers``.
        filter_file: Optional value for ``--filter-file``.

    Returns:
        dict with ``command`` (space-joined argv), ``returncode`` and the last
        500 characters of ``stdout`` and ``stderr``.

    Raises:
        OSError: If log2timeline.py cannot be launched.
        subprocess.TimeoutExpired: If the run exceeds 7200 seconds.
    """
    cmd = ["log2timeline.py", "--storage-file", storage_file]
    if parsers:
        cmd.extend(["--parsers", parsers])
    if filter_file:
        cmd.extend(["--filter-file", filter_file])
    cmd.append(image_path)
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=7200)
    return {
        "command": " ".join(cmd),
        "returncode": result.returncode,
        "stdout": result.stdout[-500:] if result.stdout else "",
        "stderr": result.stderr[-500:] if result.stderr else "",
    }


def run_psort_export(storage_file, output_file, output_format="l2tcsv", date_filter=None):
    """Export a timeline from Plaso storage using psort.py.

    Args:
        storage_file: Plaso storage file to read.
        output_file: Path given to ``-w``.
        output_format: Value given to ``-o``.
        date_filter: Optional filter expression appended as the last argument.

    Returns:
        dict with ``command`` (space-joined argv), ``returncode``,
        ``output_file`` and the last 500 characters of ``stdout`` and ``stderr``.

    Raises:
        OSError: If psort.py cannot be launched.
        subprocess.TimeoutExpired: If the run exceeds 3600 seconds.
    """
    cmd = ["psort.py", "-o", output_format, "-w", output_file, storage_file]
    if date_filter:
        cmd.append(date_filter)
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
    return {
        "command": " ".join(cmd),
        "returncode": result.returncode,
        "output_file": output_file,
        "stdout": result.stdout[-500:] if result.stdout else "",
        # Mirrors run_log2timeline() so a failed export can be diagnosed.
        "stderr": result.stderr[-500:] if result.stderr else "",
    }


def create_filter_file(filter_path, paths=None):
    """Create a Plaso filter file for targeted parsing.

    Args:
        filter_path: File to (over)write.
        paths: Iterable of path expressions, one per output line. When ``None``
            a built-in list of Windows artifact locations is used.

    Returns:
        ``filter_path`` unchanged.
    """
    if paths is None:
        paths = [
            "/Windows/System32/winevt/Logs",
            "/Windows/Prefetch",
            "/Users/*/NTUSER.DAT",
            "/Users/*/AppData/Local/Google/Chrome",
            "/Users/*/AppData/Roaming/Mozilla/Firefox",
            "/$MFT",
            "/$UsnJrnl:$J",
            "/Windows/System32/config",
        ]
    with open(filter_path, "w", encoding="utf-8") as f:
        f.write("\n".join(paths) + "\n")
    return filter_path


def _parse_event_time(row):
    """Return the event time of one timeline CSV row, or None if unparseable."""
    # ISO timestamps: a "datetime" column, or an ISO-formatted "date" column.
    iso_candidate = row.get("datetime") or row.get("date") or ""
    try:
        return datetime.strptime(iso_candidate[:19], _ISO_FORMAT)
    except (ValueError, TypeError):
        pass
    # l2tcsv (the default export format here): separate "date" and "time".
    l2t_candidate = f"{row.get('date') or ''} {row.get('time') or ''}"
    try:
        return datetime.strptime(l2t_candidate, _L2TCSV_FORMAT)
    except ValueError:
        return None


def analyze_timeline_csv(csv_path, max_rows=500000):
    """Analyze an exported timeline CSV for patterns and anomalies.

    Reads at most ``max_rows`` rows, counts events per source and per hour and
    flags hours whose event count exceeds three times the hourly average.

    Args:
        csv_path: CSV file with a header row. Timestamps are taken from a
            ``datetime`` column (ISO 8601) or from ``date``/``time`` columns
            (l2tcsv layout); the source from ``source_short`` or ``source``.
        max_rows: Upper bound on the number of rows counted.

    Returns:
        dict with ``total_events``, ``source_counts`` (descending by count),
        ``spike_hours`` (sorted by hour), ``unique_hours``,
        ``avg_events_per_hour`` (rounded to one decimal) and
        ``unparsed_timestamps`` (rows that could not be placed in an hour).
    """
    events_by_hour = defaultdict(int)
    source_counts = defaultdict(int)
    total = 0
    with open(csv_path, "r", encoding="utf-8", errors="ignore", newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            if total >= max_rows:
                break
            total += 1
            source = row.get("source_short", row.get("source", "Unknown"))
            source_counts[source] += 1
            event_time = _parse_event_time(row)
            if event_time is not None:
                events_by_hour[event_time.strftime("%Y-%m-%d %H:00")] += 1

    # Only rows that landed in an hour bucket take part in the average;
    # counting undated rows would inflate the spike threshold.
    bucketed = sum(events_by_hour.values())
    avg_per_hour = bucketed / max(len(events_by_hour), 1)
    spikes = {h: c for h, c in events_by_hour.items() if c > avg_per_hour * 3}
    return {
        "total_events": total,
        "source_counts": dict(sorted(source_counts.items(), key=lambda x: -x[1])),
        "spike_hours": dict(sorted(spikes.items())),
        "unique_hours": len(events_by_hour),
        "avg_events_per_hour": round(avg_per_hour, 1),
        "unparsed_timestamps": total - bucketed,
    }


def generate_incident_window(storage_file, output_dir, start_date, end_date):
    """Export events within a specific incident time window.

    Writes ``incident_window.csv`` into ``output_dir`` using a psort filter of
    the form ``date > '<start_date>' AND date < '<end_date>'`` (both bounds
    exclusive).

    Returns:
        The result dict of run_psort_export().
    """
    output_file = os.path.join(output_dir, "incident_window.csv")
    date_filter = f"date > '{start_date}' AND date < '{end_date}'"
    return run_psort_export(storage_file, output_file, date_filter=date_filter)


def full_pipeline(image_path, output_dir, parsers=None, start_date=None, end_date=None):
    """Run the full timeline reconstruction pipeline.

    Steps: write a filter file, run log2timeline, export the full timeline as
    CSV and analyze it, optionally export/analyze an incident window (only when
    both ``start_date`` and ``end_date`` are given), and export JSONL.

    Args:
        image_path: Evidence source handed to log2timeline.
        output_dir: Directory for all artifacts; created if missing.
        parsers: Value for log2timeline ``--parsers``; a Windows-oriented
            default list is used when ``None``.
        start_date: Exclusive lower bound of the incident window.
        end_date: Exclusive upper bound of the incident window.

    Returns:
        dict with ``steps`` (one entry per tool invocation) and, when
        available, ``analysis``, ``incident_analysis`` and ``error``.
    """
    os.makedirs(output_dir, exist_ok=True)
    storage_file = os.path.join(output_dir, "evidence.plaso")
    if parsers is None:
        parsers = (
            "winevtx,prefetch,mft,usnjrnl,lnk,recycle_bin,"
            "chrome_history,firefox_history,winreg"
        )
    filter_path = os.path.join(output_dir, "filter.txt")
    create_filter_file(filter_path)

    results = {"steps": []}
    l2t_result = run_log2timeline(
        image_path, storage_file, parsers=parsers, filter_file=filter_path
    )
    results["steps"].append({"step": "log2timeline", **l2t_result})
    if l2t_result["returncode"] != 0:
        results["error"] = "log2timeline failed"
        return results

    full_csv = os.path.join(output_dir, "full_timeline.csv")
    export_result = run_psort_export(storage_file, full_csv)
    results["steps"].append({"step": "psort_export", **export_result})
    # Require a successful export: a CSV left over from an earlier run must not
    # be analyzed as if it belonged to this one.
    if export_result["returncode"] == 0 and os.path.exists(full_csv):
        results["analysis"] = analyze_timeline_csv(full_csv)

    if start_date and end_date:
        window_result = generate_incident_window(
            storage_file, output_dir, start_date, end_date
        )
        results["steps"].append({"step": "incident_window", **window_result})
        window_csv = os.path.join(output_dir, "incident_window.csv")
        if window_result["returncode"] == 0 and os.path.exists(window_csv):
            results["incident_analysis"] = analyze_timeline_csv(window_csv)

    jsonl_output = os.path.join(output_dir, "timeline.jsonl")
    jsonl_result = run_psort_export(storage_file, jsonl_output, output_format="json_line")
    # Recorded like every other step so a failed JSONL export shows in the report.
    results["steps"].append({"step": "psort_jsonl_export", **jsonl_result})
    return results


def print_report(results):
    """Print a human-readable summary of a full_pipeline() result to stdout."""
    print("Timeline Reconstruction Report")
    print("=" * 50)
    for step in results.get("steps", []):
        status = "OK" if step.get("returncode") == 0 else "FAILED"
        print(f" [{status}] {step['step']}: {step.get('command', '')[:80]}")
    if "analysis" in results:
        a = results["analysis"]
        print(f"\nTotal Events: {a['total_events']}")
        print(f"Avg/Hour: {a['avg_events_per_hour']}")
        print("\nSource Breakdown:")
        # source_counts is already ordered by descending count; show the top 10.
        for src, cnt in list(a["source_counts"].items())[:10]:
            print(f" {src:15s}: {cnt:>8}")
        if a["spike_hours"]:
            print("\nActivity Spikes:")
            for hour, cnt in a["spike_hours"].items():
                print(f" {hour}: {cnt} events")


def main():
    """Command-line entry point: run the pipeline and print the report."""
    if len(sys.argv) < 3:
        print("Usage: python agent.py <image_path> <output_dir> [start_date] [end_date]")
        sys.exit(1)
    image = sys.argv[1]
    out_dir = sys.argv[2]
    start = sys.argv[3] if len(sys.argv) > 3 else None
    end = sys.argv[4] if len(sys.argv) > 4 else None
    result = full_pipeline(image, out_dir, start_date=start, end_date=end)
    print_report(result)


if __name__ == "__main__":
    main()