Files
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

173 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""Forensic timeline reconstruction agent using Plaso subprocess wrappers."""
import csv
import os
import shlex
import subprocess
import sys
from collections import defaultdict
from datetime import datetime
def verify_plaso_installed():
    """Check that log2timeline.py and psort.py are available.

    Each tool is invoked with ``--version`` (120 second timeout).

    Returns:
        dict mapping each tool name to the stripped stdout of its
        ``--version`` call, or ``None`` when the tool cannot be launched,
        times out, or exits with a non-zero status.
    """
    tools = {}
    for tool in ("log2timeline.py", "psort.py"):
        try:
            result = subprocess.run(
                [tool, "--version"], capture_output=True, text=True,
                timeout=120,
            )
        except (OSError, subprocess.TimeoutExpired):
            # A missing or unrunnable executable is exactly the condition
            # this probe exists to report, so record it instead of crashing.
            tools[tool] = None
            continue
        tools[tool] = result.stdout.strip() if result.returncode == 0 else None
    return tools
def run_log2timeline(image_path, storage_file, parsers=None, filter_file=None):
    """Execute log2timeline.py to generate a Plaso storage file.

    Args:
        image_path: Source to process; passed as the final positional argument.
        storage_file: Value passed to ``--storage-file``.
        parsers: Optional value passed to ``--parsers``.
        filter_file: Optional value passed to ``--filter-file``.

    Returns:
        dict with keys ``command`` (shell-quoted command line, for display),
        ``returncode``, ``stdout`` and ``stderr`` (each truncated to the last
        500 characters). When the tool cannot be launched the return code is
        127; when it exceeds the 7200 second timeout it is 124. In both cases
        ``stderr`` carries a short explanation, so callers that check
        ``returncode`` see the failure instead of an exception.
    """
    cmd = ["log2timeline.py", "--storage-file", storage_file]
    if parsers:
        cmd.extend(["--parsers", parsers])
    if filter_file:
        cmd.extend(["--filter-file", filter_file])
    cmd.append(image_path)

    # Quote every argument so the recorded command stays unambiguous (and
    # copy-pasteable) when a path contains spaces or shell metacharacters.
    command = " ".join(shlex.quote(str(part)) for part in cmd)
    timeout_seconds = 7200

    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout_seconds,
        )
    except OSError as exc:
        return {
            "command": command,
            "returncode": 127,
            "stdout": "",
            "stderr": f"log2timeline.py could not be executed: {exc}",
        }
    except subprocess.TimeoutExpired:
        return {
            "command": command,
            "returncode": 124,
            "stdout": "",
            "stderr": f"log2timeline.py timed out after {timeout_seconds} seconds",
        }

    return {
        "command": command,
        "returncode": result.returncode,
        "stdout": result.stdout[-500:] if result.stdout else "",
        "stderr": result.stderr[-500:] if result.stderr else "",
    }
def run_psort_export(storage_file, output_file, output_format="l2tcsv",
                     date_filter=None):
    """Export a timeline from Plaso storage using psort.py.

    Args:
        storage_file: Plaso storage file to read.
        output_file: Destination passed to ``-w``.
        output_format: Output module name passed to ``-o``.
        date_filter: Optional filter expression appended as the final
            positional argument.

    Returns:
        dict with keys ``command`` (shell-quoted command line, for display),
        ``returncode``, ``output_file``, ``stdout`` and ``stderr`` (the last
        two truncated to 500 characters). When the tool cannot be launched
        the return code is 127; when it exceeds the 3600 second timeout it
        is 124. In both cases ``stderr`` carries a short explanation.
    """
    cmd = ["psort.py", "-o", output_format, "-w", output_file, storage_file]
    if date_filter:
        cmd.append(date_filter)

    # The filter expression contains spaces, quotes and ``>``/``<``; without
    # quoting, the recorded command would mean something else in a shell.
    command = " ".join(shlex.quote(str(part)) for part in cmd)
    timeout_seconds = 3600

    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout_seconds,
        )
    except OSError as exc:
        return {
            "command": command,
            "returncode": 127,
            "output_file": output_file,
            "stdout": "",
            "stderr": f"psort.py could not be executed: {exc}",
        }
    except subprocess.TimeoutExpired:
        return {
            "command": command,
            "returncode": 124,
            "output_file": output_file,
            "stdout": "",
            "stderr": f"psort.py timed out after {timeout_seconds} seconds",
        }

    return {
        "command": command,
        "returncode": result.returncode,
        "output_file": output_file,
        "stdout": result.stdout[-500:] if result.stdout else "",
        "stderr": result.stderr[-500:] if result.stderr else "",
    }
def create_filter_file(filter_path, paths=None):
    """Create a Plaso filter file for targeted parsing.

    Args:
        filter_path: Location of the file to (over)write.
        paths: Iterable of path entries, written one per line. When ``None``
            a built-in list of Windows artifact locations is used. An empty
            iterable produces an empty file.

    Returns:
        ``filter_path``, unchanged.
    """
    if paths is None:
        # NOTE(review): Plaso's text filter format reportedly treats path
        # segments as regular expressions -- confirm that entries using
        # ``*`` and ``$`` match as intended before relying on them.
        paths = [
            "/Windows/System32/winevt/Logs",
            "/Windows/Prefetch",
            "/Users/*/NTUSER.DAT",
            "/Users/*/AppData/Local/Google/Chrome",
            "/Users/*/AppData/Roaming/Mozilla/Firefox",
            "/$MFT",
            "/$UsnJrnl:$J",
            "/Windows/System32/config",
        ]
    # Explicit encoding keeps the file identical across platforms; writing
    # line by line avoids a stray blank line when there are no entries.
    with open(filter_path, "w", encoding="utf-8") as f:
        f.writelines(f"{entry}\n" for entry in paths)
    return filter_path
def _parse_event_time(row):
    """Return the event time of one timeline row as a datetime, or None.

    Two layouts are recognised: an ISO 8601 value (first 19 characters,
    ``YYYY-MM-DDTHH:MM:SS``) in a ``datetime`` or ``date`` column, and the
    l2tcsv layout with separate ``date`` (``MM/DD/YYYY``) and ``time``
    (``HH:MM:SS``) columns.
    """
    iso_value = row.get("datetime") or row.get("date") or ""
    candidates = [(iso_value[:19], "%Y-%m-%dT%H:%M:%S")]

    date_part = row.get("date") or ""
    time_part = row.get("time") or ""
    if date_part and time_part:
        candidates.append((f"{date_part} {time_part[:8]}", "%m/%d/%Y %H:%M:%S"))

    for text, fmt in candidates:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None


def analyze_timeline_csv(csv_path, max_rows=500000):
    """Analyze exported timeline CSV for patterns and anomalies.

    Reads at most ``max_rows`` data rows, counts events per source and per
    clock hour, and flags hours whose event count exceeds three times the
    average.

    Args:
        csv_path: Path of the CSV file; it must have a header row.
        max_rows: Upper bound on the number of data rows processed.

    Returns:
        dict with:
            ``total_events``: number of rows processed.
            ``source_counts``: source -> count, most frequent first. The
                source is taken from ``source_short``, then ``source``,
                else ``"Unknown"``.
            ``spike_hours``: ``"YYYY-MM-DD HH:00"`` -> count for hours above
                three times the average, sorted by hour.
            ``unique_hours``: number of distinct hours seen.
            ``avg_events_per_hour``: timestamped events per distinct hour,
                rounded to one decimal (0.0 when no timestamp parsed). Rows
                without a usable timestamp are excluded so they cannot
                inflate the spike threshold.
    """
    events_by_hour = defaultdict(int)
    source_counts = defaultdict(int)
    total = 0

    # newline="" is what the csv module expects for files it parses itself.
    with open(csv_path, "r", encoding="utf-8", errors="ignore", newline="") as f:
        for row in csv.DictReader(f):
            if total >= max_rows:
                break
            total += 1

            source = row.get("source_short") or row.get("source") or "Unknown"
            source_counts[source] += 1

            moment = _parse_event_time(row)
            if moment is not None:
                events_by_hour[moment.strftime("%Y-%m-%d %H:00")] += 1

    timed_events = sum(events_by_hour.values())
    avg_per_hour = timed_events / len(events_by_hour) if events_by_hour else 0.0
    spikes = {
        hour: count
        for hour, count in events_by_hour.items()
        if count > avg_per_hour * 3
    }
    return {
        "total_events": total,
        "source_counts": dict(sorted(source_counts.items(), key=lambda x: -x[1])),
        "spike_hours": dict(sorted(spikes.items())),
        "unique_hours": len(events_by_hour),
        "avg_events_per_hour": round(avg_per_hour, 1),
    }
def generate_incident_window(storage_file, output_dir, start_date, end_date):
    """Export only the events that fall inside an incident time window.

    Builds a filter selecting events strictly after ``start_date`` and
    strictly before ``end_date`` and hands it to ``run_psort_export``,
    writing to ``incident_window.csv`` inside ``output_dir``.

    Returns:
        Whatever ``run_psort_export`` returns for that export.
    """
    window_filter = "date > '{}' AND date < '{}'".format(start_date, end_date)
    return run_psort_export(
        storage_file,
        os.path.join(output_dir, "incident_window.csv"),
        date_filter=window_filter,
    )
def full_pipeline(image_path, output_dir, parsers=None, start_date=None, end_date=None):
    """Run the full timeline reconstruction pipeline.

    Steps: create ``output_dir`` and a filter file, run log2timeline, export
    the full timeline as CSV and analyze it, optionally export and analyze
    an incident window (only when both ``start_date`` and ``end_date`` are
    given), and finally export a ``json_line`` timeline.

    Args:
        image_path: Source passed to log2timeline.
        output_dir: Directory receiving all generated files.
        parsers: Parser list for log2timeline; a built-in Windows-oriented
            list is used when ``None``.
        start_date: Lower bound of the incident window (optional).
        end_date: Upper bound of the incident window (optional).

    Returns:
        dict with ``steps`` (one entry per executed tool invocation, each
        carrying a ``step`` name plus that tool's result), and optionally
        ``error`` (log2timeline failed; the pipeline stops there),
        ``analysis`` and ``incident_analysis``. An analysis is only produced
        when its export exited with status 0 and the file exists, so a stale
        or partial CSV from a failed export is never analyzed.
    """
    os.makedirs(output_dir, exist_ok=True)
    storage_file = os.path.join(output_dir, "evidence.plaso")
    if parsers is None:
        parsers = "winevtx,prefetch,mft,usnjrnl,lnk,recycle_bin,chrome_history,firefox_history,winreg"
    filter_path = os.path.join(output_dir, "filter.txt")
    create_filter_file(filter_path)
    results = {"steps": []}

    l2t_result = run_log2timeline(image_path, storage_file, parsers=parsers, filter_file=filter_path)
    results["steps"].append({"step": "log2timeline", **l2t_result})
    if l2t_result["returncode"] != 0:
        # Nothing downstream can work without a storage file.
        results["error"] = "log2timeline failed"
        return results

    full_csv = os.path.join(output_dir, "full_timeline.csv")
    export_result = run_psort_export(storage_file, full_csv)
    results["steps"].append({"step": "psort_export", **export_result})
    if export_result["returncode"] == 0 and os.path.exists(full_csv):
        results["analysis"] = analyze_timeline_csv(full_csv)

    if start_date and end_date:
        window_result = generate_incident_window(storage_file, output_dir, start_date, end_date)
        results["steps"].append({"step": "incident_window", **window_result})
        window_csv = window_result["output_file"]
        if window_result["returncode"] == 0 and os.path.exists(window_csv):
            results["incident_analysis"] = analyze_timeline_csv(window_csv)

    jsonl_output = os.path.join(output_dir, "timeline.jsonl")
    jsonl_result = run_psort_export(storage_file, jsonl_output, output_format="json_line")
    # Record this export too, so a failure shows up in the report.
    results["steps"].append({"step": "psort_json_line", **jsonl_result})
    return results
def _print_analysis(analysis):
    """Print totals, the ten most frequent sources and any spike hours."""
    print(f"\nTotal Events: {analysis['total_events']}")
    print(f"Avg/Hour: {analysis['avg_events_per_hour']}")
    print("\nSource Breakdown:")
    for src, cnt in list(analysis["source_counts"].items())[:10]:
        # str() keeps a non-string key (e.g. None) from breaking the format.
        print(f" {str(src):15s}: {cnt:>8}")
    if analysis["spike_hours"]:
        print("\nActivity Spikes:")
        for hour, cnt in analysis["spike_hours"].items():
            print(f" {hour}: {cnt} events")


def print_report(results):
    """Print a human-readable summary of a pipeline result to stdout.

    Args:
        results: dict that may contain ``steps`` (each with ``step`` and,
            optionally, ``returncode`` and ``command``), ``error``,
            ``analysis`` and ``incident_analysis``. Missing keys are skipped.

    A step is shown as OK only when its ``returncode`` equals 0; commands
    are truncated to 80 characters.
    """
    print("Timeline Reconstruction Report")
    print("=" * 50)
    for step in results.get("steps", []):
        status = "OK" if step.get("returncode") == 0 else "FAILED"
        print(f" [{status}] {step['step']}: {step.get('command', '')[:80]}")
    if "error" in results:
        print(f"\nError: {results['error']}")
    if "analysis" in results:
        _print_analysis(results["analysis"])
    if "incident_analysis" in results:
        print("\nIncident Window Analysis")
        print("-" * 50)
        _print_analysis(results["incident_analysis"])
if __name__ == "__main__":
    # Command-line entry point:
    #   agent.py <disk_image> <output_dir> [start_date] [end_date]
    if len(sys.argv) < 3:
        print(
            "Usage: python agent.py <disk_image> <output_dir> [start_date] [end_date]",
            file=sys.stderr,
        )
        sys.exit(1)
    image = sys.argv[1]
    out_dir = sys.argv[2]
    start = sys.argv[3] if len(sys.argv) > 3 else None
    end = sys.argv[4] if len(sys.argv) > 4 else None
    if start and not end:
        # The pipeline only exports an incident window when both bounds are
        # present; say so rather than dropping the argument silently.
        print(
            "Warning: start_date given without end_date; incident window skipped",
            file=sys.stderr,
        )
    result = full_pipeline(image, out_dir, start_date=start, end_date=end)
    print_report(result)
    if "error" in result:
        # Let calling scripts detect a failed run from the exit status.
        sys.exit(1)