#!/usr/bin/env python3 """Agent for performing Linux log forensics investigation.""" import json import argparse import re import gzip from pathlib import Path from collections import Counter def analyze_auth_log(log_file): """Analyze /var/log/auth.log for suspicious authentication events.""" content = _read_log(log_file) failed_logins = [] successful_logins = [] sudo_events = [] ssh_events = [] for line in content.splitlines(): if "Failed password" in line or "authentication failure" in line: ip_match = re.search(r"from (\d+\.\d+\.\d+\.\d+)", line) user_match = re.search(r"for (?:invalid user )?(\S+)", line) failed_logins.append({ "user": user_match.group(1) if user_match else "", "source_ip": ip_match.group(1) if ip_match else "", "line": line.strip()[:200], }) elif "Accepted" in line: ip_match = re.search(r"from (\d+\.\d+\.\d+\.\d+)", line) user_match = re.search(r"for (\S+)", line) successful_logins.append({ "user": user_match.group(1) if user_match else "", "source_ip": ip_match.group(1) if ip_match else "", }) elif "sudo:" in line and "COMMAND=" in line: cmd_match = re.search(r"COMMAND=(.*)", line) user_match = re.search(r"sudo:\s+(\S+)", line) sudo_events.append({ "user": user_match.group(1) if user_match else "", "command": cmd_match.group(1)[:200] if cmd_match else "", }) elif "sshd" in line: ssh_events.append(line.strip()[:200]) brute_force_ips = Counter(f.get("source_ip") for f in failed_logins if f.get("source_ip")) bf_suspects = [{"ip": ip, "attempts": count} for ip, count in brute_force_ips.most_common(10) if count >= 5] return { "log_file": log_file, "failed_logins": len(failed_logins), "successful_logins": len(successful_logins), "sudo_commands": len(sudo_events), "brute_force_suspects": bf_suspects, "failed_users": dict(Counter(f.get("user") for f in failed_logins).most_common(10)), "sudo_events": sudo_events[:20], "successful_logins_detail": successful_logins[:20], } def analyze_syslog(log_file): """Analyze /var/log/syslog for anomalies.""" content = _read_log(log_file) errors = [] kernel_events = [] cron_events = [] for line in content.splitlines(): lower = line.lower() if "error" in lower or "fail" in lower or "critical" in lower: errors.append(line.strip()[:200]) if "kernel:" in lower: if any(kw in lower for kw in ["segfault", "oom", "killed", "panic", "bug", "usb"]): kernel_events.append(line.strip()[:200]) if "cron" in lower: cron_events.append(line.strip()[:200]) return { "log_file": log_file, "total_errors": len(errors), "kernel_anomalies": len(kernel_events), "cron_jobs": len(cron_events), "top_errors": errors[:20], "kernel_events": kernel_events[:15], "cron_events": cron_events[:15], } def analyze_command_history(history_file): """Analyze bash history for suspicious commands.""" content = Path(history_file).read_text(encoding="utf-8", errors="replace") suspicious_patterns = [ (r"curl.*\|.*sh", "REMOTE_CODE_EXECUTION"), (r"wget.*\|.*bash", "REMOTE_CODE_EXECUTION"), (r"chmod\s+[47]77", "WORLD_WRITABLE_PERMISSION"), (r"nc\s+-[elp]", "NETCAT_LISTENER"), (r"/dev/tcp/", "BASH_REVERSE_SHELL"), (r"base64\s+-d", "BASE64_DECODE"), (r"python.*-c.*import.*socket", "PYTHON_REVERSE_SHELL"), (r"crontab\s+-[er]", "CRONTAB_MODIFICATION"), (r"iptables\s+-F", "FIREWALL_FLUSH"), (r"history\s+-c", "HISTORY_CLEAR"), (r"rm\s+-rf\s+/", "DESTRUCTIVE_COMMAND"), (r"dd\s+if=.*of=/dev/", "DISK_OVERWRITE"), ] findings = [] for line in content.splitlines(): cmd = line.strip() for pattern, tag in suspicious_patterns: if re.search(pattern, cmd, re.I): findings.append({"command": cmd[:200], "tag": tag, "pattern": pattern}) break return { "history_file": history_file, "total_commands": len(content.splitlines()), "suspicious_commands": len(findings), "findings": findings[:30], "tags": dict(Counter(f["tag"] for f in findings)), } def timeline_analysis(log_files, start_time=None, end_time=None): """Create timeline from multiple log sources.""" events = [] ts_patterns = [ re.compile(r"(\w{3}\s+\d+\s+\d+:\d+:\d+)"), re.compile(r"(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2})"), ] for log_file in log_files: content = _read_log(log_file) for line in content.splitlines(): ts = None for tp in ts_patterns: m = tp.search(line) if m: ts = m.group(1) break if ts: events.append({"timestamp": ts, "source": Path(log_file).name, "event": line.strip()[:200]}) events.sort(key=lambda x: x["timestamp"]) return { "sources": log_files, "total_events": len(events), "timeline": events[:100], } def _read_log(filepath): """Read log file, supporting gzip.""" p = Path(filepath) if p.suffix == ".gz": with gzip.open(filepath, "rt", encoding="utf-8", errors="replace") as f: return f.read() return p.read_text(encoding="utf-8", errors="replace") def main(): parser = argparse.ArgumentParser(description="Linux Log Forensics Investigation Agent") sub = parser.add_subparsers(dest="command") a = sub.add_parser("auth", help="Analyze auth.log") a.add_argument("--file", required=True) s = sub.add_parser("syslog", help="Analyze syslog") s.add_argument("--file", required=True) h = sub.add_parser("history", help="Analyze bash history") h.add_argument("--file", required=True) t = sub.add_parser("timeline", help="Create event timeline") t.add_argument("--files", nargs="+", required=True) args = parser.parse_args() if args.command == "auth": result = analyze_auth_log(args.file) elif args.command == "syslog": result = analyze_syslog(args.file) elif args.command == "history": result = analyze_command_history(args.file) elif args.command == "timeline": result = timeline_analysis(args.files) else: parser.print_help() return print(json.dumps(result, indent=2, default=str)) if __name__ == "__main__": main()