#!/usr/bin/env python3
"""Agent for performing indicator of compromise (IOC) lifecycle management."""

import argparse
import csv
import hashlib
import json
import re
from datetime import datetime, timedelta, timezone
from pathlib import Path

# Compiled patterns for every supported IOC type.  Insertion order matters:
# ingest_ioc_feed() walks the dict in order and takes the first full match.
IOC_PATTERNS = {
    "ipv4": re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b"),
    "domain": re.compile(r"\b(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}\b"),
    "md5": re.compile(r"\b[a-f0-9]{32}\b", re.I),
    "sha256": re.compile(r"\b[a-f0-9]{64}\b", re.I),
    "sha1": re.compile(r"\b[a-f0-9]{40}\b", re.I),
    "url": re.compile(r"https?://[^\s<>\"']+"),
    "email": re.compile(r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b"),
    "cve": re.compile(r"CVE-\d{4}-\d{4,}", re.I),
}

# Column-name aliases accepted in feed CSVs, shared by every CSV-reading
# function so they all agree on where a value lives.
_INDICATOR_FIELDS = ("indicator", "ioc", "value", "Indicator")
_TYPE_FIELDS = ("type", "ioc_type", "Type")
_SOURCE_FIELDS = ("source", "feed")
_CONFIDENCE_FIELDS = ("confidence", "score")
_DATE_FIELDS = ("first_seen", "date", "added")
_TAG_FIELDS = ("tags", "malware_family")


def _read_csv_rows(csv_file):
    """Return every row of *csv_file* as a list of dicts keyed by header."""
    # utf-8-sig drops a leading BOM (otherwise it is glued to the first header
    # name); newline="" is what the csv module expects for correct quoting.
    with open(csv_file, "r", encoding="utf-8-sig", errors="replace", newline="") as f:
        return list(csv.DictReader(f))


def _first_field(row, names, default=""):
    """Return the first non-blank, stripped value among *names* in *row*.

    Columns that are absent, None (short rows) or whitespace-only are skipped;
    *default* is returned when none of the aliases holds a value.
    """
    for name in names:
        value = row.get(name)
        if isinstance(value, str) and value.strip():
            return value.strip()
    return default


def _parse_utc(date_str):
    """Parse an ISO-8601 string into a timezone-aware UTC datetime.

    A trailing "Z" is accepted and naive timestamps are assumed to be UTC.
    Raises ValueError when the text is not a valid ISO-8601 date/time.
    """
    text = date_str.strip()
    if text.endswith(("Z", "z")):
        # datetime.fromisoformat() only understands "Z" from Python 3.11 on.
        text = text[:-1] + "+00:00"
    parsed = datetime.fromisoformat(text)
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def extract_iocs(text_file, max_per_type=100):
    """Extract IOCs from a text file or report.

    Args:
        text_file: Path of the text file to scan.
        max_per_type: Maximum number of indicators listed per IOC type.

    Returns:
        A dict with the source path, the total number of unique indicators
        found, the per-type counts, and the (sorted, capped) indicators.
        Counts reflect everything found, not just the capped listing.
    """
    text = Path(text_file).read_text(encoding="utf-8", errors="replace")
    indicators = {}
    counts = {}
    for ioc_type, pattern in IOC_PATTERNS.items():
        # Sort so that the capped listing is the same on every run; plain set
        # order depends on string hash randomisation.
        matches = sorted(set(pattern.findall(text)))
        if matches:
            counts[ioc_type] = len(matches)
            indicators[ioc_type] = matches[:max_per_type]
    return {
        "source": text_file,
        "total_iocs": sum(counts.values()),
        "by_type": counts,
        "indicators": indicators,
    }


def ingest_ioc_feed(csv_file):
    """Ingest IOC feed from CSV and normalize.

    Each row is mapped onto a fixed record layout using the column aliases
    defined at module level.  When a row carries no type, the type is inferred
    from the first IOC_PATTERNS entry that fully matches the indicator.

    Returns:
        A dict with the number of rows ingested, per-type counts (untyped
        rows are counted as "unknown") and the first 50 normalized records.
    """
    iocs = []
    for row in _read_csv_rows(csv_file):
        indicator = _first_field(row, _INDICATOR_FIELDS)
        ioc_type = _first_field(row, _TYPE_FIELDS)
        if not ioc_type:
            for candidate, pattern in IOC_PATTERNS.items():
                if pattern.fullmatch(indicator):
                    ioc_type = candidate
                    break
        iocs.append({
            "indicator": indicator,
            "type": ioc_type,
            "source": _first_field(row, _SOURCE_FIELDS),
            "confidence": _first_field(row, _CONFIDENCE_FIELDS),
            "first_seen": _first_field(row, _DATE_FIELDS),
            "tags": _first_field(row, _TAG_FIELDS),
        })
    return {
        "total_ingested": len(iocs),
        "by_type": _count_field(iocs, "type"),
        "iocs": iocs[:50],
    }


def check_expiration(ioc_db_file, ttl_days=90):
    """Check IOC database for expired indicators based on TTL.

    Args:
        ioc_db_file: CSV file holding the indicators and their dates.
        ttl_days: Age in days above which an indicator counts as expired.

    Returns:
        A dict with total/active/expired counts, the TTL used and up to 30
        expired rows, each extended with its "age_days".  Rows whose date is
        missing or unparseable are treated as active.
    """
    rows = _read_csv_rows(ioc_db_file)
    # Aware UTC "now" so that timestamps carrying any UTC offset can be
    # compared without raising on naive/aware subtraction.
    now = datetime.now(timezone.utc)
    expired = []
    active = []
    for row in rows:
        try:
            added = _parse_utc(_first_field(row, _DATE_FIELDS))
        except (ValueError, OverflowError):
            active.append(row)
            continue
        age_days = (now - added).days
        if age_days > ttl_days:
            expired.append({**row, "age_days": age_days})
        else:
            active.append(row)
    return {
        "total": len(rows),
        "active": len(active),
        "expired": len(expired),
        "ttl_days": ttl_days,
        "expired_indicators": expired[:30],
    }


def deduplicate_iocs(csv_file):
    """Deduplicate IOCs and merge metadata from multiple sources.

    Indicators are compared case-insensitively after stripping whitespace.
    For each unique indicator the non-empty source names are collected and
    the first non-empty type seen is kept.

    Returns:
        A dict with the original and unique row counts, the number of
        duplicates removed, up to 20 indicators seen more than once and up to
        50 unique indicators.
    """
    rows = _read_csv_rows(csv_file)
    merged = {}
    for row in rows:
        key = _first_field(row, _INDICATOR_FIELDS).lower()
        entry = merged.get(key)
        if entry is None:
            entry = merged[key] = {"indicator": key, "type": "", "sources": set(), "count": 0}
        entry["count"] += 1
        if not entry["type"]:
            entry["type"] = _first_field(row, _TYPE_FIELDS)
        source = _first_field(row, _SOURCE_FIELDS)
        if source:
            entry["sources"].add(source)
    unique = [
        {
            "indicator": entry["indicator"],
            "type": entry["type"],
            # Sorted so the output is stable between runs.
            "sources": sorted(entry["sources"]),
            "occurrences": entry["count"],
        }
        for entry in merged.values()
    ]
    return {
        "original_count": len(rows),
        "unique_count": len(unique),
        "duplicates_removed": len(rows) - len(unique),
        "multi_source": [u for u in unique if u["occurrences"] > 1][:20],
        "unique_iocs": unique[:50],
    }


def generate_lifecycle_report(csv_file, ttl_days=90):
    """Generate full IOC lifecycle status report.

    Combines the results of ingest_ioc_feed(), check_expiration() and
    deduplicate_iocs() for *csv_file* into a single summary dict.
    """
    ingested = ingest_ioc_feed(csv_file)
    expiration = check_expiration(csv_file, ttl_days)
    dedup = deduplicate_iocs(csv_file)
    # Emitted as a naive UTC ISO string (no offset suffix), matching the
    # format produced before datetime.utcnow() was deprecated.
    generated = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    return {
        "generated": generated,
        "total_iocs": ingested["total_ingested"],
        "unique_iocs": dedup["unique_count"],
        "duplicates": dedup["duplicates_removed"],
        "active": expiration["active"],
        "expired": expiration["expired"],
        "by_type": ingested["by_type"],
        "ttl_days": ttl_days,
    }


def _count_field(items, field):
    """Count how often each value of *field* occurs across *items*.

    Missing or empty values are counted under "unknown".
    """
    counts = {}
    for item in items:
        val = item.get(field) or "unknown"
        counts[val] = counts.get(val, 0) + 1
    return counts


def main():
    """Parse the command line, run the chosen sub-command and print JSON."""
    parser = argparse.ArgumentParser(description="IOC Lifecycle Management Agent")
    sub = parser.add_subparsers(dest="command")
    e = sub.add_parser("extract", help="Extract IOCs from text")
    e.add_argument("--file", required=True)
    i = sub.add_parser("ingest", help="Ingest IOC feed CSV")
    i.add_argument("--csv", required=True)
    x = sub.add_parser("expire", help="Check IOC expiration")
    x.add_argument("--csv", required=True)
    x.add_argument("--ttl", type=int, default=90, help="TTL in days")
    d = sub.add_parser("dedup", help="Deduplicate IOCs")
    d.add_argument("--csv", required=True)
    r = sub.add_parser("report", help="Full lifecycle report")
    r.add_argument("--csv", required=True)
    r.add_argument("--ttl", type=int, default=90)
    args = parser.parse_args()

    handlers = {
        "extract": lambda a: extract_iocs(a.file),
        "ingest": lambda a: ingest_ioc_feed(a.csv),
        "expire": lambda a: check_expiration(a.csv, a.ttl),
        "dedup": lambda a: deduplicate_iocs(a.csv),
        "report": lambda a: generate_lifecycle_report(a.csv, a.ttl),
    }
    handler = handlers.get(args.command)
    if handler is None:
        # No sub-command given: show usage instead of failing.
        parser.print_help()
        return
    result = handler(args)
    print(json.dumps(result, indent=2, default=str))


if __name__ == "__main__":
    main()