#!/usr/bin/env python3
"""Agent for performing indicator of compromise (IOC) lifecycle management."""

import argparse
import csv
import json
import re
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path

# Iteration order matters: type auto-detection in ingest_ioc_feed() returns the
# first pattern that fully matches an indicator.
IOC_PATTERNS = {
    "ipv4": re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b"),
    "domain": re.compile(r"\b(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}\b"),
    "md5": re.compile(r"\b[a-f0-9]{32}\b", re.I),
    "sha256": re.compile(r"\b[a-f0-9]{64}\b", re.I),
    "sha1": re.compile(r"\b[a-f0-9]{40}\b", re.I),
    "url": re.compile(r"https?://[^\s<>\"']+"),
    "email": re.compile(r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b"),
    "cve": re.compile(r"CVE-\d{4}-\d{4,}", re.I),
}

# Column-name aliases accepted in IOC CSV feeds, in priority order.
_INDICATOR_KEYS = ("indicator", "ioc", "value", "Indicator")
_TYPE_KEYS = ("type", "ioc_type", "Type")
_DATE_KEYS = ("first_seen", "date", "added")


def _read_rows(csv_file):
    """Return every row of *csv_file* as a list of dicts keyed by header name."""
    # newline="" lets the csv module handle line endings inside quoted fields.
    with open(csv_file, "r", encoding="utf-8", errors="replace", newline="") as f:
        return list(csv.DictReader(f))


def _first_value(row, *keys):
    """Return the first non-empty value among *keys* in *row*, or ""."""
    # csv.DictReader fills missing trailing columns with None, so treat both
    # None and "" as "not provided" and fall through to the next alias.
    for key in keys:
        value = row.get(key)
        if value:
            return value
    return ""


def _detect_type(indicator):
    """Return the first IOC type whose pattern fully matches, or ""."""
    for ioc_type, pattern in IOC_PATTERNS.items():
        if pattern.fullmatch(indicator):
            return ioc_type
    return ""


def _parse_utc(date_str):
    """Parse an ISO-8601 timestamp into a timezone-aware UTC datetime.

    Naive timestamps are assumed to already be in UTC. Raises ValueError for
    empty or malformed input.
    """
    text = date_str.strip()
    # A trailing "Z" is only accepted by fromisoformat() on newer Pythons.
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"
    parsed = datetime.fromisoformat(text)
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def extract_iocs(text_file):
    """Extract IOCs from a text file or report.

    Returns a dict with the source path, the unique matches per IOC type
    (sorted, capped at 100 per type) and counts. The counts are computed from
    the capped lists, so each per-type count is at most 100.
    """
    text = Path(text_file).read_text(encoding="utf-8", errors="replace")
    extracted = {}
    for ioc_type, pattern in IOC_PATTERNS.items():
        # Sort before truncating so the reported subset is stable across runs
        # (set iteration order of strings varies with hash randomization).
        matches = sorted(set(pattern.findall(text)))
        if matches:
            extracted[ioc_type] = matches[:100]
    by_type = {ioc_type: len(values) for ioc_type, values in extracted.items()}
    return {
        "source": text_file,
        "total_iocs": sum(by_type.values()),
        "by_type": by_type,
        "indicators": extracted,
    }


def ingest_ioc_feed(csv_file):
    """Ingest IOC feed from CSV and normalize.

    Each row is mapped onto a fixed schema using the supported column aliases.
    When no type column is provided, the type is inferred from IOC_PATTERNS.
    Only the first 50 normalized IOCs are included in the result; the counts
    cover every row.
    """
    iocs = []
    for row in _read_rows(csv_file):
        indicator = _first_value(row, *_INDICATOR_KEYS).strip()
        ioc_type = _first_value(row, *_TYPE_KEYS) or _detect_type(indicator)
        iocs.append({
            "indicator": indicator,
            "type": ioc_type,
            "source": _first_value(row, "source", "feed"),
            "confidence": _first_value(row, "confidence", "score"),
            "first_seen": _first_value(row, "first_seen", "date"),
            "tags": _first_value(row, "tags", "malware_family"),
        })
    return {
        "total_ingested": len(iocs),
        "by_type": _count_field(iocs, "type"),
        "iocs": iocs[:50],
    }


def check_expiration(ioc_db_file, ttl_days=90):
    """Check IOC database for expired indicators based on TTL.

    A row is expired when its timestamp is more than *ttl_days* whole days old.
    Rows whose timestamp is missing or unparseable are kept as active. At most
    30 expired rows are returned, each annotated with "age_days".
    """
    rows = _read_rows(ioc_db_file)
    now = datetime.now(timezone.utc)
    expired = []
    active = []
    for row in rows:
        try:
            added = _parse_utc(_first_value(row, *_DATE_KEYS))
        except (ValueError, OverflowError):
            # No usable timestamp: do not expire what we cannot date.
            active.append(row)
            continue
        age_days = (now - added).days
        if age_days > ttl_days:
            expired.append({**row, "age_days": age_days})
        else:
            active.append(row)
    return {
        "total": len(rows),
        "active": len(active),
        "expired": len(expired),
        "ttl_days": ttl_days,
        "expired_indicators": expired[:30],
    }


def deduplicate_iocs(csv_file):
    """Deduplicate IOCs and merge metadata from multiple sources.

    Indicators are compared case-insensitively after stripping whitespace. The
    type of the first occurrence is kept; sources of all occurrences are merged
    into a sorted list.
    """
    rows = _read_rows(csv_file)
    seen = {}
    for row in rows:
        key = _first_value(row, *_INDICATOR_KEYS).strip().lower()
        source = row.get("source") or ""
        entry = seen.get(key)
        if entry is None:
            seen[key] = {
                "indicator": key,
                "type": _first_value(row, *_TYPE_KEYS),
                "sources": {source},
                "count": 1,
            }
        else:
            entry["sources"].add(source)
            entry["count"] += 1
    unique = [
        {
            "indicator": entry["indicator"],
            "type": entry["type"],
            "sources": sorted(entry["sources"]),
            "occurrences": entry["count"],
        }
        for entry in seen.values()
    ]
    return {
        "original_count": len(rows),
        "unique_count": len(unique),
        "duplicates_removed": len(rows) - len(unique),
        "multi_source": [u for u in unique if u["occurrences"] > 1][:20],
        "unique_iocs": unique[:50],
    }


def generate_lifecycle_report(csv_file, ttl_days=90):
    """Generate full IOC lifecycle status report.

    Combines the ingest, expiration and deduplication summaries for one CSV.
    """
    ingested = ingest_ioc_feed(csv_file)
    expiration = check_expiration(csv_file, ttl_days)
    dedup = deduplicate_iocs(csv_file)
    # Keep the historical naive-UTC ISO format (no "+00:00" suffix).
    generated = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    return {
        "generated": generated,
        "total_iocs": ingested["total_ingested"],
        "unique_iocs": dedup["unique_count"],
        "duplicates": dedup["duplicates_removed"],
        "active": expiration["active"],
        "expired": expiration["expired"],
        "by_type": ingested["by_type"],
        "ttl_days": ttl_days,
    }


def _count_field(items, field):
    """Count occurrences of *field* across *items*; empty values count as "unknown"."""
    return dict(Counter(item.get(field) or "unknown" for item in items))


def main():
    """Parse the command line, run the selected sub-command, print JSON."""
    parser = argparse.ArgumentParser(description="IOC Lifecycle Management Agent")
    sub = parser.add_subparsers(dest="command")

    extract_cmd = sub.add_parser("extract", help="Extract IOCs from text")
    extract_cmd.add_argument("--file", required=True)

    ingest_cmd = sub.add_parser("ingest", help="Ingest IOC feed CSV")
    ingest_cmd.add_argument("--csv", required=True)

    expire_cmd = sub.add_parser("expire", help="Check IOC expiration")
    expire_cmd.add_argument("--csv", required=True)
    expire_cmd.add_argument("--ttl", type=int, default=90, help="TTL in days")

    dedup_cmd = sub.add_parser("dedup", help="Deduplicate IOCs")
    dedup_cmd.add_argument("--csv", required=True)

    report_cmd = sub.add_parser("report", help="Full lifecycle report")
    report_cmd.add_argument("--csv", required=True)
    report_cmd.add_argument("--ttl", type=int, default=90)

    args = parser.parse_args()

    # Lambdas defer attribute access: each sub-command defines different args.
    handlers = {
        "extract": lambda: extract_iocs(args.file),
        "ingest": lambda: ingest_ioc_feed(args.csv),
        "expire": lambda: check_expiration(args.csv, args.ttl),
        "dedup": lambda: deduplicate_iocs(args.csv),
        "report": lambda: generate_lifecycle_report(args.csv, args.ttl),
    }
    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
        return
    print(json.dumps(handler(), indent=2, default=str))


if __name__ == "__main__":
    main()