Files

266 lines
8.9 KiB
Python

#!/usr/bin/env python3
"""Threat intelligence lifecycle management agent.
Manages the threat intelligence lifecycle: collection from feeds,
processing/normalization of IOCs, analysis/enrichment via VirusTotal
and AbuseIPDB, dissemination to SIEM/firewalls, and tracking of
IOC aging and confidence scoring.
"""
import argparse
import csv
import hashlib
import json
import os
import re
import sys
from datetime import datetime, timezone, timedelta
try:
import requests
except ImportError:
requests = None
# (type name, regular expression, regex flags) for each recognised indicator.
# The table order becomes the iteration order of IOC_PATTERNS, so the first
# entry whose pattern matches wins wherever callers stop at the first hit.
_IOC_PATTERN_SPECS = (
    ("ipv4", r'\b(?:\d{1,3}\.){3}\d{1,3}\b', 0),
    ("domain", r'\b(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z]{2,}\b', re.I),
    ("md5", r'\b[a-fA-F0-9]{32}\b', 0),
    ("sha1", r'\b[a-fA-F0-9]{40}\b', 0),
    ("sha256", r'\b[a-fA-F0-9]{64}\b', 0),
    ("url", r'https?://[^\s<>"{}|\\^`\[\]]+', 0),
    ("email", r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', 0),
)

# Compiled once at import time: IOC type name -> compiled pattern.
IOC_PATTERNS = {
    name: re.compile(expression, flags)
    for name, expression, flags in _IOC_PATTERN_SPECS
}
def _is_filtered_ipv4(ip):
    """Return True for addresses in 10/8, 172.16/12, 192.168/16, 127/8 or 0/8."""
    if ip.startswith(("10.", "192.168.", "127.", "0.")):
        return True
    # 172.16.0.0/12 only spans second octets 16 through 31; the rest of
    # 172.0.0.0/8 is publicly routable and must be kept.
    return ip.startswith("172.") and 16 <= int(ip.split(".")[1]) <= 31


def extract_iocs(text, patterns=None):
    """Extract IOCs from unstructured text.

    Args:
        text: Free-form text to scan.
        patterns: Optional mapping of IOC type name to a compiled regex.
            When omitted, the module-level ``IOC_PATTERNS`` table is used.

    Returns:
        A dict mapping every IOC type that matched at least once to the
        sorted list of its unique matches. For the ``"ipv4"`` type, private
        (10/8, 172.16/12, 192.168/16), loopback (127/8) and 0/8 addresses
        are dropped.
    """
    if patterns is None:
        patterns = IOC_PATTERNS
    iocs = {}
    for ioc_type, pattern in patterns.items():
        matches = set(pattern.findall(text))
        if ioc_type == "ipv4":
            # The previous inline condition mixed `and`/`or` without
            # parentheses, which let most private addresses through.
            matches = {ip for ip in matches if not _is_filtered_ipv4(ip)}
        if matches:
            iocs[ioc_type] = sorted(matches)
    return iocs
def load_ioc_feed(source):
    """Load IOCs from a file (JSON, CSV, or plain text).

    The format is chosen from the file extension (case-insensitive):

    * ``.json`` - a top-level list of indicators, or an object holding the
      list under ``indicators``, ``iocs`` or ``data`` (checked in that order;
      the first key whose value is a list wins).
    * ``.csv`` - one indicator per row, keyed by the header row.
    * anything else - read as free text and scanned with ``extract_iocs``;
      each hit becomes ``{"type", "value", "source"}``.

    Files are decoded as UTF-8; a leading byte-order mark is ignored so it
    cannot corrupt the first CSV column name or break JSON parsing.

    Args:
        source: Path of the feed file.

    Returns:
        A list of raw indicators (always a list, possibly empty).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a JSON feed is malformed (``json.JSONDecodeError``)
            or a JSON/CSV feed is not valid UTF-8.
    """
    ext = os.path.splitext(source)[1].lower()

    if ext == ".json":
        with open(source, "r", encoding="utf-8-sig") as f:
            data = json.load(f)
        if isinstance(data, list):
            return data
        if isinstance(data, dict):
            for key in ("indicators", "iocs", "data"):
                payload = data.get(key)
                # Skip null/scalar/object payloads so the caller always
                # receives something it can iterate and len().
                if isinstance(payload, list):
                    return payload
        return []

    if ext == ".csv":
        with open(source, "r", newline="", encoding="utf-8-sig") as f:
            return list(csv.DictReader(f))

    # Unstructured text is scanned best-effort, so undecodable bytes are
    # replaced rather than aborting the whole feed.
    with open(source, "r", encoding="utf-8-sig", errors="replace") as f:
        text = f.read()
    iocs = []
    for ioc_type, values in extract_iocs(text).items():
        for value in values:
            iocs.append({"type": ioc_type, "value": value, "source": source})
    return iocs
def normalize_ioc(ioc):
    """Normalize a raw indicator into the standard IOC record.

    Args:
        ioc: Either a bare indicator string or a dict as produced by a feed.
            Dicts may use ``type``/``indicator_type`` and
            ``value``/``indicator`` as key names.

    Returns:
        For a string: ``{"type", "value"}``, where the type is the first
        ``IOC_PATTERNS`` entry that fully matches the stripped string (the
        value is lower-cased on a match) or ``"unknown"``.
        For a dict: a record with the keys ``type``, ``value``, ``source``,
        ``confidence``, ``first_seen``, ``last_seen``, ``tags`` and
        ``description``. ``type`` and ``value`` are stripped, lower-cased
        strings; ``confidence`` is numeric (default 50).
        For anything else: ``{"type": "unknown", "value": ""}``.
    """
    if isinstance(ioc, str):
        # Strip first: fullmatch() would otherwise reject an otherwise valid
        # indicator just because of a trailing newline or padding.
        value = ioc.strip()
        for ioc_type, pattern in IOC_PATTERNS.items():
            if pattern.fullmatch(value):
                return {"type": ioc_type, "value": value.lower()}
        return {"type": "unknown", "value": value}

    if not isinstance(ioc, dict):
        # Feed entries such as null or bare numbers carry no indicator; the
        # empty value lets callers filter them out instead of crashing.
        return {"type": "unknown", "value": ""}

    confidence = ioc.get("confidence", 50)
    if not isinstance(confidence, (int, float)):
        # CSV feeds deliver every field as text; later scoring needs a number.
        try:
            confidence = int(float(confidence))
        except (TypeError, ValueError, OverflowError):
            confidence = 50

    raw_type = ioc.get("type") or ioc.get("indicator_type") or "unknown"
    raw_value = ioc.get("value") or ioc.get("indicator") or ""
    return {
        "type": str(raw_type).strip().lower(),
        # NOTE: the whole value is lower-cased, including URL paths.
        "value": str(raw_value).lower().strip(),
        "source": ioc.get("source", ""),
        "confidence": confidence,
        "first_seen": ioc.get("first_seen", ""),
        "last_seen": ioc.get("last_seen", ""),
        "tags": ioc.get("tags", []),
        "description": ioc.get("description", ""),
    }
def enrich_ioc_virustotal(ioc_value, ioc_type, api_key):
    """Enrich IOC via VirusTotal API v3.

    Args:
        ioc_value: The indicator to look up.
        ioc_type: One of ``md5``, ``sha1``, ``sha256``, ``domain``, ``ipv4``
            or ``url``; any other type is not looked up.
        api_key: VirusTotal API key, sent in the ``x-apikey`` header.

    Returns:
        A dict with the ``malicious``, ``suspicious``, ``harmless`` and
        ``undetected`` counts from ``last_analysis_stats``, the
        ``reputation`` value and ``"source": "virustotal"``. An empty dict
        is returned when no API key is given, the type is unsupported, the
        ``requests`` package is unavailable, the request fails, the status
        is not 200, or the response body is not a JSON object.
    """
    import base64
    from urllib.parse import quote

    if not api_key:
        return {}

    base = "https://www.virustotal.com/api/v3"
    # Feed values are untrusted: percent-encode them so characters such as
    # "/", "?" or "#" cannot alter the request path.
    path_value = quote(str(ioc_value), safe="")
    if ioc_type in ("md5", "sha1", "sha256"):
        url = f"{base}/files/{path_value}"
    elif ioc_type == "domain":
        url = f"{base}/domains/{path_value}"
    elif ioc_type == "ipv4":
        url = f"{base}/ip_addresses/{path_value}"
    elif ioc_type == "url":
        # VirusTotal accepts the unpadded URL-safe base64 of the URL as its
        # identifier. A SHA-256 is only valid over the *canonicalized* URL,
        # which cannot be reproduced reliably on the client side.
        url_id = base64.urlsafe_b64encode(str(ioc_value).encode()).decode().rstrip("=")
        url = f"{base}/urls/{url_id}"
    else:
        return {}

    if not requests:
        return {}

    try:
        resp = requests.get(url, headers={"x-apikey": api_key}, timeout=15)
        if resp.status_code != 200:
            return {}
        payload = resp.json()
    except (requests.RequestException, ValueError):
        # Enrichment is best-effort: network errors and non-JSON bodies
        # simply leave the IOC un-enriched.
        return {}

    if not isinstance(payload, dict):
        return {}
    data = payload.get("data")
    attributes = data.get("attributes") if isinstance(data, dict) else None
    if not isinstance(attributes, dict):
        attributes = {}
    stats = attributes.get("last_analysis_stats")
    if not isinstance(stats, dict):
        stats = {}
    return {
        "malicious": stats.get("malicious", 0),
        "suspicious": stats.get("suspicious", 0),
        "harmless": stats.get("harmless", 0),
        "undetected": stats.get("undetected", 0),
        "reputation": attributes.get("reputation", 0),
        "source": "virustotal",
    }
def calculate_confidence(ioc, enrichment=None):
    """Calculate confidence score for an IOC (0-100).

    Starts from the IOC's own ``confidence`` (default 50), adjusts it by the
    VirusTotal verdict and then decays it by age.

    Args:
        ioc: IOC record; ``confidence`` and ``first_seen`` are read.
            A non-numeric confidence (for example CSV text) is parsed, and
            falls back to 50 when it cannot be parsed.
        enrichment: Optional dict with ``malicious`` / ``harmless`` counts.

    Returns:
        The adjusted score, clamped to the range 0..100.
    """
    score = ioc.get("confidence", 50)
    if not isinstance(score, (int, float)):
        try:
            score = int(float(score))
        except (TypeError, ValueError, OverflowError):
            score = 50

    # Boost for detections, penalty when many engines call it harmless.
    if enrichment:
        malicious = enrichment.get("malicious", 0)
        if malicious > 10:
            score = min(score + 30, 100)
        elif malicious > 5:
            score = min(score + 20, 100)
        elif malicious > 0:
            score = min(score + 10, 100)
        elif enrichment.get("harmless", 0) > 20:
            score = max(score - 20, 0)

    # Decay based on age: -10 after 90 days, -20 after 180 days.
    first_seen = ioc.get("first_seen", "")
    if first_seen:
        try:
            if "T" in first_seen:
                seen_dt = datetime.fromisoformat(first_seen.replace("Z", "+00:00"))
            else:
                seen_dt = datetime.strptime(first_seen[:10], "%Y-%m-%d")
            if seen_dt.tzinfo is None:
                # Timestamps without an offset are taken as UTC. Without
                # this, aware-minus-naive subtraction raises TypeError and
                # the decay would be silently skipped.
                seen_dt = seen_dt.replace(tzinfo=timezone.utc)
            age_days = (datetime.now(timezone.utc) - seen_dt).days
            if age_days > 180:
                score = max(score - 20, 0)
            elif age_days > 90:
                score = max(score - 10, 0)
        except (ValueError, TypeError):
            # Unparseable or non-string first_seen: leave the score undecayed.
            pass
    return min(max(score, 0), 100)
def format_summary(iocs, enriched_count):
"""Print lifecycle report."""
print(f"\n{'='*60}")
print(f" Threat Intelligence Lifecycle Report")
print(f"{'='*60}")
print(f" Total IOCs : {len(iocs)}")
print(f" Enriched : {enriched_count}")
by_type = {}
for ioc in iocs:
t = ioc.get("type", "unknown")
by_type[t] = by_type.get(t, 0) + 1
print(f"\n By Type:")
for t, count in sorted(by_type.items(), key=lambda x: -x[1]):
print(f" {t:12s}: {count}")
high_conf = [i for i in iocs if i.get("confidence", 0) >= 80]
med_conf = [i for i in iocs if 50 <= i.get("confidence", 0) < 80]
low_conf = [i for i in iocs if i.get("confidence", 0) < 50]
print(f"\n By Confidence:")
print(f" High (>=80) : {len(high_conf)}")
print(f" Medium : {len(med_conf)}")
print(f" Low (<50) : {len(low_conf)}")
if high_conf:
print(f"\n High-Confidence IOCs:")
for i in high_conf[:15]:
print(f" [{i['type']:8s}] {i['value'][:50]:50s} (confidence: {i.get('confidence', 0)})")
def main(argv=None):
    """Run the lifecycle pipeline from the command line.

    Loads the feed, normalizes and de-duplicates the IOCs, optionally
    enriches them through VirusTotal, scores every IOC, drops those below
    ``--min-confidence``, prints the summary and then either writes the JSON
    report to ``--output`` or, with ``--verbose``, prints it.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read the process command line.
    """
    parser = argparse.ArgumentParser(description="Threat intelligence lifecycle management agent")
    parser.add_argument("--source", required=True, help="IOC source file (JSON/CSV/text)")
    parser.add_argument("--vt-key", help="VirusTotal API key (or VT_API_KEY env)")
    parser.add_argument("--enrich", action="store_true", help="Enrich IOCs via VirusTotal")
    parser.add_argument("--max-enrich", type=int, default=100,
                        help="Max IOCs to look up on VirusTotal (default: 100)")
    parser.add_argument("--min-confidence", type=int, default=0, help="Min confidence to include")
    parser.add_argument("--output", "-o", help="Output JSON report")
    parser.add_argument("--verbose", "-v", action="store_true")
    args = parser.parse_args(argv)

    vt_key = args.vt_key or os.environ.get("VT_API_KEY", "")

    try:
        raw_iocs = load_ioc_feed(args.source)
    except (OSError, ValueError, csv.Error) as exc:
        # Missing/unreadable/malformed feed: report it as a usage error
        # instead of dumping a traceback. parser.error() exits the process.
        parser.error(f"cannot load IOC feed {args.source}: {exc}")
    print(f"[*] Loaded {len(raw_iocs)} raw IOCs from {args.source}")

    iocs = [normalize_ioc(ioc) for ioc in raw_iocs]
    iocs = [i for i in iocs if i.get("value")]

    # Deduplicate on (type, value), keeping the first occurrence.
    seen = set()
    unique_iocs = []
    for ioc in iocs:
        key = (ioc["type"], ioc["value"])
        if key not in seen:
            seen.add(key)
            unique_iocs.append(ioc)
    iocs = unique_iocs
    print(f"[*] {len(iocs)} unique IOCs after dedup")

    enriched_count = 0
    if args.enrich:
        if not vt_key:
            print("[!] --enrich requested but no VirusTotal API key was provided; "
                  "skipping enrichment", file=sys.stderr)
        elif requests is None:
            print("[!] --enrich requested but the 'requests' package is not installed; "
                  "skipping enrichment", file=sys.stderr)
        else:
            print(f"[*] Enriching IOCs via VirusTotal...")
            # Only the first N IOCs are looked up, to stay within API quota.
            for ioc in iocs[:max(args.max_enrich, 0)]:
                enrichment = enrich_ioc_virustotal(ioc["value"], ioc["type"], vt_key)
                if enrichment:
                    ioc["enrichment"] = enrichment
                    enriched_count += 1

    # Score every IOC, enriched or not. Previously, when enrichment ran,
    # IOCs beyond the lookup limit were never scored or age-decayed.
    for ioc in iocs:
        ioc["confidence"] = calculate_confidence(ioc, ioc.get("enrichment"))

    iocs = [i for i in iocs if i.get("confidence", 0) >= args.min_confidence]
    iocs.sort(key=lambda entry: entry.get("confidence", 0), reverse=True)

    format_summary(iocs, enriched_count)

    report = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "tool": "TI Lifecycle Manager",
        "source": args.source,
        "total_iocs": len(iocs),
        "enriched": enriched_count,
        "iocs": iocs,
    }
    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)
        print(f"\n[+] Report saved to {args.output}")
    elif args.verbose:
        print(json.dumps(report, indent=2))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()