mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-11 21:54:56 +03:00
266 lines
8.9 KiB
Python
266 lines
8.9 KiB
Python
#!/usr/bin/env python3
|
|
"""Threat intelligence lifecycle management agent.
|
|
|
|
Manages the threat intelligence lifecycle: collection from feeds,
processing/normalization of IOCs, analysis/enrichment via VirusTotal
and AbuseIPDB, dissemination to SIEM/firewalls, and tracking of
IOC aging and confidence scoring.
|
|
"""
|
|
import argparse
|
|
import csv
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from datetime import datetime, timezone, timedelta
|
|
|
|
try:
|
|
import requests
|
|
except ImportError:
|
|
requests = None
|
|
|
|
|
|
# Compiled patterns used to recognise each supported IOC type in free text.
# Dict order matters: callers iterate it and take the first full match.
IOC_PATTERNS = {
    "ipv4": re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b'),
    "domain": re.compile(r'\b(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z]{2,}\b', re.I),
    "md5": re.compile(r'\b[a-fA-F0-9]{32}\b'),
    "sha1": re.compile(r'\b[a-fA-F0-9]{40}\b'),
    "sha256": re.compile(r'\b[a-fA-F0-9]{64}\b'),
    "url": re.compile(r'https?://[^\s<>"{}|\\^`\[\]]+'),
    "email": re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'),
}


def _is_public_ipv4(candidate):
    """Return True if *candidate* is a well-formed, non-private IPv4 address.

    Rejects dotted quads with an octet above 255 (the regex alone accepts
    them) and the ranges this tool treats as non-actionable: 0.0.0.0/8,
    10.0.0.0/8, 127.0.0.0/8, 172.16.0.0/12 and 192.168.0.0/16.
    """
    octets = [int(part) for part in candidate.split(".")]
    if any(octet > 255 for octet in octets):
        return False
    first, second = octets[0], octets[1]
    if first in (0, 10, 127):
        return False
    if first == 192 and second == 168:
        return False
    # Only 172.16.x.x - 172.31.x.x is private; the rest of 172/8 is routable.
    if first == 172 and 16 <= second <= 31:
        return False
    return True


def extract_iocs(text):
    """Extract IOCs from unstructured text.

    Args:
        text: Free-form text to scan. ``None`` or an empty string yields
            an empty result.

    Returns:
        A dict mapping each IOC type that produced at least one match to a
        sorted list of its unique matched strings. Private, loopback and
        malformed IPv4 addresses are excluded.
    """
    if not text:
        return {}

    iocs = {}
    for ioc_type, pattern in IOC_PATTERNS.items():
        matches = set(pattern.findall(text))
        if ioc_type == "ipv4":
            # The previous inline filter mixed ``and``/``or`` without
            # parentheses and therefore let 10.x, 127.x and 192.168.x through.
            matches = {ip for ip in matches if _is_public_ipv4(ip)}
        if matches:
            iocs[ioc_type] = sorted(matches)
    return iocs
|
|
|
|
|
|
def load_ioc_feed(source):
    """Load IOCs from a file (JSON, CSV, or plain text).

    The format is chosen from the file extension:

    * ``.json`` - a top-level list is returned as-is; for a top-level object
      the first of the keys ``indicators``, ``iocs``, ``data`` that holds a
      list is returned. Anything else yields an empty list.
    * ``.csv`` - one dict per row, keyed by the header line.
    * any other extension - the file is scanned with ``extract_iocs`` and
      each hit becomes ``{"type", "value", "source"}``.

    Files are decoded as UTF-8 and a leading byte-order mark is tolerated,
    so a BOM does not end up inside the first CSV column name.

    Args:
        source: Path of the feed file.

    Returns:
        A list of raw IOC records (dicts, or whatever items a JSON list held).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If JSON content is malformed or the bytes are not UTF-8
            (JSON/CSV only; undecodable bytes in plain text are replaced).
    """
    ext = os.path.splitext(source)[1].lower()

    if ext == ".json":
        with open(source, "r", encoding="utf-8-sig") as f:
            data = json.load(f)
        if isinstance(data, list):
            return data
        if isinstance(data, dict):
            for key in ("indicators", "iocs", "data"):
                candidate = data.get(key)
                # Skip null/scalar wrappers instead of handing the caller
                # something it cannot iterate as a list of indicators.
                if isinstance(candidate, list):
                    return candidate
        return []

    if ext == ".csv":
        with open(source, "r", newline="", encoding="utf-8-sig") as f:
            return list(csv.DictReader(f))

    # Unstructured reports often carry stray bytes; keep scanning rather
    # than aborting the whole load on one bad character.
    with open(source, "r", encoding="utf-8-sig", errors="replace") as f:
        text = f.read()

    iocs = []
    for ioc_type, values in extract_iocs(text).items():
        for value in values:
            iocs.append({"type": ioc_type, "value": value, "source": source})
    return iocs
|
|
|
|
|
|
def _parse_confidence(raw, default=50):
    """Coerce a feed-supplied confidence into a number, or *default*.

    Numbers pass through unchanged. Strings (as produced by CSV feeds) are
    parsed; integral values come back as ``int``. Anything missing,
    non-numeric or non-finite falls back to *default*.
    """
    if isinstance(raw, bool):
        return default
    if isinstance(raw, (int, float)):
        return raw
    if not isinstance(raw, str):
        return default
    try:
        number = float(raw.strip())
    except ValueError:
        return default
    # Reject NaN / infinity: they would poison later min/max clamping.
    if number != number or number in (float("inf"), float("-inf")):
        return default
    return int(number) if number.is_integer() else number


def normalize_ioc(ioc):
    """Normalize IOC into standard format.

    Args:
        ioc: Either a bare indicator string or a dict-like feed record.
            Other values are stringified (``None`` becomes an empty value).

    Returns:
        For a string: ``{"type", "value"}`` where the type is the first
        ``IOC_PATTERNS`` entry that fully matches the stripped string, or
        ``"unknown"``. For a dict: a record with ``type``, ``value``,
        ``source``, ``confidence``, ``first_seen``, ``last_seen``, ``tags``
        and ``description``; ``type``/``value`` fall back to the
        ``indicator_type``/``indicator`` keys and ``confidence`` is always
        numeric (default 50).
    """
    if not isinstance(ioc, (str, dict)):
        ioc = "" if ioc is None else str(ioc)

    if isinstance(ioc, str):
        # Strip first: surrounding whitespace would otherwise defeat
        # fullmatch and misclassify a valid indicator as "unknown".
        candidate = ioc.strip()
        for ioc_type, pattern in IOC_PATTERNS.items():
            if pattern.fullmatch(candidate):
                return {"type": ioc_type, "value": candidate.lower()}
        return {"type": "unknown", "value": candidate}

    raw_type = ioc.get("type") or ioc.get("indicator_type") or "unknown"
    raw_value = ioc.get("value") or ioc.get("indicator") or ""
    return {
        # str() guards against feeds that encode these fields as numbers.
        "type": str(raw_type).lower().strip(),
        "value": str(raw_value).lower().strip(),
        "source": ioc.get("source", ""),
        "confidence": _parse_confidence(ioc.get("confidence", 50)),
        "first_seen": ioc.get("first_seen", ""),
        "last_seen": ioc.get("last_seen", ""),
        "tags": ioc.get("tags", []),
        "description": ioc.get("description", ""),
    }
|
|
|
|
|
|
def enrich_ioc_virustotal(ioc_value, ioc_type, api_key):
    """Enrich IOC via VirusTotal API v3.

    Args:
        ioc_value: The indicator to look up.
        ioc_type: One of ``md5``, ``sha1``, ``sha256``, ``domain``, ``ipv4``
            or ``url``; any other type is not queried.
        api_key: VirusTotal API key, sent as the ``x-apikey`` header.

    Returns:
        A dict with ``malicious``, ``suspicious``, ``harmless``,
        ``undetected``, ``reputation`` and ``source`` on a 200 response.
        An empty dict when the key or value is missing, the type is
        unsupported, ``requests`` is unavailable, the request fails, or the
        response is not the expected JSON shape. This function is
        best-effort and never raises for network errors.
    """
    if not api_key or not ioc_value:
        return {}

    # Function-scope imports: these names are not imported at module level.
    import base64
    from urllib.parse import quote

    base = "https://www.virustotal.com/api/v3"
    value = str(ioc_value)

    if ioc_type in ("md5", "sha1", "sha256"):
        url = f"{base}/files/{quote(value, safe='')}"
    elif ioc_type == "domain":
        url = f"{base}/domains/{quote(value, safe='')}"
    elif ioc_type == "ipv4":
        url = f"{base}/ip_addresses/{quote(value, safe='')}"
    elif ioc_type == "url":
        # VirusTotal accepts unpadded URL-safe base64 of the URL as its
        # identifier. A SHA-256 only works for VT's own canonical form of
        # the URL, which a raw feed value is not guaranteed to be.
        url_id = base64.urlsafe_b64encode(value.encode()).decode().rstrip("=")
        url = f"{base}/urls/{url_id}"
    else:
        return {}

    if not requests:
        return {}

    try:
        resp = requests.get(url, headers={"x-apikey": api_key}, timeout=15)
        if resp.status_code != 200:
            return {}
        payload = resp.json()
    except (requests.RequestException, ValueError):
        # Deliberate best-effort: a failed or garbled lookup just means
        # "no enrichment" for this indicator.
        return {}

    if not isinstance(payload, dict):
        return {}
    attributes = (payload.get("data") or {}).get("attributes") or {}
    stats = attributes.get("last_analysis_stats") or {}
    return {
        "malicious": stats.get("malicious", 0),
        "suspicious": stats.get("suspicious", 0),
        "harmless": stats.get("harmless", 0),
        "undetected": stats.get("undetected", 0),
        "reputation": attributes.get("reputation", 0),
        "source": "virustotal",
    }
|
|
|
|
|
|
def _numeric_confidence(raw, default=50):
    """Return *raw* as a number, parsing numeric strings; else *default*."""
    if isinstance(raw, bool):
        return default
    if isinstance(raw, (int, float)):
        return raw
    if isinstance(raw, str):
        try:
            number = float(raw.strip())
        except ValueError:
            return default
        # NaN / infinity cannot be clamped meaningfully.
        if number != number or number in (float("inf"), float("-inf")):
            return default
        return int(number) if number.is_integer() else number
    return default


def calculate_confidence(ioc, enrichment=None):
    """Calculate confidence score for an IOC (0-100).

    Starts from the record's ``confidence`` (default 50; numeric strings
    are accepted), then applies:

    * enrichment: +30 / +20 / +10 for more than 10 / more than 5 / at least
      one malicious verdict, or -20 when there are none and more than 20
      harmless verdicts;
    * aging: -20 when ``first_seen`` is over 180 days old, -10 when over 90.

    Args:
        ioc: Normalized IOC dict; reads ``confidence`` and ``first_seen``.
        enrichment: Optional dict with ``malicious`` / ``harmless`` counts.

    Returns:
        The adjusted score clamped to the range 0-100. An unparseable
        ``first_seen`` simply skips the aging step.
    """
    score = _numeric_confidence(ioc.get("confidence", 50))

    if enrichment:
        malicious = enrichment.get("malicious", 0) or 0
        harmless = enrichment.get("harmless", 0) or 0
        if malicious > 10:
            score = min(score + 30, 100)
        elif malicious > 5:
            score = min(score + 20, 100)
        elif malicious > 0:
            score = min(score + 10, 100)
        elif harmless > 20:
            score = max(score - 20, 0)

    first_seen = ioc.get("first_seen", "")
    if first_seen and isinstance(first_seen, str):
        try:
            stamp = first_seen.strip()
            if "T" in stamp:
                seen_dt = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
            else:
                seen_dt = datetime.strptime(stamp[:10], "%Y-%m-%d")
            # Timestamps without an offset parse as naive; treat them as UTC
            # so the subtraction below works instead of raising TypeError
            # and silently skipping the decay.
            if seen_dt.tzinfo is None:
                seen_dt = seen_dt.replace(tzinfo=timezone.utc)
            age_days = (datetime.now(timezone.utc) - seen_dt).days
        except (ValueError, TypeError, OverflowError):
            age_days = None

        if age_days is not None:
            if age_days > 180:
                score = max(score - 20, 0)
            elif age_days > 90:
                score = max(score - 10, 0)

    return min(max(score, 0), 100)
|
|
|
|
|
|
def format_summary(iocs, enriched_count):
    """Print lifecycle report.

    Writes a banner, the total and enriched counts, a per-type tally
    (largest first), the high / medium / low confidence bucket sizes, and
    up to 15 high-confidence indicators to stdout.

    Args:
        iocs: List of IOC dicts; reads ``type``, ``value`` and ``confidence``.
        enriched_count: Number of IOCs that received enrichment data.
    """
    rule = "=" * 60
    print("\n" + rule)
    print(" Threat Intelligence Lifecycle Report")
    print(rule)
    print(f" Total IOCs : {len(iocs)}")
    print(f" Enriched : {enriched_count}")

    # Tally indicators per type, then list the most common type first.
    type_totals = {}
    for entry in iocs:
        label = entry.get("type", "unknown")
        type_totals[label] = type_totals.get(label, 0) + 1
    print("\n By Type:")
    ranked = sorted(type_totals.items(), key=lambda pair: pair[1], reverse=True)
    for label, total in ranked:
        print(f" {label:12s}: {total}")

    # Single pass over the list to fill the three confidence buckets.
    high, medium, low = [], [], []
    for entry in iocs:
        level = entry.get("confidence", 0)
        if level >= 80:
            high.append(entry)
        elif 50 <= level < 80:
            medium.append(entry)
        elif level < 50:
            low.append(entry)
    print("\n By Confidence:")
    print(f" High (>=80) : {len(high)}")
    print(f" Medium : {len(medium)}")
    print(f" Low (<50) : {len(low)}")

    if not high:
        return

    print("\n High-Confidence IOCs:")
    for entry in high[:15]:
        shown_value = entry['value'][:50]
        shown_conf = entry.get('confidence', 0)
        print(f" [{entry['type']:8s}] {shown_value:50s} (confidence: {shown_conf})")
|
|
|
|
|
|
def main():
    """Command-line entry point: load, normalize, score and report IOCs.

    Pipeline: load the feed named by ``--source``; normalize every record
    and drop those without a value; de-duplicate on (type, value);
    optionally enrich via VirusTotal; compute a confidence score for every
    IOC; filter by ``--min-confidence``; print the summary; then either
    write the JSON report to ``--output`` or, with ``--verbose``, dump it
    to stdout.

    Exits with an error message if the feed cannot be read or parsed.
    """
    parser = argparse.ArgumentParser(description="Threat intelligence lifecycle management agent")
    parser.add_argument("--source", required=True, help="IOC source file (JSON/CSV/text)")
    parser.add_argument("--vt-key", help="VirusTotal API key (or VT_API_KEY env)")
    parser.add_argument("--enrich", action="store_true", help="Enrich IOCs via VirusTotal")
    parser.add_argument("--min-confidence", type=int, default=0, help="Min confidence to include")
    parser.add_argument("--output", "-o", help="Output JSON report")
    parser.add_argument("--verbose", "-v", action="store_true")
    args = parser.parse_args()

    vt_key = args.vt_key or os.environ.get("VT_API_KEY", "")

    try:
        raw_iocs = load_ioc_feed(args.source)
    except (OSError, ValueError, csv.Error) as exc:
        # Unreadable or malformed feeds are a user error, not a crash.
        sys.exit(f"[!] Could not load IOC feed {args.source}: {exc}")
    print(f"[*] Loaded {len(raw_iocs)} raw IOCs from {args.source}")

    iocs = [normalize_ioc(ioc) for ioc in raw_iocs]
    iocs = [i for i in iocs if i.get("value")]

    # Deduplicate on (type, value), keeping the first occurrence in order.
    unique_iocs = {}
    for ioc in iocs:
        unique_iocs.setdefault((ioc["type"], ioc["value"]), ioc)
    iocs = list(unique_iocs.values())
    print(f"[*] {len(iocs)} unique IOCs after dedup")

    max_enrich = 100  # Rate limit: cap VirusTotal lookups per run.
    do_enrich = bool(args.enrich and vt_key)
    if args.enrich and not vt_key:
        print("[!] --enrich requested but no VirusTotal API key was provided; "
              "skipping enrichment", file=sys.stderr)
    if do_enrich:
        print(f"[*] Enriching IOCs via VirusTotal...")

    enriched_count = 0
    for index, ioc in enumerate(iocs):
        enrichment = None
        if do_enrich and index < max_enrich:
            enrichment = enrich_ioc_virustotal(ioc["value"], ioc["type"], vt_key)
            if enrichment:
                ioc["enrichment"] = enrichment
                enriched_count += 1
        # Score every IOC, including those beyond the enrichment cap;
        # otherwise they would keep a raw, un-aged confidence value.
        ioc["confidence"] = calculate_confidence(ioc, enrichment)

    iocs = [i for i in iocs if i.get("confidence", 0) >= args.min_confidence]
    iocs.sort(key=lambda x: -x.get("confidence", 0))

    format_summary(iocs, enriched_count)

    report = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "tool": "TI Lifecycle Manager",
        "source": args.source,
        "total_iocs": len(iocs),
        "enriched": enriched_count,
        "iocs": iocs,
    }

    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)
        print(f"\n[+] Report saved to {args.output}")
    elif args.verbose:
        print(json.dumps(report, indent=2))
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|