Files
T
mukul975 c21af3347e Complete folder anatomy for all 649 cybersecurity skills + update LICENSE to Mahipal
- Add scripts/agent.py and references/api-reference.md to all remaining skills
- Update all 648 LICENSE files: copyright now reads 'Mahipal'
- Add implementing-security-monitoring-with-datadog (new skill with full anatomy)
- All 649 skills now have: SKILL.md, LICENSE, scripts/agent.py, references/api-reference.md
2026-03-11 00:22:12 +01:00

165 lines
6.6 KiB
Python

#!/usr/bin/env python3
"""Threat hunt hypothesis framework builder.
Generates structured threat hunting hypotheses from MITRE ATT&CK techniques,
maps data sources, defines detection logic, and tracks hunt outcomes.
"""
import sys
import json
import datetime
import hashlib
try:
import requests
HAS_REQUESTS = True
except ImportError:
HAS_REQUESTS = False
# Hunting maturity scale. The position of each description in the tuple is
# its level number, so the resulting dict maps 0..4 -> description.
HUNT_MATURITY_LEVELS = dict(
    enumerate(
        (
            "Initial - ad hoc, no documentation",
            "Minimal - basic procedures, limited data",
            "Procedural - documented hypotheses, repeatable",
            "Innovative - custom analytics, threat intel driven",
            "Leading - automated, ML-assisted, continuous",
        )
    )
)
# Per-technique hunting metadata, keyed by MITRE ATT&CK technique ID.
# Every entry carries the same three fields:
#   name        - human-readable technique name
#   sources     - telemetry sources in which evidence is expected
#   log_channel - log channel label associated with those sources
DATA_SOURCE_MAP = {
    "T1059.001": {
        "name": "PowerShell",
        "sources": [
            "Script Block Logging (4104)",
            "Module Logging (4103)",
            "Process Creation (4688/Sysmon 1)",
        ],
        "log_channel": "Microsoft-Windows-PowerShell/Operational",
    },
    "T1053.005": {
        "name": "Scheduled Task",
        "sources": [
            "Task Scheduler (4698/4702)",
            "Sysmon Event 1",
        ],
        "log_channel": "Microsoft-Windows-TaskScheduler/Operational",
    },
    "T1078": {
        "name": "Valid Accounts",
        "sources": [
            "Logon Events (4624/4625)",
            "Kerberos (4768/4769)",
        ],
        "log_channel": "Security",
    },
    "T1003.001": {
        "name": "LSASS Memory",
        "sources": [
            "Sysmon Event 10 (ProcessAccess)",
            "Windows Defender alerts",
        ],
        "log_channel": "Microsoft-Windows-Sysmon/Operational",
    },
    "T1071.001": {
        "name": "Web Protocols C2",
        "sources": [
            "Proxy logs",
            "DNS query logs",
            "Zeek http.log",
        ],
        "log_channel": "Proxy/DNS",
    },
    "T1486": {
        "name": "Data Encrypted for Impact",
        "sources": [
            "File creation burst (Sysmon 11)",
            "Canary file triggers",
            "VSS deletion (Sysmon 1)",
        ],
        "log_channel": "Sysmon",
    },
    "T1021.001": {
        "name": "Remote Desktop Protocol",
        "sources": [
            "Logon Type 10 (4624)",
            "RDP connection (1149)",
        ],
        "log_channel": "Security / TerminalServices-RemoteConnectionManager",
    },
}
def generate_hypothesis(technique_id, threat_actor=None, environment=None):
    """Generate a structured threat hunting hypothesis.

    Args:
        technique_id: MITRE ATT&CK technique ID such as ``"T1059.001"``.
            IDs that are not in ``DATA_SOURCE_MAP`` are still accepted; the
            ID itself is then used as the technique name, the data-source
            list is empty and the log channel is ``"Unknown"``.
        threat_actor: Optional actor label inserted in parentheses after
            "An adversary" in the statement.
        environment: Optional description appended as " targeting ..." in
            the statement.

    Returns:
        A new dict with the keys ``hypothesis_id``, ``created``,
        ``technique_id``, ``technique_name``, ``hypothesis_statement``,
        ``data_sources``, ``log_channel``, ``priority`` and ``status``
        (always ``"planned"``).
    """
    ds = DATA_SOURCE_MAP.get(technique_id, {})
    technique_name = ds.get("name", technique_id)
    # Copy so that callers mutating the hypothesis cannot corrupt the shared
    # DATA_SOURCE_MAP entry.
    sources = list(ds.get("sources", []))

    # Read the clock once, timezone-aware (utcnow() is deprecated), then drop
    # tzinfo so the textual form stays "<naive ISO>Z" exactly as before. Using
    # a single reading keeps the ID seed and the "created" stamp in agreement.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    # MD5 is used only to derive a short, non-secret identifier.
    seed = "{}{}".format(technique_id, now)
    hyp_id = "HYP-" + hashlib.md5(seed.encode()).hexdigest()[:8].upper()

    # str.format (rather than "+") tolerates non-string actor/environment.
    actor_part = " ({})".format(threat_actor) if threat_actor else ""
    environment_part = " targeting {}".format(environment) if environment else ""
    # Fall back to a generic phrase when no sources are known for the technique.
    evidence = ", ".join(sources or ["endpoint telemetry"])

    statement = (
        "An adversary{} may be using {} ({}) within our environment{}. "
        "Evidence of this activity can be found in {}.".format(
            actor_part,
            technique_name,
            technique_id,
            environment_part,
            evidence,
        )
    )

    high_priority = technique_id in {"T1003.001", "T1486", "T1059.001"}

    return {
        "hypothesis_id": hyp_id,
        "created": now.isoformat() + "Z",
        "technique_id": technique_id,
        "technique_name": technique_name,
        "hypothesis_statement": statement,
        "data_sources": sources,
        "log_channel": ds.get("log_channel", "Unknown"),
        "priority": "high" if high_priority else "medium",
        "status": "planned",
    }
def build_hunt_plan(hypotheses, analyst="SOC Analyst", maturity_level=2):
    """Build a hunt plan from a list of hypotheses.

    Args:
        hypotheses: List of hypothesis dicts. Each may carry a
            ``"data_sources"`` list; a missing or empty value contributes
            nothing to the coverage list.
        analyst: Name recorded as the plan owner.
        maturity_level: Key into ``HUNT_MATURITY_LEVELS`` describing the
            programme's maturity. Defaults to 2.

    Returns:
        A dict with the keys ``plan_id``, ``created``, ``analyst``,
        ``maturity_level``, ``maturity_description``, ``hypothesis_count``,
        ``hypotheses`` (the list that was passed in), ``data_coverage`` and
        ``estimated_hours`` (4 hours per hypothesis).

    Raises:
        ValueError: If ``maturity_level`` is not a known level.
    """
    if maturity_level not in HUNT_MATURITY_LEVELS:
        raise ValueError(
            "Unknown maturity level {!r}; expected one of {}".format(
                maturity_level, sorted(HUNT_MATURITY_LEVELS)
            )
        )

    # Single timezone-aware clock reading (utcnow() is deprecated); tzinfo is
    # dropped so "created" keeps its "<naive ISO>Z" form. Sharing one reading
    # means plan_id and created can never fall on different days.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # coverage list is identical from run to run (a set's order is not).
    coverage = list(dict.fromkeys(
        src
        for hyp in hypotheses
        for src in (hyp.get("data_sources") or [])
    ))

    return {
        "plan_id": "PLAN-" + now.strftime("%Y%m%d"),
        "created": now.isoformat() + "Z",
        "analyst": analyst,
        "maturity_level": maturity_level,
        "maturity_description": HUNT_MATURITY_LEVELS[maturity_level],
        "hypothesis_count": len(hypotheses),
        "hypotheses": hypotheses,
        "data_coverage": coverage,
        "estimated_hours": len(hypotheses) * 4,
    }
def evaluate_hunt_results(hypothesis, findings_count, true_positives, false_positives):
    """Evaluate hunt execution results and update hypothesis.

    The hypothesis dict is updated in place (``status`` becomes
    ``"completed"`` and a ``results`` dict is attached) and also returned.

    Args:
        hypothesis: Hypothesis dict to update.
        findings_count: Total number of findings the hunt produced.
        true_positives: Findings confirmed as malicious.
        false_positives: Findings confirmed as benign.

    Returns:
        The same ``hypothesis`` object, now carrying ``results`` with the
        keys ``total_findings``, ``true_positives``, ``false_positives``,
        ``precision``, ``outcome`` and ``recommendation``. ``precision`` is
        ``true_positives / findings_count`` rounded to three decimals, or
        ``0.0`` when there were no findings.

    Raises:
        ValueError: If any count is negative, or if the classified findings
            (true + false positives) exceed ``findings_count``. The
            hypothesis is left untouched in that case.
    """
    # Validate before mutating: inconsistent counts would otherwise produce a
    # "precision" outside the range [0, 1].
    if min(findings_count, true_positives, false_positives) < 0:
        raise ValueError("finding counts must be non-negative")
    if true_positives + false_positives > findings_count:
        raise ValueError(
            "true_positives + false_positives ({}) exceeds findings_count ({})".format(
                true_positives + false_positives, findings_count
            )
        )

    confirmed = true_positives > 0

    hypothesis["status"] = "completed"
    hypothesis["results"] = {
        "total_findings": findings_count,
        "true_positives": true_positives,
        "false_positives": false_positives,
        # max(..., 1) avoids division by zero when the hunt found nothing.
        "precision": round(true_positives / max(findings_count, 1), 3),
        "outcome": "confirmed" if confirmed else "not_confirmed",
        "recommendation": (
            "Create detection rule" if confirmed
            else "Refine hypothesis and re-hunt with broader data"
        ),
    }
    return hypothesis
def fetch_attack_techniques(limit=50):
    """Fetch MITRE ATT&CK technique list.

    Downloads the enterprise ATT&CK STIX bundle and extracts the technique
    IDs of all attack patterns that are neither revoked nor deprecated.
    This is best-effort: when ``requests`` is not installed, or the download
    or JSON decoding fails, the technique IDs known to ``DATA_SOURCE_MAP``
    are returned instead.

    Args:
        limit: Maximum number of technique IDs to return (default 50).
            ``None`` returns every technique found.

    Returns:
        A list of technique ID strings.
    """
    fallback = list(DATA_SOURCE_MAP.keys())
    if not HAS_REQUESTS:
        return fallback

    url = "https://raw.githubusercontent.com/mitre/cti/master/enterprise-attack/enterprise-attack.json"
    try:
        resp = requests.get(url, timeout=30)
        # Treat 4xx/5xx responses as failures instead of parsing an error body.
        resp.raise_for_status()
        bundle = resp.json()
    except (requests.RequestException, ValueError):
        # Network/HTTP problems or an undecodable body: use the local list.
        return fallback

    if not isinstance(bundle, dict):
        return fallback

    techniques = []
    for obj in bundle.get("objects") or []:
        if not isinstance(obj, dict) or obj.get("type") != "attack-pattern":
            continue
        # Skip techniques that ATT&CK has withdrawn or superseded.
        if obj.get("revoked", False) or obj.get("x_mitre_deprecated", False):
            continue
        # An object may list several external references; only the one whose
        # source is "mitre-attack" carries the technique ID.
        for ref in obj.get("external_references") or []:
            if (
                isinstance(ref, dict)
                and ref.get("source_name") == "mitre-attack"
                and ref.get("external_id")
            ):
                techniques.append(ref["external_id"])
                break

    return techniques[:limit]
if __name__ == "__main__":
    # Demo run: build hypotheses for the technique IDs given on the command
    # line (or a default set), print the resulting hunt plan, then show a
    # sample evaluation of the first hypothesis.
    banner = "=" * 60
    print(banner)
    print("Threat Hunt Hypothesis Framework")
    print("Hypothesis generation, hunt planning, result tracking")
    print(banner)

    technique_ids = sys.argv[1:]
    if not technique_ids:
        technique_ids = ["T1059.001", "T1078", "T1003.001", "T1486"]

    actor_name = "APT29"
    hunt_hypotheses = [
        generate_hypothesis(tid, threat_actor=actor_name) for tid in technique_ids
    ]

    hunt_plan = build_hunt_plan(hunt_hypotheses)
    plan_summary = "\nHunt Plan: {} ({} hypotheses, ~{} hours)".format(
        hunt_plan["plan_id"],
        hunt_plan["hypothesis_count"],
        hunt_plan["estimated_hours"],
    )
    print(plan_summary)
    print("Maturity: {}".format(hunt_plan["maturity_description"]))

    print("\n--- Hypotheses ---")
    for item in hunt_hypotheses:
        priority_label = item["priority"].upper()
        statement_preview = item["hypothesis_statement"][:120] + "..."
        first_sources = ", ".join(item["data_sources"][:3])
        print(" [{}] {} - {}".format(priority_label, item["technique_id"], item["technique_name"]))
        print(" {}".format(statement_preview))
        print(" Sources: {}".format(first_sources))

    # Illustrative numbers only: 12 findings, of which 3 true positives.
    sample = evaluate_hunt_results(
        hunt_hypotheses[0], findings_count=12, true_positives=3, false_positives=9
    )
    sample_results = sample["results"]
    print("\n--- Sample Result ---")
    print(" {} precision: {} -> {}".format(
        sample["technique_id"],
        sample_results["precision"],
        sample_results["recommendation"],
    ))

    summary = {"hypotheses_generated": len(hunt_hypotheses)}
    print("\n" + json.dumps(summary, indent=2))