Files
Anthropic-Cybersecurity-Skills/skills/building-detection-rule-with-splunk-spl/scripts/process.py
T

339 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Splunk SPL Detection Rule Builder and Validator
Generates, validates, and manages Splunk SPL detection rules
for SOC correlation searches. Supports MITRE ATT&CK mapping
and rule quality scoring.
"""
import json
import re
import hashlib
from datetime import datetime
from typing import Optional
# Catalogue of MITRE ATT&CK techniques this module knows about, keyed by
# technique ID. Each entry carries the technique's display name and the
# tactic it is reported under. Rule validation treats IDs missing from this
# table as unknown, and coverage reporting uses its size as the denominator.
MITRE_TECHNIQUES = {
    "T1110.001": {"name": "Password Guessing", "tactic": "Credential Access"},
    "T1110.003": {"name": "Password Spraying", "tactic": "Credential Access"},
    "T1059.001": {"name": "PowerShell", "tactic": "Execution"},
    "T1059.003": {"name": "Windows Command Shell", "tactic": "Execution"},
    "T1021.002": {"name": "SMB/Windows Admin Shares", "tactic": "Lateral Movement"},
    "T1021.001": {"name": "Remote Desktop Protocol", "tactic": "Lateral Movement"},
    # T1048 is "Exfiltration Over Alternative Protocol"; the C2-channel
    # variant is a different technique (T1041).
    "T1048": {"name": "Exfiltration Over Alternative Protocol", "tactic": "Exfiltration"},
    "T1048.003": {"name": "Exfiltration Over Unencrypted Non-C2 Protocol", "tactic": "Exfiltration"},
    "T1053.005": {"name": "Scheduled Task", "tactic": "Persistence"},
    "T1003.001": {"name": "LSASS Memory", "tactic": "Credential Access"},
    "T1078": {"name": "Valid Accounts", "tactic": "Defense Evasion"},
    "T1078.002": {"name": "Domain Accounts", "tactic": "Defense Evasion"},
    "T1547.001": {"name": "Registry Run Keys", "tactic": "Persistence"},
    "T1055": {"name": "Process Injection", "tactic": "Defense Evasion"},
    "T1071.001": {"name": "Web Protocols", "tactic": "Command and Control"},
    "T1036.005": {"name": "Match Legitimate Name", "tactic": "Defense Evasion"},
    "T1027": {"name": "Obfuscated Files or Information", "tactic": "Defense Evasion"},
    "T1218.011": {"name": "Rundll32", "tactic": "Defense Evasion"},
    "T1543.003": {"name": "Windows Service", "tactic": "Persistence"},
    "T1105": {"name": "Ingress Tool Transfer", "tactic": "Command and Control"},
}
class SplunkDetectionRule:
    """Represents a Splunk SPL detection rule with metadata and validation.

    Besides the constructor arguments, each instance carries:
        created: ISO-8601 string from ``datetime.utcnow()`` at construction.
        rule_id: ``SPL-`` plus 12 upper-case hex digits derived from the
            rule name and SPL query.
    """

    def __init__(
        self,
        name: str,
        description: str,
        spl_query: str,
        mitre_techniques: list,
        severity: str = "medium",
        schedule_cron: str = "*/15 * * * *",
        time_window: str = "-20m",
        data_sources: Optional[list] = None,
        false_positive_notes: Optional[list] = None,
    ):
        """Store the rule definition and derive its creation time and ID.

        Args:
            name: Rule title; also used as the savedsearches.conf stanza name.
            description: Human-readable description of what the rule detects.
            spl_query: The SPL search string.
            mitre_techniques: MITRE ATT&CK technique IDs; ``None`` is
                treated as an empty list.
            severity: Severity label checked by ``validate``.
            schedule_cron: Cron expression written to ``cron_schedule``.
            time_window: Value written to ``dispatch.earliest_time``.
            data_sources: Optional list of data source names.
            false_positive_notes: Optional list of known false positives.
        """
        self.name = name
        self.description = description
        self.spl_query = spl_query
        # Tolerate None the same way the other list arguments do, so that
        # validate() and the conf export never iterate over None.
        self.mitre_techniques = [] if mitre_techniques is None else mitre_techniques
        self.severity = severity
        self.schedule_cron = schedule_cron
        self.time_window = time_window
        self.data_sources = data_sources or []
        self.false_positive_notes = false_positive_notes or []
        self.created = datetime.utcnow().isoformat()
        self.rule_id = self._generate_rule_id()

    def _generate_rule_id(self) -> str:
        """Return a deterministic ID built from the rule name and query."""
        hash_input = f"{self.name}:{self.spl_query}"
        return f"SPL-{hashlib.sha256(hash_input.encode()).hexdigest()[:12].upper()}"

    def validate(self) -> dict:
        """Validate the SPL detection rule for common issues.

        The score starts at 100 and is reduced for each heuristic that
        fails; a small bonus is added for data model / tstats usage.

        Returns:
            A dict with ``rule_id``, ``rule_name``, ``valid`` (raw score of
            at least 60), ``quality_score`` (score clamped to 0..100),
            ``issues`` (list of message strings) and ``issue_count``.
        """
        issues = []
        score = 100
        query = self.spl_query
        query_lower = query.lower()

        # Check for missing time constraint
        if "earliest=" not in query and "span=" not in query:
            issues.append("WARNING: No time constraint in query - may scan too much data")
            score -= 10

        # Check for wildcard-heavy searches
        wildcard_count = query.count("*")
        if wildcard_count > 5:
            issues.append(f"WARNING: {wildcard_count} wildcards detected - may impact performance")
            # Penalty grows with each extra wildcard but is capped at 20.
            score -= 5 * min(wildcard_count - 5, 4)

        # Check for aggregation
        agg_commands = ["stats", "eventstats", "streamstats", "tstats", "chart", "timechart"]
        has_aggregation = any(cmd in query_lower for cmd in agg_commands)
        if not has_aggregation:
            issues.append("WARNING: No aggregation command - rule may generate excessive alerts")
            score -= 15

        # Check for threshold
        if "where" not in query_lower:
            issues.append("WARNING: No where clause - rule has no threshold filtering")
            score -= 15

        # Check for enrichment
        if "lookup" not in query_lower:
            issues.append("INFO: No lookup enrichment - consider adding asset/identity context")
            score -= 5

        # Check MITRE mapping
        if not self.mitre_techniques:
            issues.append("WARNING: No MITRE ATT&CK technique mapped")
            score -= 10
        for tech_id in self.mitre_techniques:
            if tech_id not in MITRE_TECHNIQUES:
                issues.append(f"WARNING: Unknown MITRE technique ID: {tech_id}")
                score -= 5

        # Check severity is valid
        valid_severities = ["informational", "low", "medium", "high", "critical"]
        if self.severity not in valid_severities:
            issues.append(f"ERROR: Invalid severity '{self.severity}' - must be one of {valid_severities}")
            score -= 20

        # Check for eval description
        if "eval description" not in query_lower and "eval rule_description" not in query_lower:
            issues.append("INFO: No description field in output - analysts will lack context")
            score -= 5

        # Check for CIM data model usage
        if "datamodel=" in query_lower or "tstats" in query_lower:
            score += 5  # Bonus for using CIM-accelerated searches

        return {
            "rule_id": self.rule_id,
            "rule_name": self.name,
            "valid": score >= 60,
            "quality_score": max(0, min(100, score)),
            "issues": issues,
            "issue_count": len(issues),
        }

    @staticmethod
    def _conf_value(value: str) -> str:
        """Fold a possibly multi-line value into one logical .conf setting.

        Every line break becomes a backslash followed by a newline (the
        .conf line-continuation form), so the value cannot spill into bare
        lines that would corrupt the stanza. Trailing newlines are dropped;
        single-line values are returned unchanged.
        """
        lines = str(value).replace("\r\n", "\n").rstrip("\n").split("\n")
        return "\\\n".join(lines)

    def to_splunk_savedsearch_conf(self) -> str:
        """Generate Splunk savedsearches.conf stanza for the rule.

        Returns:
            The stanza text, one ``key = value`` setting per logical line,
            terminated by a newline. Multi-line queries and descriptions
            are written with backslash line continuations.
        """
        mitre_str = ", ".join(self.mitre_techniques)
        search = self._conf_value(self.spl_query)
        description = self._conf_value(self.description)
        settings = [
            f"[{self.name}]",
            f"search = {search}",
            f"description = {description}",
            f"dispatch.earliest_time = {self.time_window}",
            "dispatch.latest_time = now",
            f"cron_schedule = {self.schedule_cron}",
            "is_scheduled = 1",
            "enableSched = 1",
            f"alert.severity = {self._severity_to_int()}",
            "alert.suppress = 1",
            "alert.suppress.period = 1h",
            "alert.suppress.fields = src_ip",
            "action.notable = 1",
            f"action.notable.param.rule_title = {self.name}",
            f"action.notable.param.rule_description = {description}",
            f"action.notable.param.severity = {self.severity}",
            "action.notable.param.security_domain = threat",
            "action.notable.param.drilldown_name = View triggering events",
            f"action.notable.param.drilldown_search = {search}",
            f"action.notable.param.mitre_attack = {mitre_str}",
        ]
        return "\n".join(settings) + "\n"

    def _severity_to_int(self) -> int:
        """Map the severity label to 1..5; unrecognised labels map to 3."""
        mapping = {"informational": 1, "low": 2, "medium": 3, "high": 4, "critical": 5}
        return mapping.get(self.severity, 3)

    def to_json(self) -> str:
        """Serialise the rule definition and metadata as indented JSON."""
        return json.dumps(
            {
                "rule_id": self.rule_id,
                "name": self.name,
                "description": self.description,
                "spl_query": self.spl_query,
                "mitre_techniques": self.mitre_techniques,
                "severity": self.severity,
                "schedule_cron": self.schedule_cron,
                "time_window": self.time_window,
                "data_sources": self.data_sources,
                "false_positive_notes": self.false_positive_notes,
                "created": self.created,
            },
            indent=2,
        )
class DetectionRuleLibrary:
    """Manages a collection of Splunk detection rules."""

    def __init__(self):
        """Start with an empty rule list."""
        self.rules = []

    def add_rule(self, rule: SplunkDetectionRule):
        """Append ``rule`` to the library."""
        self.rules.append(rule)

    def validate_all(self) -> dict:
        """Validate every rule and summarise the results.

        Returns:
            A dict with ``total_rules``, ``valid_rules``, ``invalid_rules``
            and ``details`` (each rule's ``validate()`` result, in order).
        """
        details = [rule.validate() for rule in self.rules]
        valid_count = sum(1 for detail in details if detail["valid"])
        return {
            "total_rules": len(self.rules),
            "valid_rules": valid_count,
            "invalid_rules": len(details) - valid_count,
            "details": details,
        }

    def get_mitre_coverage(self) -> dict:
        """Report which MITRE ATT&CK techniques the library's rules map to.

        Returns:
            A dict with:
              - ``techniques_covered``: number of mapped technique IDs that
                exist in ``MITRE_TECHNIQUES``.
              - ``total_known_techniques``: size of ``MITRE_TECHNIQUES``.
              - ``coverage_percentage``: covered / known, rounded to 0.1.
              - ``coverage_map``: every mapped ID (known or not) with its
                technique name, tactic and the names of the mapping rules.
              - ``unknown_techniques``: mapped IDs absent from
                ``MITRE_TECHNIQUES``.
        """
        coverage = {}
        for rule in self.rules:
            for tech_id in rule.mitre_techniques or []:
                entry = coverage.get(tech_id)
                if entry is None:
                    catalogued = MITRE_TECHNIQUES.get(tech_id, {})
                    entry = coverage[tech_id] = {
                        "technique": catalogued.get("name", "Unknown"),
                        "tactic": catalogued.get("tactic", "Unknown"),
                        "rules": [],
                    }
                entry["rules"].append(rule.name)

        # Only IDs present in the catalogue count towards coverage; an
        # unknown or mistyped ID must not inflate the percentage (it could
        # otherwise exceed 100%).
        known_ids = [tech_id for tech_id in coverage if tech_id in MITRE_TECHNIQUES]
        unknown_ids = [tech_id for tech_id in coverage if tech_id not in MITRE_TECHNIQUES]
        total_known = len(MITRE_TECHNIQUES)
        percentage = round(len(known_ids) / total_known * 100, 1) if total_known else 0.0
        return {
            "techniques_covered": len(known_ids),
            "total_known_techniques": total_known,
            "coverage_percentage": percentage,
            "coverage_map": coverage,
            "unknown_techniques": unknown_ids,
        }

    def export_savedsearches_conf(self) -> str:
        """Return a savedsearches.conf document containing every rule.

        The output starts with a three-line comment header (generation
        time and rule count) followed by each rule's stanza, separated by
        blank lines.
        """
        header = (
            "# Auto-generated Splunk savedsearches.conf\n"
            f"# Generated: {datetime.utcnow().isoformat()}\n"
            f"# Total Rules: {len(self.rules)}\n\n"
        )
        stanzas = "".join(rule.to_splunk_savedsearch_conf() + "\n" for rule in self.rules)
        return header + stanzas
def build_sample_detection_library() -> DetectionRuleLibrary:
    """Build a sample detection rule library with common SOC use cases.

    Returns:
        A ``DetectionRuleLibrary`` holding three rules: brute-force logins,
        suspicious PowerShell execution, and lateral movement across hosts.
    """
    library = DetectionRuleLibrary()

    library.add_rule(
        SplunkDetectionRule(
            name="Brute Force - Multiple Failed Logins",
            description="Detects brute force attacks with multiple failed login attempts from a single source",
            spl_query=(
                '| tstats summariesonly=true count from datamodel=Authentication '
                'where Authentication.action=failure by Authentication.src, Authentication.user, _time span=5m '
                '| rename "Authentication.*" as * '
                # tstats already emits one row per src/user/time bucket with
                # its own count, so the failures must be summed; a plain
                # `count` here would count buckets, not failed logins.
                '| stats sum(count) as total_failures dc(user) as unique_users values(user) as targeted_users by src '
                '| where total_failures > 20 AND unique_users > 3 '
                '| lookup asset_lookup ip as src OUTPUT priority as asset_priority '
                '| eval severity=case(unique_users > 10, "critical", unique_users > 5, "high", true(), "medium") '
                '| eval description="Brute force detected from ".src." targeting ".unique_users." accounts"'
            ),
            mitre_techniques=["T1110.001"],
            severity="high",
            schedule_cron="*/5 * * * *",
            time_window="-10m",
            data_sources=["Windows Security Event Log", "Linux Auth Log"],
            false_positive_notes=["Service accounts with expired passwords", "Misconfigured applications"],
        )
    )

    library.add_rule(
        SplunkDetectionRule(
            name="Suspicious PowerShell Execution",
            description="Detects encoded or obfuscated PowerShell commands indicating potential malicious activity",
            # NOTE(review): EventCode 4104 (script block logging) is normally
            # written to the Microsoft-Windows-PowerShell/Operational channel
            # rather than the Security log - confirm the sourcetype below
            # matches how the events are ingested.
            spl_query=(
                'index=wineventlog sourcetype=WinEventLog:Security EventCode=4104 '
                '| where match(ScriptBlockText, "(?i)(encodedcommand|invoke-expression|iex|downloadstring|frombase64string|net\\.webclient|invoke-mimikatz)") '
                '| stats count values(ScriptBlockText) as commands by Computer, UserName '
                '| where count > 0 '
                '| lookup identity_lookup identity as UserName OUTPUT department, manager '
                '| eval severity="high" '
                '| eval description="Suspicious PowerShell on ".Computer." by ".UserName'
            ),
            mitre_techniques=["T1059.001", "T1027"],
            severity="high",
            data_sources=["Windows PowerShell Script Block Logging"],
            false_positive_notes=["IT automation scripts using encoded commands", "SCCM deployment scripts"],
        )
    )

    library.add_rule(
        SplunkDetectionRule(
            name="Lateral Movement - Multiple Host Access",
            description="Detects a user or source IP accessing an unusual number of hosts via network logon",
            spl_query=(
                '| tstats summariesonly=true dc(Authentication.dest) as unique_hosts '
                'from datamodel=Authentication where Authentication.action=success Authentication.Logon_Type=3 '
                'by Authentication.src, Authentication.user, _time span=1h '
                '| rename "Authentication.*" as * '
                '| where unique_hosts > 5 '
                '| lookup asset_lookup ip as src OUTPUT asset_name, asset_category '
                '| eval severity=case(unique_hosts > 20, "critical", unique_hosts > 10, "high", true(), "medium") '
                '| eval description=user." accessed ".unique_hosts." hosts from ".src." in 1 hour"'
            ),
            mitre_techniques=["T1021.002", "T1078.002"],
            severity="high",
            data_sources=["Windows Security Event Log"],
            false_positive_notes=["Vulnerability scanners", "IT management tools", "Software deployment systems"],
        )
    )

    return library
if __name__ == "__main__":
    # Demo driver: build the sample library, then print the validation
    # report, the MITRE coverage summary and the generated conf file.
    rule_library = build_sample_detection_library()
    banner = "=" * 70

    print(banner)
    print("SPLUNK SPL DETECTION RULE LIBRARY")
    print(banner)

    # Validation summary followed by a per-rule breakdown.
    report = rule_library.validate_all()
    print(f"\nTotal Rules: {report['total_rules']}")
    print(f"Valid Rules: {report['valid_rules']}")
    print(f"Invalid Rules: {report['invalid_rules']}")
    for rule_report in report["details"]:
        print(f"\n--- {rule_report['rule_name']} ---")
        print(f" Rule ID: {rule_report['rule_id']}")
        print(f" Quality Score: {rule_report['quality_score']}/100")
        print(f" Valid: {rule_report['valid']}")
        for message in rule_report["issues"]:
            print(f" {message}")

    # Technique coverage: headline ratio, then one line per technique.
    mitre = rule_library.get_mitre_coverage()
    covered = mitre["techniques_covered"]
    known_total = mitre["total_known_techniques"]
    percent = mitre["coverage_percentage"]
    print(f"\nMITRE ATT&CK Coverage: {covered}/{known_total} ({percent}%)")
    for technique_id, entry in mitre["coverage_map"].items():
        rule_names = ", ".join(entry["rules"])
        print(f" {technique_id} ({entry['technique']}): {rule_names}")

    # Finally, dump the generated savedsearches.conf.
    generated_conf = rule_library.export_savedsearches_conf()
    print(f"\n{banner}")
    print("GENERATED savedsearches.conf")
    print(banner)
    print(generated_conf)