Files
T
mukul975 c21af3347e Complete folder anatomy for all 649 cybersecurity skills + update LICENSE to Mahipal
- Add scripts/agent.py and references/api-reference.md to all remaining skills
- Update all 648 LICENSE files: copyright now reads 'Mahipal'
- Add implementing-security-monitoring-with-datadog (new skill with full anatomy)
- All 649 skills now have: SKILL.md, LICENSE, scripts/agent.py, references/api-reference.md
2026-03-11 00:22:12 +01:00

169 lines
7.6 KiB
Python

#!/usr/bin/env python3
"""Agent for performing log source onboarding in SIEM platforms."""
import json
import argparse
import socket
import re
from datetime import datetime
from pathlib import Path
# Syslog facility codes mapped to their conventional keywords.
# Numeric values follow the facility table of RFC 5424 (section 6.2.1);
# codes 8, 9, 11, 12, 14 and 15 were previously missing, so lookups for
# common facilities such as cron (9) would fail.
SYSLOG_FACILITIES = {
    0: "kern",
    1: "user",
    2: "mail",
    3: "daemon",
    4: "auth",
    5: "syslog",
    6: "lpr",
    7: "news",
    8: "uucp",
    9: "cron",
    10: "authpriv",
    11: "ftp",
    12: "ntp",
    13: "audit",
    14: "alert",
    15: "clock",
    16: "local0",
    17: "local1",
    18: "local2",
    19: "local3",
    20: "local4",
    21: "local5",
    22: "local6",
    23: "local7",
}
# Compiled regex signatures, keyed by log-format name. A line is considered
# to "look like" a format when that format's pattern is found in it.
LOG_FORMAT_PATTERNS = {
    name: re.compile(signature)
    for name, signature in (
        # "<PRI>Mmm dd hh:mm:ss" header
        ("syslog_rfc3164", r"^<\d+>\w{3}\s+\d+\s+\d+:\d+:\d+"),
        # "<PRI>VERSION yyyy-mm-ddT..." header
        ("syslog_rfc5424", r"^<\d+>\d+\s+\d{4}-\d{2}-\d{2}T"),
        ("cef", r"^CEF:\d+\|"),
        ("leef", r"^LEEF:\d+\.\d+\|"),
        # Line opens with "{" (optionally preceded by whitespace)
        ("json", r"^\s*\{"),
        # At least three comma-separated fields at the start of the line
        ("csv", r"^[^,]+,[^,]+,[^,]+"),
        # Matches anywhere in the line, not only at the start
        ("windows_event", r"EventID|EventRecordID"),
        ("apache_combined", r'^\S+ \S+ \S+ \[.+\] "\w+ .+ HTTP/'),
    )
}
def detect_log_format(sample_file):
    """Detect the most likely log format of a sample file.

    Only the first 50 non-blank lines are inspected. For each of those lines,
    every pattern in ``LOG_FORMAT_PATTERNS`` that matches adds one vote to its
    format; the format with the most votes is reported.

    Args:
        sample_file: Path of the log sample to inspect. The file is decoded
            as UTF-8 with undecodable bytes replaced.

    Returns:
        A dict with ``sample_file``, ``detected_format``, ``confidence``
        (percentage of inspected lines that voted for the winner),
        ``format_votes``, ``total_lines`` and ``sample_lines`` (first five
        inspected lines). When no pattern matches, ``detected_format`` is
        ``"unknown"``, ``confidence`` is ``0.0`` and the legacy ``format``
        key is also set to ``"unknown"``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    max_lines = 50
    lines = []
    # Stream the file rather than reading it whole: log files can be very
    # large and only the first few non-blank lines are needed.
    with open(sample_file, encoding="utf-8", errors="replace") as handle:
        for raw_line in handle:
            line = raw_line.rstrip("\n")
            if not line.strip():
                continue
            lines.append(line)
            if len(lines) >= max_lines:
                break

    format_votes = {}
    for line in lines:
        for fmt, pattern in LOG_FORMAT_PATTERNS.items():
            if pattern.search(line):
                format_votes[fmt] = format_votes.get(fmt, 0) + 1

    if not format_votes:
        # Same keys as the success result so callers can read either shape;
        # "format" is kept for callers written against the older result.
        return {
            "sample_file": sample_file,
            "detected_format": "unknown",
            "confidence": 0.0,
            "format_votes": {},
            "total_lines": len(lines),
            "sample_lines": lines[:5],
            "format": "unknown",
        }

    detected = max(format_votes, key=format_votes.get)
    return {
        "sample_file": sample_file,
        "detected_format": detected,
        # lines is non-empty here: at least one line produced a vote.
        "confidence": round(format_votes[detected] / len(lines) * 100, 1),
        "format_votes": format_votes,
        "total_lines": len(lines),
        "sample_lines": lines[:5],
    }
def validate_syslog_connectivity(host, port=514, protocol="udp"):
    """Send a test syslog message to a collector and report the outcome.

    Args:
        host: Hostname or IPv4 address of the collector.
        port: Destination port (default 514).
        protocol: ``"udp"`` sends a single datagram; any other value is
            treated as TCP (connect, then send one newline-terminated line).

    Returns:
        A dict with ``host``, ``port``, ``protocol`` and ``status``:
        ``"SENT"`` (UDP datagram handed to the network stack, delivery not
        confirmed), ``"CONNECTED"`` (TCP connection made and message sent) or
        ``"FAILED"`` (with the exception text under ``error``). On success a
        human-readable ``message`` is included. This function never raises.
    """
    # Local import: the module only imports the ``datetime`` class.
    from datetime import timezone

    results = {"host": host, "port": port, "protocol": protocol}
    # Timezone-aware timestamp so the message carries an explicit UTC offset.
    timestamp = datetime.now(timezone.utc).isoformat()
    test_msg = f"<14>1 {timestamp} test-agent siem-onboard - - - Test syslog connectivity"
    try:
        if protocol == "udp":
            # ``with`` guarantees the socket is closed even if sending fails.
            with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
                sock.settimeout(5)
                sock.sendto(test_msg.encode(), (host, port))
            results["status"] = "SENT"
            results["message"] = "UDP message sent (delivery not guaranteed)"
        else:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(5)
                sock.connect((host, port))
                # sendall: plain send() may transmit only part of the buffer.
                sock.sendall((test_msg + "\n").encode())
            results["status"] = "CONNECTED"
            results["message"] = "TCP connection established and test message sent"
    except Exception as e:
        # Deliberately broad: this is a diagnostic probe, so every failure
        # (DNS, refused, timeout, bad port, ...) is reported, not raised.
        results["status"] = "FAILED"
        results["error"] = str(e)
    return results
def generate_parsing_config(log_format, source_type, fields=None):
    """Build SIEM parsing-configuration templates for a log format.

    Templates exist for ``syslog_rfc3164``, ``json`` and ``cef``; each has a
    ``splunk`` section and an ``elastic`` section.

    Args:
        log_format: Name of the log format to look up.
        source_type: Name interpolated into the Splunk stanza headers.
        fields: Accepted for interface compatibility; not used here.

    Returns:
        A dict with ``log_format``, ``source_type`` and ``configurations``.
        For a format without a template, ``configurations`` is a dict holding
        a single explanatory ``note``.
    """
    stanza_header = f"[{source_type}]"

    # --- Splunk stanzas, assembled line by line -------------------------
    syslog_props = "\n".join([
        stanza_header,
        "TIME_FORMAT = %b %d %H:%M:%S",
        r"TIME_PREFIX = ^<\d+>",
        "SHOULD_LINEMERGE = false",
        r"LINE_BREAKER = ([\r\n]+)",
    ])
    syslog_transforms = "\n".join([
        f"[{source_type}_extract]",
        r"REGEX = ^<(\d+)>(\w{3}\s+\d+\s+\d+:\d+:\d+)\s+(\S+)\s+(\S+?)(?:\[(\d+)\])?:\s+(.*)",
        "FORMAT = priority::$1 timestamp::$2 host::$3 program::$4 pid::$5 message::$6",
    ])
    json_props = "\n".join([
        stanza_header,
        "KV_MODE = json",
        "SHOULD_LINEMERGE = false",
        "TIME_FORMAT = %Y-%m-%dT%H:%M:%S",
    ])
    cef_props = "\n".join([
        stanza_header,
        "SHOULD_LINEMERGE = false",
        "TIME_FORMAT = %b %d %Y %H:%M:%S",
        "TRANSFORMS-cef = cef_header,cef_extension",
    ])

    # --- Logstash grok for the pipe-delimited CEF header ----------------
    cef_grok = r"\|".join([
        "CEF:%{INT:cef_version}",
        "%{DATA:vendor}",
        "%{DATA:product}",
        "%{DATA:version}",
        "%{DATA:signature}",
        "%{DATA:name}",
        "%{INT:severity}",
        "%{GREEDYDATA:extension}",
    ])
    cef_logstash = (
        'filter { if [type] == "cef" { grok { match => { "message" => "'
        + cef_grok
        + '" } } } }'
    )

    templates = {
        "syslog_rfc3164": {
            "splunk": {
                "props_conf": syslog_props,
                "transforms_conf": syslog_transforms,
            },
            "elastic": {
                "filebeat_module": {
                    "module": "system",
                    "syslog": {"enabled": True, "var.paths": ["/var/log/syslog"]},
                },
            },
        },
        "json": {
            "splunk": {"props_conf": json_props},
            "elastic": {
                "filebeat_input": {
                    "type": "filestream",
                    "parsers": [
                        {"ndjson": {"keys_under_root": True, "add_error_key": True}},
                    ],
                },
            },
        },
        "cef": {
            "splunk": {"props_conf": cef_props},
            "elastic": {"logstash_filter": cef_logstash},
        },
    }

    if log_format in templates:
        configurations = templates[log_format]
    else:
        configurations = {"note": f"No template for format: {log_format}"}

    return {
        "log_format": log_format,
        "source_type": source_type,
        "configurations": configurations,
    }
def create_onboarding_checklist(source_name, log_format, siem_host, siem_port=514):
    """Generate a ten-step log source onboarding checklist.

    Args:
        source_name: Name of the log source being onboarded.
        log_format: Log format name, echoed in the result and in step 2.
        siem_host: SIEM collector host, shown in the connectivity step.
        siem_port: SIEM collector port, shown in the connectivity step
            (default 514).

    Returns:
        A dict with ``source_name``, ``log_format``, ``timestamp`` (current
        time as a timezone-aware UTC ISO-8601 string) and ``checklist``, a
        list of ``{"step", "task", "status"}`` dicts numbered from 1, all
        with status ``"pending"``.
    """
    # Local import: the module only imports the ``datetime`` class.
    from datetime import timezone

    tasks = [
        "Collect log samples (minimum 100 lines)",
        f"Validate format: {log_format}",
        f"Test connectivity to {siem_host}:{siem_port}",
        "Create source type / index configuration",
        "Configure field extraction / parsing rules",
        "Verify timestamp parsing and timezone",
        "Validate event flow (check event count)",
        "Create correlation rules / alerts",
        "Document source in CMDB",
        "Monitor for 48h and verify parsing accuracy",
    ]
    return {
        "source_name": source_name,
        "log_format": log_format,
        # Aware timestamp: datetime.utcnow() is deprecated and yields a naive
        # value with no indication that it is UTC.
        "timestamp": datetime.now(timezone.utc).isoformat(),
        # Numbering is derived from list position so steps cannot drift.
        "checklist": [
            {"step": number, "task": task, "status": "pending"}
            for number, task in enumerate(tasks, start=1)
        ],
    }
def main():
    """Command-line entry point.

    Defines the ``detect``, ``validate``, ``parse-config`` and ``checklist``
    subcommands, runs the one selected on the command line and prints its
    result as indented JSON. With no subcommand, prints the help text.
    """
    parser = argparse.ArgumentParser(description="SIEM Log Source Onboarding Agent")
    subcommands = parser.add_subparsers(dest="command")

    detect_cmd = subcommands.add_parser("detect", help="Detect log format")
    detect_cmd.add_argument("--file", required=True)

    validate_cmd = subcommands.add_parser("validate", help="Test syslog connectivity")
    validate_cmd.add_argument("--host", required=True)
    validate_cmd.add_argument("--port", type=int, default=514)
    validate_cmd.add_argument("--protocol", default="udp", choices=["udp", "tcp"])

    parse_cmd = subcommands.add_parser("parse-config", help="Generate parsing config")
    parse_cmd.add_argument("--format", required=True, choices=list(LOG_FORMAT_PATTERNS.keys()))
    parse_cmd.add_argument("--source-type", required=True)

    checklist_cmd = subcommands.add_parser("checklist", help="Generate onboarding checklist")
    checklist_cmd.add_argument("--source", required=True)
    checklist_cmd.add_argument("--format", required=True)
    checklist_cmd.add_argument("--siem-host", required=True)
    checklist_cmd.add_argument("--siem-port", type=int, default=514)

    options = parser.parse_args()

    # Subcommand name -> zero-argument callable producing that command's result.
    handlers = {
        "detect": lambda: detect_log_format(options.file),
        "validate": lambda: validate_syslog_connectivity(
            options.host, options.port, options.protocol
        ),
        "parse-config": lambda: generate_parsing_config(
            options.format, options.source_type
        ),
        "checklist": lambda: create_onboarding_checklist(
            options.source, options.format, options.siem_host, options.siem_port
        ),
    }

    handler = handlers.get(options.command)
    if handler is None:
        # No subcommand was given on the command line.
        parser.print_help()
        return

    print(json.dumps(handler(), indent=2, default=str))


if __name__ == "__main__":
    main()