Files
mukul975 27c6414ca5 Add folder anatomy (scripts/agent.py + references/api-reference.md) for 648 cybersecurity skills
Complete skill folder anatomy across all cybersecurity skills:
- scripts/agent.py: 80-150 line Python agents using real libraries (impacket,
  boto3, azure-mgmt-*, kubernetes, pefile, yara, scapy, shodan, stix2, etc.)
- references/api-reference.md: real API documentation with method signatures
- LICENSE: MIT license for all skill folders
2026-03-10 21:02:12 +01:00

214 lines
8.0 KiB
Python

#!/usr/bin/env python3
"""Arkime Network Traffic Analysis Agent - Queries Arkime API for session analysis and anomaly detection."""
import json
import logging
import argparse
from datetime import datetime
from collections import defaultdict
import requests
from requests.auth import HTTPDigestAuth
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)
def arkime_request(base_url, endpoint, auth, params=None):
"""Make an authenticated request to Arkime API v3."""
url = f"{base_url}{endpoint}"
try:
resp = requests.get(url, auth=HTTPDigestAuth(*auth), params=params, verify=False, timeout=30)
resp.raise_for_status()
return resp.json()
except requests.RequestException as e:
logger.error("Arkime API error %s: %s", endpoint, e)
return None
def search_sessions(base_url, auth, expression, date_range=1, length=500):
"""Search Arkime sessions with an expression filter."""
params = {
"date": date_range,
"expression": expression,
"length": length,
"order": "lastPacket:desc",
}
data = arkime_request(base_url, "/api/sessions", auth, params)
if data and "data" in data:
logger.info("Found %d sessions for expression: %s", len(data["data"]), expression)
return data["data"]
return []
def get_connections(base_url, auth, expression, date_range=1):
"""Get connection graph data from Arkime."""
params = {"date": date_range, "expression": expression}
data = arkime_request(base_url, "/api/connections", auth, params)
if data:
nodes = data.get("nodes", [])
links = data.get("links", [])
logger.info("Connection graph: %d nodes, %d links", len(nodes), len(links))
return {"nodes": nodes, "links": links}
return {"nodes": [], "links": []}
def get_spi_view(base_url, auth, expression, date_range=1):
"""Get SPI view field statistics from Arkime."""
params = {"date": date_range, "expression": expression, "spi": "srcIp,dstIp,dstPort"}
data = arkime_request(base_url, "/api/spiview", auth, params)
return data if data else {}
def detect_beaconing(sessions, interval_threshold=0.15):
"""Detect C2 beaconing by analyzing connection intervals."""
connections = defaultdict(list)
for s in sessions:
key = (s.get("srcIp", ""), s.get("dstIp", ""), s.get("dstPort", 0))
connections[key].append(s.get("lastPacket", 0))
beacons = []
for (src, dst, port), timestamps in connections.items():
if len(timestamps) < 10:
continue
timestamps.sort()
intervals = [timestamps[i + 1] - timestamps[i] for i in range(len(timestamps) - 1)]
if not intervals:
continue
avg_interval = sum(intervals) / len(intervals)
if avg_interval == 0:
continue
std_dev = (sum((i - avg_interval) ** 2 for i in intervals) / len(intervals)) ** 0.5
jitter_ratio = std_dev / avg_interval
if jitter_ratio < interval_threshold:
beacons.append({
"src_ip": src,
"dst_ip": dst,
"dst_port": port,
"session_count": len(timestamps),
"avg_interval_sec": round(avg_interval / 1000, 1),
"jitter_ratio": round(jitter_ratio, 4),
"confidence": "high" if jitter_ratio < 0.05 else "medium",
"severity": "critical",
})
logger.warning("Beaconing: %s -> %s:%d (jitter: %.4f)", src, dst, port, jitter_ratio)
return beacons
def detect_dns_tunneling(sessions, query_len_threshold=50):
"""Detect DNS tunneling via abnormally long DNS queries."""
dns_sessions = [s for s in sessions if s.get("dstPort") == 53]
suspicious = []
src_stats = defaultdict(lambda: {"count": 0, "total_bytes": 0})
for s in dns_sessions:
src = s.get("srcIp", "")
src_stats[src]["count"] += 1
src_stats[src]["total_bytes"] += s.get("srcBytes", 0) + s.get("dstBytes", 0)
for src, stats in src_stats.items():
avg_bytes = stats["total_bytes"] / max(stats["count"], 1)
if stats["count"] > 100 and avg_bytes > query_len_threshold:
suspicious.append({
"src_ip": src,
"dns_query_count": stats["count"],
"avg_bytes_per_query": round(avg_bytes, 1),
"total_bytes": stats["total_bytes"],
"severity": "high",
"indicator": "DNS tunneling - high volume with large payloads",
})
return suspicious
def detect_large_transfers(sessions, threshold_mb=100):
"""Detect unusually large data transfers."""
threshold_bytes = threshold_mb * 1024 * 1024
large = []
for s in sessions:
total = s.get("srcBytes", 0) + s.get("dstBytes", 0)
if total > threshold_bytes:
large.append({
"src_ip": s.get("srcIp", ""),
"dst_ip": s.get("dstIp", ""),
"dst_port": s.get("dstPort", 0),
"total_bytes": total,
"total_mb": round(total / (1024 * 1024), 2),
"severity": "high",
})
return large
def detect_tls_anomalies(sessions):
"""Detect TLS certificate anomalies (self-signed, expired, unusual issuers)."""
anomalies = []
for s in sessions:
tls = s.get("tls", {})
if not tls:
continue
ja3 = s.get("srcJa3", "")
issuer_cn = tls.get("issuerCN", "")
not_after = tls.get("notAfter", 0)
if issuer_cn and issuer_cn == tls.get("subjectCN", ""):
anomalies.append({
"src_ip": s.get("srcIp", ""),
"dst_ip": s.get("dstIp", ""),
"issue": "self-signed certificate",
"issuer": issuer_cn,
"severity": "medium",
})
if not_after and not_after < int(datetime.utcnow().timestamp() * 1000):
anomalies.append({
"src_ip": s.get("srcIp", ""),
"dst_ip": s.get("dstIp", ""),
"issue": "expired certificate",
"issuer": issuer_cn,
"severity": "medium",
})
return anomalies
def generate_report(beacons, dns_tunneling, large_transfers, tls_anomalies, session_count):
"""Generate network traffic analysis report."""
all_findings = beacons + dns_tunneling + large_transfers + tls_anomalies
critical = [f for f in all_findings if f.get("severity") == "critical"]
report = {
"timestamp": datetime.utcnow().isoformat(),
"sessions_analyzed": session_count,
"findings_total": len(all_findings),
"critical_count": len(critical),
"beaconing_detected": beacons,
"dns_tunneling": dns_tunneling,
"large_transfers": large_transfers,
"tls_anomalies": tls_anomalies,
}
print(f"ARKIME REPORT: {len(all_findings)} findings ({len(critical)} critical) from {session_count} sessions")
return report
def main():
parser = argparse.ArgumentParser(description="Arkime Network Traffic Analysis Agent")
parser.add_argument("--arkime-url", required=True, help="Arkime viewer URL")
parser.add_argument("--user", required=True)
parser.add_argument("--password", required=True)
parser.add_argument("--expression", default="*", help="Arkime search expression")
parser.add_argument("--date-range", type=int, default=1, help="Date range in hours")
parser.add_argument("--output", default="arkime_report.json")
args = parser.parse_args()
auth = (args.user, args.password)
sessions = search_sessions(args.arkime_url, auth, args.expression, args.date_range)
beacons = detect_beaconing(sessions)
dns_tunnel = detect_dns_tunneling(sessions)
large = detect_large_transfers(sessions)
tls = detect_tls_anomalies(sessions)
report = generate_report(beacons, dns_tunnel, large, tls, len(sessions))
with open(args.output, "w") as f:
json.dump(report, f, indent=2)
logger.info("Report saved to %s", args.output)
if __name__ == "__main__":
main()