Files
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

252 lines
8.6 KiB
Python

#!/usr/bin/env python3
"""Authentication anomaly detection agent using UEBA analytics."""
import json
import sys
import csv
from datetime import datetime, timedelta
from math import radians, sin, cos, sqrt, atan2
from collections import Counter
def haversine_km(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The distance between the two points as a float, in kilometres.
    """
    earth_radius_km = 6371
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Mathematically 0 <= a <= 1, but floating-point rounding can push it a
    # hair outside that range for (near-)antipodal or identical points, which
    # would make the square roots below raise ValueError. NaN is left alone.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0
    return earth_radius_km * 2 * atan2(sqrt(a), sqrt(1 - a))
def load_auth_logs(csv_path):
    """Load authentication events from a CSV file.

    The file is expected to start with a header row naming the columns
    ``timestamp,user,source_ip,result,lat,lon,city,country,app,device``.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A list with one dict per data row. ``lat`` and ``lon`` are converted
        to ``float``; a blank, missing, or non-numeric coordinate becomes
        ``None``. Every other column keeps the string value read by ``csv``.
    """
    events = []
    # The csv module requires newline="" so that line breaks inside quoted
    # fields reach the parser untouched.
    with open(csv_path, "r", newline="") as f:
        for row in csv.DictReader(f):
            for field in ("lat", "lon"):
                raw = row.get(field)
                try:
                    row[field] = float(raw) if raw else None
                except (TypeError, ValueError):
                    # One malformed coordinate must not abort the whole load;
                    # the event is kept, just without a usable position.
                    row[field] = None
            events.append(row)
    return events
def detect_impossible_travel(events, max_speed_kmh=900):
    """Detect successful logins whose locations imply an impossible speed.

    For each user, the successful logins that carry both coordinates and a
    parseable ISO-8601 timestamp are put in chronological order, and every
    login is compared with the one before it. Logins that lack a position or
    a usable timestamp are left out up front so that they cannot hide a
    suspicious pair of logins on either side of them.

    Args:
        events: Iterable of event dicts with the keys ``user``, ``result``,
            ``timestamp``, ``lat``, ``lon``, ``city``, ``country`` and
            ``source_ip`` (``lat``/``lon`` as numbers or ``None``).
        max_speed_kmh: Speed in km/h above which a pair of logins is reported.

    Returns:
        A list of ``IMPOSSIBLE_TRAVEL`` alert dicts, one per offending pair.
        Pairs less than 100 km apart, or with no time elapsed between them,
        are never reported.
    """
    by_user = {}
    for event in events:
        logins = by_user.setdefault(event.get("user", ""), [])
        if event.get("result") != "success":
            continue
        # Compare with None explicitly: 0.0 is a valid coordinate (equator /
        # prime meridian), and a missing longitude is as unusable as a
        # missing latitude.
        if event.get("lat") is None or event.get("lon") is None:
            continue
        try:
            when = datetime.fromisoformat(event.get("timestamp").replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            continue
        # Bring every timestamp onto one naive UTC clock so that values with
        # different offsets sort and subtract correctly.
        # NOTE(review): timestamps without an offset are taken to be UTC
        # already - confirm against the log source.
        if when.tzinfo is not None:
            when = (when - when.utcoffset()).replace(tzinfo=None)
        logins.append((when, event))

    alerts = []
    for user, logins in by_user.items():
        # Order by the parsed instant, not by the raw string, which misorders
        # timestamps written with different UTC offsets.
        logins.sort(key=lambda pair: pair[0])
        for (t1, prev), (t2, curr) in zip(logins, logins[1:]):
            hours = (t2 - t1).total_seconds() / 3600
            if hours <= 0:
                continue
            dist = haversine_km(prev["lat"], prev["lon"], curr["lat"], curr["lon"])
            if dist < 100:
                continue
            speed = dist / hours
            if speed > max_speed_kmh:
                alerts.append({
                    "type": "IMPOSSIBLE_TRAVEL",
                    "severity": "HIGH",
                    "user": user,
                    "from": f"{prev.get('city', '?')}, {prev.get('country', '?')}",
                    "to": f"{curr.get('city', '?')}, {curr.get('country', '?')}",
                    "distance_km": round(dist, 1),
                    "time_hours": round(hours, 2),
                    "speed_kmh": round(speed, 1),
                    "ip_from": prev.get("source_ip"),
                    "ip_to": curr.get("source_ip"),
                })
    return alerts
def detect_brute_force(events, threshold=10, window_min=10):
    """Detect brute force: many failed logins for one user in a time window.

    For each user, failed logins with a parseable ISO-8601 timestamp are put
    in chronological order. Starting from each failure in turn, the failures
    in the following ``window_min`` minutes (both ends inclusive) are counted;
    the first window that reaches ``threshold`` produces one alert and ends
    the scan for that user.

    Args:
        events: Iterable of event dicts with the keys ``user``, ``result``,
            ``timestamp`` and ``source_ip``.
        threshold: Minimum number of failures in one window to raise an alert.
        window_min: Window length in minutes.

    Returns:
        A list of ``BRUTE_FORCE`` alert dicts, at most one per user.
        ``source_ips`` lists the distinct source addresses in first-seen
        order; ``distributed`` is true when there is more than one.
    """
    alerts = []
    span = timedelta(minutes=window_min)
    by_user = {}
    for event in events:
        if event.get("result") != "failure":
            continue
        bucket = by_user.setdefault(event.get("user", ""), [])
        # Parse once, up front. A row with an unusable timestamp is dropped
        # here instead of raising later while a window is being built.
        try:
            when = datetime.fromisoformat(event.get("timestamp").replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            continue
        # One naive UTC clock for all rows, so mixed offsets compare cleanly.
        # NOTE(review): timestamps without an offset are taken to be UTC
        # already - confirm against the log source.
        if when.tzinfo is not None:
            when = (when - when.utcoffset()).replace(tzinfo=None)
        bucket.append((when, event))

    for user, fails in by_user.items():
        fails.sort(key=lambda pair: pair[0])
        hi = 0  # exclusive end of the current window; only ever moves forward
        for lo, (t_start, _) in enumerate(fails):
            # Same start time as the previous failure means the same window,
            # which has already been examined.
            if lo and fails[lo - 1][0] == t_start:
                continue
            t_end = t_start + span
            hi = max(hi, lo)
            while hi < len(fails) and fails[hi][0] <= t_end:
                hi += 1
            if hi - lo >= threshold:
                window = [ev for _, ev in fails[lo:hi]]
                ips = list(dict.fromkeys(w.get("source_ip", "") for w in window))
                alerts.append({
                    "type": "BRUTE_FORCE",
                    "severity": "HIGH",
                    "user": user,
                    "failures": len(window),
                    "window_minutes": window_min,
                    "source_ips": ips,
                    "distributed": len(ips) > 1,
                })
                break
    return alerts
def detect_password_spray(events, user_threshold=10, window_min=30):
    """Detect password spray: one source address failing against many users.

    For each source IP, failed logins with a parseable ISO-8601 timestamp are
    put in chronological order. Starting from each failure in turn, the
    failures in the following ``window_min`` minutes (both ends inclusive)
    are collected. The first window that targets at least ``user_threshold``
    distinct users with an average of at most 3 attempts per user produces
    one alert and ends the scan for that address.

    Args:
        events: Iterable of event dicts with the keys ``user``, ``result``,
            ``timestamp`` and ``source_ip``.
        user_threshold: Minimum number of distinct users in one window.
        window_min: Window length in minutes.

    Returns:
        A list of ``PASSWORD_SPRAY`` alert dicts, at most one per source IP.
    """
    alerts = []
    span = timedelta(minutes=window_min)
    by_ip = {}
    for event in events:
        if event.get("result") != "failure":
            continue
        bucket = by_ip.setdefault(event.get("source_ip", ""), [])
        # Parse once, up front. A row with an unusable timestamp is dropped
        # here instead of raising later while a window is being built.
        try:
            when = datetime.fromisoformat(event.get("timestamp").replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            continue
        # One naive UTC clock for all rows, so mixed offsets compare cleanly.
        # NOTE(review): timestamps without an offset are taken to be UTC
        # already - confirm against the log source.
        if when.tzinfo is not None:
            when = (when - when.utcoffset()).replace(tzinfo=None)
        bucket.append((when, event))

    for ip, fails in by_ip.items():
        fails.sort(key=lambda pair: pair[0])
        hi = 0  # exclusive end of the current window; only ever moves forward
        for lo, (t_start, _) in enumerate(fails):
            # Same start time as the previous failure means the same window,
            # which has already been examined.
            if lo and fails[lo - 1][0] == t_start:
                continue
            t_end = t_start + span
            hi = max(hi, lo)
            while hi < len(fails) and fails[hi][0] <= t_end:
                hi += 1
            window = [ev for _, ev in fails[lo:hi]]
            users = {w.get("user", "") for w in window}
            # `not users` guards the division below against an empty window.
            if not users or len(users) < user_threshold:
                continue
            avg_per_user = len(window) / len(users)
            # Many attempts per user looks like brute force, not a spray.
            if avg_per_user > 3:
                continue
            alerts.append({
                "type": "PASSWORD_SPRAY",
                "severity": "CRITICAL",
                "source_ip": ip,
                "targeted_users": len(users),
                "total_attempts": len(window),
                "avg_per_user": round(avg_per_user, 1),
            })
            break
    return alerts
def build_user_baseline(events, user):
    """Summarise one user's historical login behaviour.

    Args:
        events: Iterable of event dicts.
        user: Value of the ``user`` field whose events are summarised.

    Returns:
        ``{"error": ...}`` when no event belongs to ``user``. Otherwise a dict
        with the event count, the sorted distinct hours of day seen in
        parseable timestamps, the most common source IPs, countries, apps and
        devices as ``(value, count)`` pairs, and the share of events whose
        ``result`` is ``"failure"`` rounded to three decimals.
    """
    mine = []
    for record in events:
        if record.get("user") == user:
            mine.append(record)
    if not mine:
        return {"error": f"No events for user {user}"}

    seen_hours = set()
    for record in mine:
        try:
            stamp = datetime.fromisoformat(record["timestamp"].replace("Z", "+00:00"))
        except Exception:
            # Events without a usable timestamp still count towards the other
            # statistics; they just contribute no hour.
            continue
        seen_hours.add(stamp.hour)

    def tally(field):
        # Frequency of each value of `field`, with "" standing in for a
        # missing key.
        return Counter(record.get(field, "") for record in mine)

    failed = [record for record in mine if record.get("result") == "failure"]
    return {
        "user": user,
        "event_count": len(mine),
        "typical_hours": sorted(seen_hours),
        "top_ips": tally("source_ip").most_common(10),
        "top_countries": tally("country").most_common(5),
        "top_apps": tally("app").most_common(10),
        "top_devices": tally("device").most_common(5),
        "failure_rate": round(len(failed) / len(mine), 3),
    }
def calculate_risk_score(alerts):
    """Fold a list of alerts into a single risk score and level.

    Each alert adds the weight of its ``type`` (10 for a type that has no
    entry in the table); the sum is capped at 100 and mapped to a level:
    80 and above is CRITICAL, 60 and above HIGH, 40 and above MEDIUM,
    anything lower LOW.

    Args:
        alerts: List of alert dicts.

    Returns:
        A dict with ``score``, ``level`` and ``alert_count``.
    """
    type_weights = {
        "IMPOSSIBLE_TRAVEL": 40,
        "PASSWORD_SPRAY": 35,
        "BRUTE_FORCE": 30,
        "NEW_COUNTRY": 25,
        "OFF_HOURS": 15,
    }
    total = 0
    for alert in alerts:
        total += type_weights.get(alert.get("type", ""), 10)
    capped = total if total < 100 else 100

    # Bands are checked from the highest floor down; the first match wins.
    for floor, label in ((80, "CRITICAL"), (60, "HIGH"), (40, "MEDIUM")):
        if capped >= floor:
            level = label
            break
    else:
        level = "LOW"
    return {"score": capped, "level": level, "alert_count": len(alerts)}
def run_full_analysis(csv_path):
    """Load an auth-log CSV and run every detector over it.

    Args:
        csv_path: Path of the CSV file to analyse.

    Returns:
        A dict holding the file path, the number of events loaded, the alerts
        of each detector under ``impossible_travel``, ``brute_force`` and
        ``password_spray``, and under ``risk`` the combined risk score of all
        those alerts.
    """
    events = load_auth_logs(csv_path)
    # Detectors run in this order; the same order is used for the report keys
    # and for the combined alert list handed to the scorer.
    findings = {
        "impossible_travel": detect_impossible_travel(events),
        "brute_force": detect_brute_force(events),
        "password_spray": detect_password_spray(events),
    }
    combined = [alert for group in findings.values() for alert in group]

    report = {"file": csv_path, "total_events": len(events)}
    report.update(findings)
    report["risk"] = calculate_risk_score(combined)
    return report
if __name__ == "__main__":
    # Command-line entry point.
    #   agent.py <auth_logs.csv>                  -> full analysis report
    #   agent.py <auth_logs.csv> --user <name>    -> baseline for one user
    # Either result is printed to stdout as indented JSON.
    usage = "Usage: agent.py <auth_logs.csv> [--user <username>]"
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)
    csv_file = sys.argv[1]
    if "--user" in sys.argv:
        idx = sys.argv.index("--user")
        user = sys.argv[idx + 1] if idx + 1 < len(sys.argv) else None
        if not user:
            # A --user flag without a value is a usage error; report it
            # rather than exiting successfully with no output at all.
            print(usage)
            sys.exit(1)
        events = load_auth_logs(csv_file)
        print(json.dumps(build_user_baseline(events, user), indent=2, default=str))
    else:
        print(json.dumps(run_full_analysis(csv_file), indent=2, default=str))