Files
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

220 lines
8.3 KiB
Python

#!/usr/bin/env python3
"""User Behavior Analytics (UEBA) agent using elasticsearch-py."""
import math
import os
import sys
from datetime import datetime
try:
from elasticsearch import Elasticsearch
except ImportError:
print("Install: pip install elasticsearch")
sys.exit(1)
# Earth radius in kilometres; haversine() multiplies the central angle by
# this value to turn it into a distance.
EARTH_RADIUS_KM = 6371
def get_es_client(host=None, api_key=None):
    """Create the Elasticsearch client used by the UEBA queries.

    TLS certificate verification is enabled unless explicitly switched off,
    so a man-in-the-middle cannot silently read or tamper with auth logs.

    Environment variables:
        ES_HOSTS: URL used when ``host`` is not given
            (default ``https://localhost:9200``).
        ES_API_KEY: API key used when ``api_key`` is not given.
        ES_VERIFY_CERTS: set to ``0``/``false``/``no``/``off`` to disable
            certificate verification (lab clusters with self-signed certs).
        ES_CA_CERTS: path to a CA bundle for clusters signed by a private CA.

    Args:
        host: Elasticsearch URL, or None to fall back to the environment.
        api_key: API key, or None to fall back to the environment.

    Returns:
        A configured ``Elasticsearch`` client instance.
    """
    host = host or os.environ.get("ES_HOSTS", "https://localhost:9200")
    api_key = api_key or os.environ.get("ES_API_KEY")

    verify_setting = os.environ.get("ES_VERIFY_CERTS", "true").strip().lower()
    verify_certs = verify_setting not in {"0", "false", "no", "off"}

    kwargs = {"hosts": [host], "verify_certs": verify_certs}
    ca_certs = os.environ.get("ES_CA_CERTS")
    if ca_certs:
        # Lets verification stay on for clusters using a private CA.
        kwargs["ca_certs"] = ca_certs
    if api_key:
        kwargs["api_key"] = api_key
    return Elasticsearch(**kwargs)
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        The distance in kilometres on a sphere of radius EARTH_RADIUS_KM.
    """
    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    # Rounding (near-antipodal points) or out-of-range latitudes can push the
    # term marginally outside [0, 1], where sqrt/asin raise ValueError.
    a = min(1.0, max(0.0, a))
    return EARTH_RADIUS_KM * 2 * math.asin(math.sqrt(a))
def build_user_baselines(es, index="logs-auth-*", days=30):
    """Build per-user behavioral baselines from historical successful logins.

    Aggregates events with ``event.outcome == "success"`` whose timestamp is
    between ``now-{days}d`` and ``now-1d`` (the most recent day is excluded
    so the baseline is not contaminated by the activity being evaluated).

    Args:
        es: Client object exposing ``search(index=..., body=...)``.
        index: Index pattern holding authentication events.
        days: How far back, in days, the baseline window starts.

    Returns:
        A dict mapping user name to a baseline dict with the keys
        ``unique_ips``, ``unique_countries``, ``avg_login_hour``,
        ``stdev_login_hour``, ``avg_daily_logins`` and ``total_logins``.
    """
    query = {
        "size": 0,
        "query": {
            "bool": {
                "must": [
                    {"range": {"@timestamp": {"gte": f"now-{days}d", "lt": "now-1d"}}},
                    {"term": {"event.outcome": "success"}},
                ]
            }
        },
        "aggs": {
            "by_user": {
                "terms": {"field": "user.name", "size": 5000},
                "aggs": {
                    "unique_ips": {"cardinality": {"field": "source.ip"}},
                    "unique_countries": {"cardinality": {"field": "source.geo.country_name"}},
                    # extended_stats (not stats) is required: only it
                    # returns std_deviation, which the baseline reads below.
                    "login_hours": {
                        "extended_stats": {"script": "doc['@timestamp'].value.getHour()"},
                    },
                    "daily_count": {
                        "date_histogram": {"field": "@timestamp", "calendar_interval": "day"},
                    },
                },
            }
        },
    }
    result = es.search(index=index, body=query)

    baselines = {}
    for bucket in result["aggregations"]["by_user"]["buckets"]:
        daily_counts = [day["doc_count"] for day in bucket["daily_count"]["buckets"]]
        # max(..., 1) avoids dividing by zero when no daily buckets come back.
        avg_daily = sum(daily_counts) / max(len(daily_counts), 1)

        hour_stats = bucket["login_hours"]
        stdev = hour_stats.get("std_deviation")
        if stdev is None:
            # Missing or null deviation: fall back to a 4-hour spread.
            stdev = 4

        baselines[bucket["key"]] = {
            "unique_ips": bucket["unique_ips"]["value"],
            "unique_countries": bucket["unique_countries"]["value"],
            "avg_login_hour": hour_stats["avg"],
            "stdev_login_hour": stdev,
            "avg_daily_logins": round(avg_daily, 1),
            "total_logins": bucket["doc_count"],
        }
    return baselines
def _extract_coordinates(location):
    """Return ``(lat, lon)`` floats from an object-style geo point, else ``(None, None)``."""
    if not isinstance(location, dict):
        # String, array or missing geo points are not handled; skip them
        # instead of crashing on ``.get``.
        return None, None
    try:
        return float(location["lat"]), float(location["lon"])
    except (KeyError, TypeError, ValueError):
        return None, None


def detect_impossible_travel(es, index="logs-auth-*", hours=24,
                             max_speed_kmh=900, min_distance_km=500):
    """Detect consecutive logins whose locations imply an implausible speed.

    Successful logins carrying ``source.geo.location`` are fetched for the
    last ``hours`` hours, sorted by user and time. Each pair of consecutive
    events for the same user is compared; an alert is raised when the
    distance exceeds ``min_distance_km`` and the implied speed exceeds
    ``max_speed_kmh``.

    Args:
        es: Client object exposing ``search(index=..., body=...)``.
        index: Index pattern holding authentication events.
        hours: Size of the look-back window, in hours.
        max_speed_kmh: Speed above which travel is considered impossible.
        min_distance_km: Minimum distance for a pair to be considered.

    Returns:
        A list of alert dicts with the keys ``user``, ``from``, ``to``,
        ``distance_km``, ``time_hours``, ``speed_kmh``, ``prev_time`` and
        ``curr_time``.
    """
    query = {
        "size": 10000,
        "query": {
            "bool": {
                "must": [
                    {"range": {"@timestamp": {"gte": f"now-{hours}h"}}},
                    {"term": {"event.outcome": "success"}},
                    {"exists": {"field": "source.geo.location"}},
                ]
            }
        },
        "sort": [{"user.name": "asc"}, {"@timestamp": "asc"}],
    }
    result = es.search(index=index, body=query)

    events_by_user = {}
    for hit in result["hits"]["hits"]:
        src = hit["_source"]
        user = (src.get("user") or {}).get("name")
        if not user:
            continue
        source = src.get("source") or {}
        geo = source.get("geo") or {}
        lat, lon = _extract_coordinates(geo.get("location"))
        events_by_user.setdefault(user, []).append({
            "timestamp": src.get("@timestamp"),
            "ip": source.get("ip"),
            "lat": lat,
            "lon": lon,
            "city": geo.get("city_name"),
            "country": geo.get("country_name"),
        })

    alerts = []
    for user, events in events_by_user.items():
        for prev, curr in zip(events, events[1:]):
            coords = (prev["lat"], prev["lon"], curr["lat"], curr["lon"])
            # Compare against None explicitly: 0.0 is a valid coordinate
            # (equator / prime meridian) and must not be discarded.
            if any(value is None for value in coords):
                continue
            dist = haversine(*coords)
            try:
                t1 = datetime.fromisoformat(prev["timestamp"].replace("Z", "+00:00"))
                t2 = datetime.fromisoformat(curr["timestamp"].replace("Z", "+00:00"))
                hours_diff = (t2 - t1).total_seconds() / 3600
            except (AttributeError, TypeError, ValueError):
                # Missing, non-string or unparseable timestamps.
                continue
            if hours_diff <= 0:
                # Identical or out-of-order timestamps give no usable speed.
                continue
            speed = dist / hours_diff
            if speed > max_speed_kmh and dist > min_distance_km:
                alerts.append({
                    "user": user,
                    # The keys always exist (possibly as None), so use
                    # ``or`` to make the '?' placeholder actually apply.
                    "from": f"{prev['city'] or '?'}, {prev['country'] or '?'}",
                    "to": f"{curr['city'] or '?'}, {curr['country'] or '?'}",
                    "distance_km": round(dist),
                    "time_hours": round(hours_diff, 2),
                    "speed_kmh": round(speed),
                    "prev_time": prev["timestamp"],
                    "curr_time": curr["timestamp"],
                })
    return alerts
def detect_off_hours_access(es, baselines, index="logs-auth-*", hours=168):
    """Detect successful logins outside a user's normal login hours.

    A login is reported when its hour is more than two standard deviations
    from the user's baseline average hour AND it falls before 06:00, after
    22:00, or on a weekend. The hour is read straight from the parsed
    timestamp, with no timezone conversion.

    Args:
        es: Client object exposing ``search(index=..., body=...)``.
        baselines: Dict mapping user name to a baseline dict containing
            ``avg_login_hour`` and ``stdev_login_hour``.
        index: Index pattern holding authentication events.
        hours: Size of the look-back window, in hours.

    Returns:
        A list of alert dicts with the keys ``user``, ``timestamp``,
        ``login_hour``, ``baseline_avg``, ``weekend`` and ``ip``.
    """
    query = {
        "size": 5000,
        "query": {
            "bool": {
                "must": [
                    {"range": {"@timestamp": {"gte": f"now-{hours}h"}}},
                    {"term": {"event.outcome": "success"}},
                ]
            }
        },
    }
    result = es.search(index=index, body=query)

    alerts = []
    for hit in result["hits"]["hits"]:
        src = hit["_source"]
        user = src.get("user", {}).get("name")
        if not user or user not in baselines:
            continue

        ts = src.get("@timestamp", "")
        try:
            dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            # Non-string or unparseable timestamps are skipped, not fatal.
            continue

        baseline = baselines[user]
        avg_hour = baseline.get("avg_login_hour", 12)
        stdev = baseline.get("stdev_login_hour", 4)
        # An average of 0 (midnight) is a legitimate baseline, so only a
        # missing average is rejected; a zero/missing spread is unusable.
        if avg_hour is None or not stdev:
            continue

        hour = dt.hour
        is_weekend = dt.weekday() >= 5
        outside_baseline = hour < (avg_hour - 2 * stdev) or hour > (avg_hour + 2 * stdev)
        if not outside_baseline:
            continue
        if not (hour < 6 or hour > 22 or is_weekend):
            continue

        alerts.append({
            "user": user,
            "timestamp": ts,
            "login_hour": hour,
            "baseline_avg": round(avg_hour, 1),
            "weekend": is_weekend,
            "ip": src.get("source", {}).get("ip"),
        })
    return alerts
def calculate_risk_scores(impossible_travel, off_hours, baselines):
    """Combine anomaly alerts into a per-user risk ranking.

    Each impossible-travel alert adds 40 points and each off-hours alert
    adds 20 points to the user's score. ``baselines`` is accepted but not
    consulted.

    Returns:
        A list of ``(user, {"risk": int, "anomalies": [str, ...]})`` tuples,
        highest risk first.
    """
    per_user = {}

    def _record(name, points, note):
        # Create the user's entry on first sight, then accumulate.
        entry = per_user.setdefault(name, {"risk": 0, "anomalies": []})
        entry["risk"] += points
        entry["anomalies"].append(note)

    for finding in impossible_travel:
        _record(
            finding["user"],
            40,
            f"Impossible travel: {finding['from']} -> {finding['to']}",
        )

    for finding in off_hours:
        _record(
            finding["user"],
            20,
            f"Off-hours login at {finding['login_hour']}:00",
        )

    ranked = list(per_user.items())
    # Stable sort: users with equal risk keep first-seen order.
    ranked.sort(key=lambda pair: -pair[1]["risk"])
    return ranked
def print_report(travel_alerts, offhours_alerts, risk_scores):
    """Print the anomaly summary and the ten highest-risk users to stdout.

    For each listed user at most three anomaly descriptions are shown.
    ``risk_scores`` is expected in the shape returned by
    calculate_risk_scores(): a list of ``(user, data)`` tuples.
    """
    def _header():
        # Generated lazily so each value is computed right before printing.
        yield "UEBA ANOMALY REPORT"
        yield "=" * 50
        yield f"Date: {datetime.now().isoformat()}"
        yield f"Impossible Travel Alerts: {len(travel_alerts)}"
        yield f"Off-Hours Access Alerts: {len(offhours_alerts)}"
        yield "\nTOP RISK USERS:"

    for line in _header():
        print(line)

    top_users = risk_scores[:10]
    for name, details in top_users:
        print(f" {name:20s} Risk: {details['risk']:>5}")
        shown = details["anomalies"][:3]
        for description in shown:
            print(f" - {description}")
if __name__ == "__main__":
    # Host precedence: first CLI argument, then ES_HOSTS, then localhost.
    if len(sys.argv) > 1:
        host = sys.argv[1]
    else:
        host = os.environ.get("ES_HOSTS", "https://localhost:9200")

    es = get_es_client(host)

    # Baselines come first because the off-hours detector consumes them.
    baselines = build_user_baselines(es)
    travel = detect_impossible_travel(es)
    offhours = detect_off_hours_access(es, baselines)

    risk = calculate_risk_scores(travel, offhours, baselines)
    print_report(travel, offhours, risk)