Files
Anthropic-Cybersecurity-Skills/skills/hunting-credential-stuffing-attacks/scripts/agent.py
T
mukul975 27c6414ca5 Add folder anatomy (scripts/agent.py + references/api-reference.md) for 648 cybersecurity skills
Complete skill folder anatomy across all cybersecurity skills:
- scripts/agent.py: 80-150 line Python agents using real libraries (impacket,
  boto3, azure-mgmt-*, kubernetes, pefile, yara, scapy, shodan, stix2, etc.)
- references/api-reference.md: real API documentation with method signatures
- LICENSE: MIT license for all skill folders
2026-03-10 21:02:12 +01:00

199 lines
7.5 KiB
Python

#!/usr/bin/env python3
"""Agent for hunting credential stuffing attacks in authentication logs."""
import os
import json
import argparse
from datetime import datetime
from collections import defaultdict
import pandas as pd
import numpy as np
def _starts_with_json_array(path):
    """Return True when the first non-whitespace character of *path* is ``[``."""
    # errors="replace" keeps this sniffing step from failing on odd bytes;
    # the real decoding is left to pandas.
    with open(path, "r", encoding="utf-8-sig", errors="replace") as handle:
        while True:
            chunk = handle.read(4096)
            if not chunk:
                return False
            stripped = chunk.lstrip()
            if stripped:
                return stripped[0] == "["


def load_auth_logs(log_path):
    """Load authentication logs from a CSV, JSON-lines, or JSON-array file.

    The format is chosen from the file extension, compared case-insensitively:

    * ``.json`` / ``.jsonl`` / ``.ndjson`` -- read with ``pandas.read_json``.
      When the first non-blank character of the file is ``[`` the file is read
      as one JSON array of records; otherwise it is read as JSON lines (one
      object per line).
    * any other extension, including ``.csv`` -- read with ``pandas.read_csv``
      with the ``timestamp`` column parsed as datetimes.

    Args:
        log_path: Path to the log file (``str`` or ``os.PathLike``).

    Returns:
        pandas.DataFrame with one row per log record.
    """
    suffix = os.path.splitext(os.fspath(log_path))[1].lower()
    if suffix in (".json", ".jsonl", ".ndjson"):
        # Log records are objects, so a leading "[" can only be an enclosing
        # array, which lines=True would mis-parse.
        as_lines = not _starts_with_json_array(log_path)
        return pd.read_json(log_path, lines=as_lines)
    return pd.read_csv(log_path, parse_dates=["timestamp"])
def detect_credential_stuffing(df, ip_threshold=20, time_window="1h"):
    """Flag source IPs whose failed logins span many distinct accounts.

    Args:
        df: Auth events with ``status``, ``timestamp``, ``source_ip`` and
            ``username`` columns.
        ip_threshold: Minimum number of distinct usernames a single IP must
            have failed against to be reported.
        time_window: Accepted for interface compatibility; the current
            implementation does not use it.

    Returns:
        A list of finding dicts (one per offending IP), ordered by the number
        of targeted accounts, largest first. Empty when there are no failures.
    """
    failures = df.loc[df["status"] == "failed"].copy()
    if failures.empty:
        return []

    per_ip = (
        failures.sort_values("timestamp")
        .groupby("source_ip")
        .agg(
            unique_accounts=("username", "nunique"),
            total_attempts=("username", "count"),
            first_seen=("timestamp", "min"),
            last_seen=("timestamp", "max"),
        )
        .reset_index()
    )
    offenders = per_ip.loc[per_ip["unique_accounts"] >= ip_threshold]

    def build_finding(stats):
        accounts = stats["unique_accounts"]
        attempts = stats["total_attempts"]
        elapsed = (stats["last_seen"] - stats["first_seen"]).total_seconds()
        # Bursts shorter than a minute are rated as if they lasted one minute.
        minutes = max(elapsed / 60, 1)
        if accounts > 100:
            severity = "CRITICAL"
        else:
            severity = "HIGH"
        return {
            "source_ip": stats["source_ip"],
            "unique_accounts_targeted": int(accounts),
            "total_attempts": int(attempts),
            "duration_seconds": int(elapsed),
            "attempts_per_minute": round(attempts / minutes, 1),
            "type": "credential_stuffing",
            "severity": severity,
        }

    results = [build_finding(stats) for _, stats in offenders.iterrows()]
    results.sort(key=lambda item: item["unique_accounts_targeted"], reverse=True)
    return results
def detect_password_spray(df, account_threshold=10):
    """Flag source IPs that fail against many accounts with few tries each.

    An IP is reported when its failed logins cover at least
    ``account_threshold`` distinct usernames while the total number of failed
    attempts is no more than three per targeted username.

    Args:
        df: Auth events with ``status``, ``source_ip`` and ``username`` columns.
        account_threshold: Minimum distinct usernames per IP.

    Returns:
        A list of finding dicts, one per matching IP; empty when there are no
        failed events.
    """
    failures = df.loc[df["status"] == "failed"]
    if failures.empty:
        return []

    per_ip = (
        failures.groupby("source_ip")
        .agg(
            unique_accounts=("username", "nunique"),
            total_attempts=("username", "count"),
        )
        .reset_index()
    )
    wide_enough = per_ip["unique_accounts"] >= account_threshold
    shallow_enough = per_ip["total_attempts"] <= per_ip["unique_accounts"] * 3
    candidates = per_ip.loc[wide_enough & shallow_enough]

    return [
        {
            "source_ip": stats["source_ip"],
            "unique_accounts": int(stats["unique_accounts"]),
            "total_attempts": int(stats["total_attempts"]),
            "attempts_per_account": round(
                stats["total_attempts"] / stats["unique_accounts"], 1
            ),
            "type": "password_spray",
            "severity": "HIGH",
        }
        for _, stats in candidates.iterrows()
    ]
def detect_distributed_attack(df, account_ip_threshold=5):
    """Flag usernames whose failed logins come from many distinct source IPs.

    Args:
        df: Auth events with ``status``, ``source_ip`` and ``username`` columns.
        account_ip_threshold: Minimum distinct source IPs per username.

    Returns:
        A list of finding dicts, one per targeted username, ordered by the
        number of distinct source IPs, largest first. Empty when there are no
        failed events.
    """
    failures = df.loc[df["status"] == "failed"]
    if failures.empty:
        return []

    per_account = (
        failures.groupby("username")
        .agg(
            unique_ips=("source_ip", "nunique"),
            total_failures=("source_ip", "count"),
        )
        .reset_index()
    )
    targeted = per_account.loc[per_account["unique_ips"] >= account_ip_threshold]

    results = [
        {
            "username": stats["username"],
            "unique_source_ips": int(stats["unique_ips"]),
            "total_failures": int(stats["total_failures"]),
            "type": "distributed_attack",
            "severity": "HIGH",
        }
        for _, stats in targeted.iterrows()
    ]
    results.sort(key=lambda item: item["unique_source_ips"], reverse=True)
    return results
def analyze_success_after_failures(df, min_failures=5):
    """Report accounts whose first qualifying success follows many failures.

    Each username's events are walked in ``timestamp`` order while counting
    ``failed`` rows. The first ``success`` row seen once the running count has
    reached ``min_failures`` is reported, and the scan of that account stops.
    The counter is cumulative: an earlier success that occurred below the
    threshold does not reset it.

    Args:
        df: Auth events with ``username``, ``timestamp`` and ``status``
            columns; ``source_ip`` is used when present.
        min_failures: Failures required before a success is reported.

    Returns:
        A list with at most one finding dict per username.
    """
    flagged = []
    for username, events in df.groupby("username"):
        failure_count = 0
        for _, event in events.sort_values("timestamp").iterrows():
            status = event["status"]
            if status == "failed":
                failure_count += 1
                continue
            if status != "success" or failure_count < min_failures:
                continue
            flagged.append({
                "username": username,
                "failures_before_success": failure_count,
                "success_ip": event.get("source_ip", ""),
                "success_time": str(event["timestamp"]),
                "severity": "CRITICAL",
            })
            break
    return flagged
def analyze_user_agent_patterns(df):
    """List user agents that dominate the failed-login traffic.

    A user agent is reported when it accounts for more than 30% of all failed
    events and appears in more than 50 of them.

    Args:
        df: Auth events with a ``status`` column and, optionally, a
            ``user_agent`` column.

    Returns:
        A list of dicts, each with the user agent (truncated to 200
        characters), its count, and its percentage of failed events. Empty
        when the ``user_agent`` column is absent or there are no failed events.
    """
    failures = df.loc[df["status"] == "failed"]
    if "user_agent" not in failures.columns:
        return []
    if failures.empty:
        return []

    failure_total = len(failures)
    flagged = []
    for agent, hits in failures["user_agent"].value_counts().items():
        share = hits / failure_total * 100
        if share <= 30 or hits <= 50:
            continue
        flagged.append({
            "user_agent": str(agent)[:200],
            "count": int(hits),
            "percentage": round(share, 1),
            "likely_automated": True,
        })
    return flagged
def calculate_attack_metrics(df):
    """Summarise the whole log: volumes, failure rate, cardinalities, time span.

    Args:
        df: Auth events with ``status``, ``source_ip``, ``username`` and
            ``timestamp`` columns.

    Returns:
        A dict with the event, failure and success counts, the failure rate as
        a percentage rounded to one decimal, the number of distinct source IPs
        and usernames, and a ``"<min> to <max>"`` timestamp range string.
    """
    status = df["status"]
    event_count = len(df)
    failed_count = int((status == "failed").sum())
    success_count = int((status == "success").sum())

    # max(..., 1) keeps the rate at 0.0 for an empty log instead of dividing by zero.
    failure_pct = round(failed_count / max(event_count, 1) * 100, 1)

    timestamps = df["timestamp"]
    earliest = timestamps.min()
    latest = timestamps.max()

    return {
        "total_events": event_count,
        "total_failures": failed_count,
        "total_successes": success_count,
        "failure_rate": failure_pct,
        "unique_ips": int(df["source_ip"].nunique()),
        "unique_accounts": int(df["username"].nunique()),
        "time_range": f"{earliest} to {latest}",
    }
def main():
    """Parse CLI arguments, run the selected detections, and write a JSON report.

    Command-line options:
        --log-file  Path to the authentication log (required); read with
                    ``load_auth_logs``.
        --output    Report path (default ``credential_stuffing_report.json``).
        --action    Which hunt to run: ``stuffing``, ``spray``, ``distributed``,
                    ``compromised``, ``user_agents``, or ``full_hunt`` (the
                    default), which runs all of them.

    The report holds a UTC ``generated_at`` timestamp (ISO 8601 with an
    explicit offset), the result of ``calculate_attack_metrics`` under
    ``metrics``, and one list per executed hunt under ``findings``. A progress
    line is printed for each step.

    Exits through ``parser.error`` when the log lacks a column that the
    metrics and detections read.
    """
    # Local import: only the report timestamp needs it.
    from datetime import timezone

    parser = argparse.ArgumentParser(description="Credential Stuffing Detection Agent")
    parser.add_argument("--log-file", required=True, help="Authentication log file")
    parser.add_argument("--output", default="credential_stuffing_report.json")
    parser.add_argument("--action", choices=[
        "stuffing", "spray", "distributed", "compromised", "user_agents", "full_hunt"
    ], default="full_hunt")
    args = parser.parse_args()

    df = load_auth_logs(args.log_file)

    # Fail with a readable message instead of a KeyError deep inside pandas.
    required_columns = ("timestamp", "source_ip", "username", "status")
    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        parser.error(f"log file is missing required column(s): {', '.join(missing)}")

    report = {
        # datetime.utcnow() is deprecated and returns a naive value; emit an
        # unambiguous timezone-aware UTC timestamp instead.
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "metrics": calculate_attack_metrics(df),
        "findings": {},
    }
    print(f"[+] Loaded {len(df)} auth events")

    # (action name, report key, detector, progress label)
    hunts = (
        ("stuffing", "credential_stuffing", detect_credential_stuffing,
         "Credential stuffing IPs"),
        ("spray", "password_spray", detect_password_spray,
         "Password spray IPs"),
        ("distributed", "distributed_attacks", detect_distributed_attack,
         "Distributed attack targets"),
        ("compromised", "compromised_accounts", analyze_success_after_failures,
         "Potentially compromised accounts"),
        ("user_agents", "automated_user_agents", analyze_user_agent_patterns,
         "Suspicious automated user agents"),
    )
    for action, report_key, detector, label in hunts:
        if args.action in (action, "full_hunt"):
            findings = detector(df)
            report["findings"][report_key] = findings
            print(f"[+] {label}: {len(findings)}")

    with open(args.output, "w", encoding="utf-8") as f:
        # default=str stringifies values json cannot encode natively.
        json.dump(report, f, indent=2, default=str)
    print(f"[+] Report saved to {args.output}")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()