# Anthropic-Cybersecurity-Skills/skills/implementing-anti-phishing-training-program/scripts/agent.py

#!/usr/bin/env python3
"""Agent for managing and analyzing anti-phishing training program metrics."""

import json
import argparse
from datetime import datetime

import pandas as pd
import numpy as np


def load_simulation_results(csv_path):
    """Read the phishing simulation CSV at *csv_path* into a DataFrame.

    The ``timestamp`` column is parsed as datetimes while reading; every
    other column keeps the dtype pandas infers for it.
    """
    datetime_columns = ["timestamp"]
    return pd.read_csv(csv_path, parse_dates=datetime_columns)


def calculate_department_metrics(df):
    """Summarize click, submission and report rates for each department.

    Rows are grouped by the ``department`` column. For every group the
    ``clicked`` column is summed; ``submitted_credentials`` and ``reported``
    are summed when present and counted as zero otherwise. Rates are
    percentages of the group's row count, rounded to one decimal place.

    Returns:
        A list of dicts (one per department) ordered by ``click_rate`` from
        highest to lowest. ``risk_level`` is "HIGH" when more than 30% of
        the group clicked, "MEDIUM" above 15%, and "LOW" otherwise.
    """

    def _optional_total(frame, column):
        # Optional columns contribute zero events when the export lacks them.
        if column in frame.columns:
            return frame[column].sum()
        return 0

    def _as_percent(count, size):
        if size > 0:
            return round(count / size * 100, 1)
        return 0

    rows = []
    for dept_name, members in df.groupby("department"):
        recipients = len(members)
        click_total = members["clicked"].sum()
        submit_total = _optional_total(members, "submitted_credentials")
        report_total = _optional_total(members, "reported")

        # The risk band is decided on the unrounded click fraction.
        click_fraction = click_total / recipients
        if click_fraction > 0.3:
            risk = "HIGH"
        elif click_fraction > 0.15:
            risk = "MEDIUM"
        else:
            risk = "LOW"

        rows.append({
            "department": dept_name,
            "total_recipients": int(recipients),
            "click_rate": _as_percent(click_total, recipients),
            "submission_rate": _as_percent(submit_total, recipients),
            "report_rate": _as_percent(report_total, recipients),
            "risk_level": risk,
        })

    rows.sort(key=lambda row: row["click_rate"], reverse=True)
    return rows


def analyze_trend(df):
    """Analyze phishing simulation click rates month by month.

    Rows are bucketed by the calendar month of their ``timestamp`` value.
    For each month the number of non-null ``clicked`` entries and their sum
    are computed, and the click rate is expressed as a percentage rounded
    to one decimal place.

    The input DataFrame is left untouched: the month key is derived as a
    separate Series instead of being written back as a new column.

    Args:
        df: Simulation results with a datetime ``timestamp`` column and a
            ``clicked`` column.

    Returns:
        A dict with ``monthly_data`` (a list of per-month records holding
        ``month``, ``total``, ``clicks`` and ``click_rate``, in chronological
        order) and ``improvement_pct`` (the first month's click rate minus
        the last month's, or 0 when fewer than two months are present).
    """
    # Group by a derived key rather than assigning df["month"], which would
    # leak an extra column into the caller's DataFrame.
    month_key = df["timestamp"].dt.to_period("M").rename("month")
    monthly = df.groupby(month_key).agg(
        total=("clicked", "count"),
        clicks=("clicked", "sum"),
    ).reset_index()
    monthly["click_rate"] = (monthly["clicks"] / monthly["total"] * 100).round(1)
    # Period values are turned into strings such as "2024-01" for the report.
    monthly["month"] = monthly["month"].astype(str)
    trend = monthly.to_dict(orient="records")

    if len(trend) >= 2:
        # Positive values mean the click rate dropped between first and last month.
        improvement = round(trend[0]["click_rate"] - trend[-1]["click_rate"], 1)
    else:
        improvement = 0
    return {"monthly_data": trend, "improvement_pct": improvement}


def identify_repeat_clickers(df):
    """List users who clicked in two or more simulation rows.

    Only rows whose ``clicked`` value equals True are considered. They are
    grouped by ``email``; each group's ``clicked`` values are summed into
    ``click_count`` and the first ``department`` value is kept. ``name`` is
    the first ``name`` value when that column exists, otherwise the first
    ``email`` value.

    Returns:
        A list of dict records for users with ``click_count`` of at least 2,
        ordered by ``click_count`` from highest to lowest.
    """
    # Without a "name" column the e-mail address doubles as the display name.
    name_source = "name" if "name" in df.columns else "email"
    aggregations = {
        "click_count": ("clicked", "sum"),
        "department": ("department", "first"),
        "name": (name_source, "first"),
    }

    clicked_mask = df["clicked"].eq(True)
    per_user = df.loc[clicked_mask].groupby("email").agg(**aggregations).reset_index()

    is_repeat = per_user["click_count"] >= 2
    ranked = per_user[is_repeat].sort_values("click_count", ascending=False)
    return ranked.to_dict(orient="records")


def calculate_training_completion(training_df):
    """Report enrollment and completion figures for each training module.

    Rows are grouped by ``module_name``. The group's row count is reported
    as ``enrolled`` and the sum of its ``completed`` column as ``completed``;
    ``completion_rate`` is their ratio as a percentage rounded to one
    decimal place.

    Returns:
        A list of dicts, one per module, ordered by ``completion_rate`` from
        lowest to highest.
    """

    def _summarize(module_name, enrollment):
        enrolled = len(enrollment)
        finished = enrollment["completed"].sum()
        if enrolled > 0:
            rate = round(finished / enrolled * 100, 1)
        else:
            rate = 0
        return {
            "module": module_name,
            "enrolled": int(enrolled),
            "completed": int(finished),
            "completion_rate": rate,
        }

    summaries = [
        _summarize(module_name, enrollment)
        for module_name, enrollment in training_df.groupby("module_name")
    ]
    # Ascending order puts the least-completed modules first.
    summaries.sort(key=lambda summary: summary["completion_rate"])
    return summaries


def generate_risk_score(dept_metrics):
    """Generate an overall organization score from per-department metrics.

    The score starts at 100, loses two points per percentage point of the
    average click rate and gains half a point per percentage point of the
    average report rate. Averages are unweighted means across the supplied
    departments. The result is clamped to the 0-100 range so a high report
    rate cannot push the score above the top of the grading scale.

    Args:
        dept_metrics: List of dicts carrying ``click_rate`` and
            ``report_rate`` percentages.

    Returns:
        ``{"score": 0, "grade": "N/A"}`` when *dept_metrics* is empty.
        Otherwise a dict with ``score`` and the two averages (each rounded
        to one decimal place) plus a letter ``grade``: A from 85, B from 70,
        C from 55, D from 40, F below that.
    """
    if not dept_metrics:
        return {"score": 0, "grade": "N/A"}

    # float() keeps numpy scalar types out of the returned report values.
    avg_click = float(np.mean([d["click_rate"] for d in dept_metrics]))
    avg_report = float(np.mean([d["report_rate"] for d in dept_metrics]))

    raw_score = 100 - (avg_click * 2) + (avg_report * 0.5)
    # Clamp both ends: the lower bound was always enforced, the upper bound
    # keeps the score on the same 0-100 scale the grade bands assume.
    score = min(100.0, max(0.0, raw_score))

    if score >= 85:
        grade = "A"
    elif score >= 70:
        grade = "B"
    elif score >= 55:
        grade = "C"
    elif score >= 40:
        grade = "D"
    else:
        grade = "F"

    return {
        "score": round(score, 1),
        "grade": grade,
        "avg_click_rate": round(avg_click, 1),
        "avg_report_rate": round(avg_report, 1),
    }


def recommend_training(dept_metrics, repeat_clickers, max_individuals=20):
    """Generate training recommendations from department and user metrics.

    One "department" recommendation is produced for every entry of
    *dept_metrics* whose ``risk_level`` is "HIGH", followed by one
    "individual" recommendation for each of the first *max_individuals*
    entries of *repeat_clickers*, in the order given.

    Args:
        dept_metrics: List of dicts with ``department``, ``click_rate`` and
            ``risk_level`` keys.
        repeat_clickers: List of dicts with a ``click_count`` key and,
            optionally, an ``email`` key (a missing e-mail becomes "").
        max_individuals: Upper bound on individual recommendations.
            Defaults to 20; pass ``None`` to include every repeat clicker.

    Returns:
        A list of recommendation dicts with ``target``, ``type``,
        ``action``, ``priority`` and ``reason`` keys.

    Raises:
        ValueError: If *max_individuals* is negative.
    """
    if max_individuals is not None and max_individuals < 0:
        # A negative bound would silently slice from the end of the list.
        raise ValueError("max_individuals must be non-negative or None")

    department_recs = [
        {
            "target": dept["department"],
            "type": "department",
            "action": "Mandatory phishing awareness training",
            "priority": "HIGH",
            "reason": f"Click rate {dept['click_rate']}% exceeds 30% threshold",
        }
        for dept in dept_metrics
        if dept["risk_level"] == "HIGH"
    ]
    # Slicing with None as the stop value keeps the whole list.
    individual_recs = [
        {
            "target": user.get("email", ""),
            "type": "individual",
            "action": "One-on-one coaching session",
            "priority": "CRITICAL",
            "reason": f"Clicked {user['click_count']} times across simulations",
        }
        for user in repeat_clickers[:max_individuals]
    ]
    return department_recs + individual_recs


def main():
    """Command-line entry point: analyze the CSV inputs and write a JSON report.

    ``--simulation-csv`` feeds the department, trend and repeat-clicker
    analyses selected by ``--action``; training recommendations are added
    only for ``full_analysis``. ``--training-csv`` adds module completion
    figures. The report is written to ``--output`` and stamped with a
    timezone-aware UTC ``generated_at`` value.
    """
    # Local import: only main() needs timezone, and the file-level import
    # block brings in nothing but ``datetime`` itself.
    from datetime import timezone

    parser = argparse.ArgumentParser(description="Anti-Phishing Training Program Agent")
    parser.add_argument("--simulation-csv", help="Phishing simulation results CSV")
    parser.add_argument("--training-csv", help="Training completion CSV")
    parser.add_argument("--output", default="phishing_training_report.json")
    parser.add_argument("--action", choices=[
        "departments", "trends", "repeaters", "completion", "full_analysis"
    ], default="full_analysis")
    args = parser.parse_args()

    # datetime.utcnow() is deprecated and returns a naive value; an aware
    # timestamp carries its UTC offset into the report.
    report = {"generated_at": datetime.now(timezone.utc).isoformat(), "findings": {}}
    findings = report["findings"]

    if not args.simulation_csv and not args.training_csv:
        # Still write the (empty) report, but make the reason visible.
        print("[!] No input CSV provided; report will contain no findings")

    if args.simulation_csv:
        df = load_simulation_results(args.simulation_csv)
        print(f"[+] Loaded {len(df)} simulation results")

        if args.action in ("departments", "full_analysis"):
            metrics = calculate_department_metrics(df)
            findings["department_metrics"] = metrics
            findings["risk_score"] = generate_risk_score(metrics)
            print(f"[+] Departments analyzed: {len(metrics)}")

        if args.action in ("trends", "full_analysis"):
            trend = analyze_trend(df)
            findings["trend_analysis"] = trend
            print(f"[+] Improvement: {trend['improvement_pct']}%")

        if args.action in ("repeaters", "full_analysis"):
            repeaters = identify_repeat_clickers(df)
            findings["repeat_clickers"] = repeaters
            print(f"[+] Repeat clickers: {len(repeaters)}")

        if args.action == "full_analysis":
            # Recommendations combine the department and repeat-clicker results.
            metrics = findings.get("department_metrics", [])
            repeaters = findings.get("repeat_clickers", [])
            findings["recommendations"] = recommend_training(metrics, repeaters)

    # NOTE: completion figures are produced whenever --training-csv is
    # supplied, whichever --action was chosen.
    if args.training_csv:
        tdf = pd.read_csv(args.training_csv)
        completion = calculate_training_completion(tdf)
        findings["training_completion"] = completion
        print(f"[+] Training modules: {len(completion)}")

    # default=str stringifies any value json cannot encode natively.
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2, default=str)
    print(f"[+] Report saved to {args.output}")

if __name__ == "__main__":
    main()