Files

348 lines
14 KiB
Python

#!/usr/bin/env python3
# For authorized Microsoft 365 compliance administration only
"""Microsoft Purview DLP Management Agent - Automates DLP policy deployment and monitoring via Graph API."""
import json
import logging
import argparse
import csv
from datetime import datetime, timezone, timedelta
from pathlib import Path
import requests
# Configure logging at import time: INFO level, lines formatted as
# "<timestamp> [<LEVEL>] <message>".
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
# Base URLs of the Microsoft Graph v1.0 and beta endpoints.
GRAPH_BASE = "https://graph.microsoft.com/v1.0"
GRAPH_BETA = "https://graph.microsoft.com/beta"
class PurviewAuthClient:
    """OAuth2 client-credentials helper that caches a Microsoft Graph bearer token."""

    def __init__(self, tenant_id, client_id, client_secret):
        # Identity of the app registration used for the token request.
        self.tenant_id = tenant_id
        self.client_id = client_id
        self.client_secret = client_secret
        # Token cache; both fields stay None until get_token() succeeds once.
        self.access_token = None
        self.token_expiry = None

    def get_token(self):
        """Return a bearer token, requesting a new one when none is cached or it is stale.

        Returns:
            The access token string.

        Raises:
            requests.HTTPError: when the token endpoint answers with an
                error status (raised by ``raise_for_status``).
            KeyError: when the response body has no ``access_token`` field.
        """
        cache_is_fresh = bool(
            self.access_token
            and self.token_expiry
            and datetime.now(timezone.utc) < self.token_expiry
        )
        if cache_is_fresh:
            return self.access_token

        endpoint = f"https://login.microsoftonline.com/{self.tenant_id}/oauth2/v2.0/token"
        form = {
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "scope": "https://graph.microsoft.com/.default",
            "grant_type": "client_credentials",
        }
        reply = requests.post(endpoint, data=form, timeout=30)
        reply.raise_for_status()
        body = reply.json()

        self.access_token = body["access_token"]
        lifetime = body.get("expires_in", 3600)
        # Consider the token expired 300 seconds early so it is refreshed
        # before the real expiry is reached.
        self.token_expiry = datetime.now(timezone.utc) + timedelta(seconds=lifetime - 300)
        logger.info("Obtained Graph API access token (expires in %d seconds)", lifetime)
        return self.access_token

    def headers(self):
        """Return the HTTP headers for a Graph call, fetching a token if needed."""
        bearer = self.get_token()
        return {
            "Authorization": f"Bearer {bearer}",
            "Content-Type": "application/json",
        }
def get_dlp_alerts(auth_client, days_back=7, severity=None, top=50):
    """Fetch DLP alerts from the Graph Security ``alerts_v2`` endpoint.

    Args:
        auth_client: Object whose ``headers()`` returns the request headers.
        days_back: Only alerts created within this many days are requested.
        severity: Optional severity value added to the filter when truthy.
        top: Value sent as the ``$top`` query option.

    Returns:
        The list stored under ``value`` in the response (empty when absent).

    Raises:
        requests.HTTPError: when the endpoint answers with an error status.
    """
    window_start = datetime.now(timezone.utc) - timedelta(days=days_back)
    start_stamp = window_start.strftime("%Y-%m-%dT%H:%M:%SZ")

    clauses = [
        "serviceSource eq 'microsoftDataLossPrevention'",
        f"createdDateTime ge {start_stamp}",
    ]
    if severity:
        clauses += [f"severity eq '{severity}'"]

    query = {
        "$filter": " and ".join(clauses),
        "$top": top,
        "$orderby": "createdDateTime desc",
    }
    reply = requests.get(
        f"{GRAPH_BASE}/security/alerts_v2",
        headers=auth_client.headers(),
        params=query,
        timeout=60,
    )
    reply.raise_for_status()

    found = reply.json().get("value", [])
    logger.info("Retrieved %d DLP alerts from last %d days", len(found), days_back)
    return found
def get_sensitivity_labels(auth_client):
    """Fetch the tenant's sensitivity labels from the Graph beta endpoint.

    Args:
        auth_client: Object whose ``headers()`` returns the request headers.

    Returns:
        The list stored under ``value`` in the response (empty when absent).

    Raises:
        requests.HTTPError: when the endpoint answers with an error status.
    """
    endpoint = f"{GRAPH_BETA}/security/informationProtection/sensitivityLabels"
    reply = requests.get(endpoint, headers=auth_client.headers(), timeout=30)
    reply.raise_for_status()
    body = reply.json()
    found = body.get("value", [])
    logger.info("Retrieved %d sensitivity labels", len(found))
    return found
def evaluate_dlp_protection_scope(auth_client, user_id):
    """POST an ``evaluateApplication`` request for one user on the Graph beta endpoint.

    Args:
        auth_client: Object whose ``headers()`` returns the request headers.
        user_id: Identifier interpolated into the ``/users/{user_id}`` path.

    Returns:
        The decoded JSON body when the response status is 200; otherwise
        ``None`` (a warning with the status code is logged).
    """
    endpoint = (
        f"{GRAPH_BETA}/users/{user_id}"
        "/security/informationProtection/policy/evaluateApplication"
    )
    content_info = {
        "@odata.type": "#microsoft.graph.security.contentInfo",
        "format@odata.type": "#microsoft.graph.security.contentFormat",
        "format": "default",
    }
    reply = requests.post(
        endpoint,
        headers=auth_client.headers(),
        json={"contentInfo": content_info},
        timeout=30,
    )
    # Guard clause: anything other than 200 is reported and yields None.
    if reply.status_code != 200:
        logger.warning("DLP evaluation for user %s returned status %d", user_id, reply.status_code)
        return None
    return reply.json()
def _alert_user_principal_names(alert):
    """Return the user principal names associated with one alert.

    The legacy ``userStates`` collection is used when it is present and
    non-empty (one entry per state, "Unknown" when a state has no UPN key).
    Otherwise the names are taken from ``evidence[].userAccount``, which is
    where the Graph ``alerts_v2`` alert resource carries account details;
    each name is reported once per alert even if several evidence items
    mention it.
    """
    legacy_states = alert.get("userStates") or []
    if legacy_states:
        return [state.get("userPrincipalName", "Unknown") for state in legacy_states]

    upns = []
    for evidence in alert.get("evidence") or []:
        account = evidence.get("userAccount") or {}
        upn = account.get("userPrincipalName")
        if upn and upn not in upns:
            upns.append(upn)
    return upns


def generate_alert_summary(alerts):
    """Generate summary statistics from DLP alerts.

    Args:
        alerts: Iterable of alert dicts. The keys read are ``severity``,
            ``title``, ``status``, ``userStates`` and ``evidence``; missing
            or null values fall back to "informational", "Unknown Policy"
            and "new" respectively.

    Returns:
        A dict with ``total_alerts``, ``severity_breakdown``,
        ``status_breakdown``, ``top_policies`` and ``top_users`` (the last
        two hold at most ten entries, highest count first).
    """
    severity_counts = {"high": 0, "medium": 0, "low": 0, "informational": 0}
    status_counts = {"new": 0, "inProgress": 0, "resolved": 0}
    policy_counts = {}
    user_counts = {}

    for alert in alerts:
        # ``or`` also covers an explicit null, which would otherwise break .lower().
        sev = (alert.get("severity") or "informational").lower()
        severity_counts[sev] = severity_counts.get(sev, 0) + 1

        title = alert.get("title") or "Unknown Policy"
        policy_counts[title] = policy_counts.get(title, 0) + 1

        status = alert.get("status") or "new"
        status_counts[status] = status_counts.get(status, 0) + 1

        for upn in _alert_user_principal_names(alert):
            user_counts[upn] = user_counts.get(upn, 0) + 1

    # sorted() is stable, so ties keep first-seen order.
    top_policies = sorted(policy_counts.items(), key=lambda x: x[1], reverse=True)[:10]
    top_users = sorted(user_counts.items(), key=lambda x: x[1], reverse=True)[:10]
    return {
        "total_alerts": len(alerts),
        "severity_breakdown": severity_counts,
        "status_breakdown": status_counts,
        "top_policies": [{"policy": p, "count": c} for p, c in top_policies],
        "top_users": [{"user": u, "count": c} for u, c in top_users],
    }
def generate_label_report(labels):
    """Generate a report of sensitivity label configuration.

    Args:
        labels: Iterable of label dicts as returned by the Graph
            sensitivity-label listing.

    Returns:
        A list with one dict per label (keys ``id``, ``name``,
        ``description``, ``color``, ``sensitivity``, ``is_active``,
        ``parent_id``, ``content_formats``, ``has_protection``), sorted by
        ascending sensitivity. A null sensitivity sorts as 0.
    """
    report = []
    for label in labels:
        parent = label.get("parent") or {}
        # The Graph security sensitivityLabel resource exposes the flag as
        # ``hasProtection``; ``protectionEnabled`` is still honoured so inputs
        # that use the older key keep producing the same result.
        protected = label.get("hasProtection")
        if protected is None:
            protected = label.get("protectionEnabled")
        report.append({
            "id": label.get("id"),
            "name": label.get("name"),
            "description": label.get("description", ""),
            "color": label.get("color", ""),
            "sensitivity": label.get("sensitivity", 0),
            "is_active": label.get("isActive", False),
            "parent_id": parent.get("id"),
            "content_formats": label.get("contentFormats", []),
            "has_protection": bool(protected),
        })
    # ``or 0`` keeps the sort from comparing None with int when a label
    # carries an explicit null sensitivity.
    return sorted(report, key=lambda entry: entry["sensitivity"] or 0)
def check_policy_health(alerts, threshold_high=10, threshold_override_pct=20.0,
                        threshold_noisy=100, threshold_unresolved=50):
    """Analyze DLP policy health based on alert patterns.

    Args:
        alerts: List of alert dicts; the keys read are ``severity``,
            ``title`` and ``status``.
        threshold_high: A HIGH_ALERT_VOLUME finding is emitted when the number
            of high-severity alerts exceeds this value.
        threshold_override_pct: Accepted for interface compatibility; no check
            in this function uses it.
        threshold_noisy: A NOISY_POLICY finding is emitted for every alert
            title whose count exceeds this value.
        threshold_unresolved: An UNRESOLVED_ALERT_BACKLOG finding is emitted
            when the number of alerts in ``new`` status exceeds this value.

    Returns:
        A list of finding dicts (``finding``, ``severity``, ``detail``,
        ``recommendation``). When no check fires, the list holds a single
        HEALTHY entry.
    """
    findings = []

    # ``or ""`` tolerates an explicit null severity instead of raising on .lower().
    high_severity = [a for a in alerts if (a.get("severity") or "").lower() == "high"]
    if len(high_severity) > threshold_high:
        findings.append({
            "finding": "HIGH_ALERT_VOLUME",
            "severity": "WARNING",
            "detail": f"{len(high_severity)} high-severity DLP alerts in the analysis period. "
                      f"Threshold: {threshold_high}. Investigate for data exfiltration patterns.",
            "recommendation": "Review top-triggered policies and affected users. Check for "
                              "compromised accounts or policy misconfiguration.",
        })

    # Alerts are grouped by title, which this script treats as the policy name.
    policy_alert_counts = {}
    for alert in alerts:
        title = alert.get("title", "Unknown")
        policy_alert_counts[title] = policy_alert_counts.get(title, 0) + 1
    for policy, count in policy_alert_counts.items():
        if count > threshold_noisy:
            findings.append({
                "finding": "NOISY_POLICY",
                "severity": "INFO",
                "detail": f"Policy '{policy}' generated {count} alerts. May indicate "
                          f"false positive issues or overly broad matching rules.",
                "recommendation": "Review SIT confidence thresholds and policy conditions. "
                                  "Consider increasing MinConfidence or adding exclusions.",
            })

    unresolved = [a for a in alerts if a.get("status") == "new"]
    if len(unresolved) > threshold_unresolved:
        findings.append({
            "finding": "UNRESOLVED_ALERT_BACKLOG",
            "severity": "WARNING",
            "detail": f"{len(unresolved)} DLP alerts in 'new' status. Alert fatigue risk.",
            "recommendation": "Assign alerts to compliance analysts. Configure auto-resolution "
                              "for low-severity informational alerts. Implement alert triage SOP.",
        })

    if not findings:
        findings.append({
            "finding": "HEALTHY",
            "severity": "INFO",
            "detail": "DLP policy health checks passed. No anomalies detected.",
            "recommendation": "Continue regular monitoring. Schedule quarterly policy review.",
        })
    return findings
def _first_alert_user(alert):
    """Return the first user principal name found on an alert, or "N/A".

    The legacy ``userStates`` collection wins when it is non-empty. Otherwise
    the first ``evidence[].userAccount.userPrincipalName`` is used, which is
    where the Graph ``alerts_v2`` alert resource carries account details.
    """
    user_states = alert.get("userStates") or []
    if user_states:
        return user_states[0].get("userPrincipalName", "N/A")
    for evidence in alert.get("evidence") or []:
        account = evidence.get("userAccount") or {}
        upn = account.get("userPrincipalName")
        if upn:
            return upn
    return "N/A"


def export_alerts_csv(alerts, output_path):
    """Export DLP alerts to CSV for compliance reporting.

    Args:
        alerts: List of alert dicts. The columns written are ``id``,
            ``title``, ``severity``, ``status``, ``createdDateTime``,
            ``description`` and ``category``, plus a ``user`` column holding
            the first user principal name found on the alert.
        output_path: Destination file; it is created or overwritten as UTF-8.

    Returns:
        None. The number of exported alerts is logged.
    """
    fieldnames = [
        "id", "title", "severity", "status", "createdDateTime",
        "user", "description", "category",
    ]
    # newline="" lets the csv module control line endings itself.
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for alert in alerts:
            writer.writerow({
                "id": alert.get("id", ""),
                "title": alert.get("title", ""),
                "severity": alert.get("severity", ""),
                "status": alert.get("status", ""),
                "createdDateTime": alert.get("createdDateTime", ""),
                "user": _first_alert_user(alert),
                "description": alert.get("description", ""),
                "category": alert.get("category", ""),
            })
    logger.info("Exported %d alerts to %s", len(alerts), output_path)
def generate_compliance_report(auth_client, days_back=30, output_dir="."):
    """Build the full DLP compliance report, write it to disk and print a summary.

    Two files are written into ``output_dir`` (created if missing):
    ``dlp_compliance_report.json`` and ``dlp_alerts_export.csv``.

    Args:
        auth_client: Client passed through to the Graph helper functions.
        days_back: Look-back window, in days, for the alert query.
        output_dir: Directory that receives the two output files.

    Returns:
        The report dict that was serialized to the JSON file.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    logger.info("Generating DLP compliance report for last %d days", days_back)

    # Collect the data: alerts first, then the label inventory.
    alerts = get_dlp_alerts(auth_client, days_back=days_back, top=500)
    summary = generate_alert_summary(alerts)
    findings = check_policy_health(alerts)
    label_rows = generate_label_report(get_sensitivity_labels(auth_client))

    report = {
        "report_generated": datetime.now(timezone.utc).isoformat(),
        "analysis_period_days": days_back,
        "alert_summary": summary,
        "policy_health": findings,
        "sensitivity_labels": label_rows,
        # Only the first 100 alerts are embedded in the JSON report.
        "alert_details": alerts[:100],
    }

    json_path = target_dir / "dlp_compliance_report.json"
    json_path.write_text(json.dumps(report, indent=2, default=str))
    logger.info("Compliance report saved to %s", json_path)

    csv_path = target_dir / "dlp_alerts_export.csv"
    export_alerts_csv(alerts, csv_path)

    # Console summary.
    rule = "=" * 70
    print("\n" + rule)
    print("DLP COMPLIANCE REPORT SUMMARY")
    print(rule)
    print(f"Report Period: Last {days_back} days")
    print(f"Total Alerts: {summary['total_alerts']}")
    by_severity = summary["severity_breakdown"]
    print(
        f"Severity: High={by_severity['high']}, "
        f"Medium={by_severity['medium']}, "
        f"Low={by_severity['low']}"
    )
    print(f"Sensitivity Labels Configured: {len(label_rows)}")
    print(f"\nPolicy Health Findings: {len(findings)}")
    for item in findings:
        print(f" [{item['severity']}] {item['finding']}: {item['detail']}")
    print("\nTop Triggered Policies:")
    for row in summary.get("top_policies", [])[:5]:
        print(f" - {row['policy']}: {row['count']} alerts")
    print("\nTop Affected Users:")
    for row in summary.get("top_users", [])[:5]:
        print(f" - {row['user']}: {row['count']} alerts")
    print(rule)
    print(f"Full report: {json_path}")
    print(f"Alert export: {csv_path}")
    return report
def _write_json_output(out_path, payload):
    """Write *payload* as indented JSON to *out_path*, creating missing parent directories."""
    destination = Path(out_path)
    # Without this, a not-yet-existing --output-dir makes write_text fail.
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(json.dumps(payload, indent=2, default=str))


def main():
    """Command-line entry point: parse arguments and run the selected action.

    Actions:
        alerts: fetch alerts, write a summary plus the raw alerts as JSON.
        labels: fetch sensitivity labels, write the label report as JSON.
        health: fetch up to 500 alerts, write and log the health findings.
        report: run the full compliance report into ``--output-dir``
            (``--output`` is not consulted for this action).

    For the first three actions the destination is ``--output`` when given,
    otherwise a default file name inside ``--output-dir``; the destination's
    parent directory is created when it does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Microsoft Purview DLP Management Agent - Monitor and report on DLP policies"
    )
    parser.add_argument("--tenant-id", required=True, help="Azure AD tenant ID")
    parser.add_argument("--client-id", required=True, help="App registration client ID")
    parser.add_argument("--client-secret", required=True,
                        help="App registration client secret")
    parser.add_argument("--action", required=True,
                        choices=["alerts", "labels", "health", "report"],
                        help="Action to perform")
    parser.add_argument("--days", type=int, default=7,
                        help="Number of days to look back for alerts (default: 7)")
    parser.add_argument("--severity", choices=["high", "medium", "low", "informational"],
                        help="Filter alerts by severity")
    parser.add_argument("--output-dir", default=".",
                        help="Directory for output files (default: current directory)")
    parser.add_argument("--output", help="Output file path (overrides default naming)")
    args = parser.parse_args()

    auth_client = PurviewAuthClient(args.tenant_id, args.client_id, args.client_secret)

    if args.action == "alerts":
        alerts = get_dlp_alerts(auth_client, days_back=args.days, severity=args.severity)
        summary = generate_alert_summary(alerts)
        out_path = args.output or Path(args.output_dir) / "dlp_alerts.json"
        _write_json_output(out_path, {"summary": summary, "alerts": alerts})
        logger.info("Alert report saved to %s (%d alerts)", out_path, len(alerts))
    elif args.action == "labels":
        labels = get_sensitivity_labels(auth_client)
        label_report = generate_label_report(labels)
        out_path = args.output or Path(args.output_dir) / "sensitivity_labels.json"
        _write_json_output(out_path, label_report)
        logger.info("Label report saved to %s (%d labels)", out_path, len(labels))
    elif args.action == "health":
        alerts = get_dlp_alerts(auth_client, days_back=args.days, top=500)
        findings = check_policy_health(alerts)
        out_path = args.output or Path(args.output_dir) / "dlp_health.json"
        _write_json_output(out_path, findings)
        logger.info("Health report saved to %s (%d findings)", out_path, len(findings))
        # Echo every finding to the log at a level matching its severity.
        for finding in findings:
            level = logging.WARNING if finding["severity"] == "WARNING" else logging.INFO
            logger.log(level, "[%s] %s: %s", finding["severity"], finding["finding"],
                       finding["detail"])
    elif args.action == "report":
        generate_compliance_report(auth_client, days_back=args.days, output_dir=args.output_dir)


if __name__ == "__main__":
    main()