mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-10 21:24:56 +03:00
c47eed6a64
- Fix 25 shell=True subprocess calls with list-based commands - Fix 49 verify=False in defensive skills (env-var override) - Add timeout to 231 HTTP/subprocess/socket calls - Fix 6 SQL injection patterns with whitelist validation - Replace 8 __import__() with standard imports - Remove 701 unused imports across 442 files - Add authorized-testing disclaimers to all offensive skills - Complete 11 incomplete skill directories - Expand 10 stub SKILL.md files with full content - Fix 2 YAML parse errors in frontmatter - Fix 5 pre-existing syntax errors - Convert 22 hardcoded paths/ports to environment variables - Back up 21 redundant skill pairs to .bak - Fix 2 global declaration errors - 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE) - 0 compile errors across all 724 agent.py files
201 lines
7.9 KiB
Python
201 lines
7.9 KiB
Python
#!/usr/bin/env python3
"""Cloud Storage Access Pattern Analyzer - Detects abnormal AWS S3 access patterns via CloudTrail (GCS and Azure Blob are not handled by this script)."""
|
|
|
|
import argparse
import json
import logging
import subprocess
from collections import defaultdict
from datetime import datetime, timedelta, timezone
|
|
|
|
# Root logging setup: INFO and above, each record prefixed with time and level.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)

# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def query_cloudtrail_s3_events(bucket_name, hours_back=24):
    """Query CloudTrail for S3 object events, optionally limited to one bucket.

    Runs ``aws cloudtrail lookup-events`` filtered on the resource type
    ``AWS::S3::Object``, starting ``hours_back`` hours before now (UTC), and
    flattens every returned record into a plain dict.

    NOTE(review): the CloudTrail LookupEvents documentation describes the API
    as covering management/Insights events; object-level data events such as
    GetObject may never be returned by this query -- confirm against the
    target account before relying on the bulk-download detector.

    Args:
        bucket_name: Bucket to keep events for. A falsy value (e.g. ``""``)
            keeps events for every bucket.
        hours_back: Size of the look-back window in hours.

    Returns:
        A list of dicts with the keys ``timestamp``, ``event_name``,
        ``username``, ``source_ip``, ``user_agent``, ``bucket``, ``key`` and
        ``user_arn``. An empty list is returned (and the error is logged)
        when the CLI cannot be started, times out after 120 seconds, exits
        non-zero, or prints output that is not the expected JSON object.
    """
    window_start = datetime.now(timezone.utc) - timedelta(hours=hours_back)
    start_time = window_start.strftime("%Y-%m-%dT%H:%M:%SZ")
    cmd = [
        "aws", "cloudtrail", "lookup-events",
        "--lookup-attributes", "AttributeKey=ResourceType,AttributeValue=AWS::S3::Object",
        "--start-time", start_time,
        "--output", "json",
    ]

    # A missing ``aws`` binary or a hung call is reported the same way as a
    # non-zero exit status instead of aborting the whole run with a traceback.
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    except (OSError, subprocess.TimeoutExpired) as exc:
        logger.error("CloudTrail query could not be completed: %s", exc)
        return []
    if result.returncode != 0:
        logger.error("CloudTrail query failed: %s", result.stderr[:200])
        return []

    try:
        events = json.loads(result.stdout).get("Events") or []
    except (ValueError, AttributeError) as exc:
        logger.error("CloudTrail query returned unexpected output: %s", exc)
        return []

    s3_events = []
    for event in events:
        try:
            ct_event = json.loads(event.get("CloudTrailEvent") or "{}")
        except (ValueError, TypeError):
            logger.warning("Skipping event with malformed CloudTrailEvent payload")
            continue
        # ``dict.get(key, {})`` only covers a missing key; a key that is
        # present with a JSON null would hand back None and break the
        # ``.get`` calls below, hence ``or {}``.
        req_params = ct_event.get("requestParameters") or {}
        identity = ct_event.get("userIdentity") or {}
        if bucket_name and req_params.get("bucketName") != bucket_name:
            continue
        s3_events.append({
            "timestamp": event.get("EventTime", ""),
            "event_name": event.get("EventName", ""),
            "username": event.get("Username", ""),
            "source_ip": ct_event.get("sourceIPAddress", ""),
            "user_agent": ct_event.get("userAgent", ""),
            "bucket": req_params.get("bucketName", ""),
            "key": req_params.get("key", ""),
            "user_arn": identity.get("arn", ""),
        })

    logger.info("Retrieved %d S3 events for bucket '%s'", len(s3_events), bucket_name or "all")
    return s3_events
|
|
|
|
|
|
def detect_bulk_downloads(events, threshold=100):
    """Detect bulk GetObject operations from a single principal.

    Args:
        events: Event dicts carrying at least ``event_name``, ``user_arn``,
            ``key``, ``source_ip`` and ``timestamp``.
        threshold: Minimum number of GetObject events by one ``user_arn``
            that raises an alert.

    Returns:
        One alert dict per principal at or above the threshold, with the
        keys ``user_arn``, ``download_count``, ``unique_keys``,
        ``source_ips`` (sorted), ``first_access``, ``last_access``,
        ``severity`` and ``indicator``.
    """
    user_downloads = defaultdict(list)
    for event in events:
        if event["event_name"] == "GetObject":
            user_downloads[event["user_arn"]].append(event)

    alerts = []
    for user_arn, downloads in user_downloads.items():
        if len(downloads) < threshold:
            continue

        # The input order is not guaranteed to be chronological (CloudTrail's
        # LookupEvents documents newest-first ordering), so derive the window
        # from the values themselves. Assumes timestamps share one format so
        # that min/max is chronological -- TODO confirm for mixed sources.
        stamps = [d["timestamp"] for d in downloads if d["timestamp"]]
        try:
            first_access, last_access = min(stamps), max(stamps)
        except (TypeError, ValueError):
            # No usable timestamps, or values that cannot be compared with
            # each other: fall back to positional first/last.
            first_access = downloads[0]["timestamp"]
            last_access = downloads[-1]["timestamp"]

        alerts.append({
            "user_arn": user_arn,
            "download_count": len(downloads),
            "unique_keys": len({d["key"] for d in downloads}),
            # Sorted so the report is stable from run to run.
            "source_ips": sorted({d["source_ip"] for d in downloads}),
            "first_access": first_access,
            "last_access": last_access,
            "severity": "critical",
            "indicator": "Bulk download (potential exfiltration)",
        })

    logger.info("Found %d bulk download alerts", len(alerts))
    return alerts
|
|
|
|
|
|
def detect_after_hours_access(events, business_start=8, business_end=18):
    """Detect access outside business hours, evaluated in UTC.

    Args:
        events: Event dicts with a ``timestamp`` that is either an ISO-8601
            string (a trailing ``Z`` is accepted) or a ``datetime`` object.
        business_start: First hour (0-23, inclusive) counted as business time.
        business_end: First hour (exclusive bound) after business time.

    Returns:
        Annotated *copies* of the events whose hour lies outside
        ``[business_start, business_end)``, each with ``indicator`` and
        ``severity`` added. Events whose timestamp is missing or cannot be
        interpreted are skipped. The input dicts are left untouched so that
        other detectors run on the same events cannot overwrite each
        other's annotations.
    """
    after_hours = []
    for event in events:
        ts = event.get("timestamp")
        try:
            if isinstance(ts, str):
                dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
            else:
                dt = ts
            # utctimetuple() converts offset-aware values to UTC and leaves
            # naive ones as they are, so the "UTC" label below holds for
            # timestamps that carry a non-zero offset.
            hour = dt.utctimetuple().tm_hour
        except (ValueError, AttributeError):
            continue

        if business_start <= hour < business_end:
            continue

        after_hours.append({
            **event,
            "indicator": f"After-hours access at {hour:02d}:00 UTC",
            "severity": "medium",
        })

    logger.info("Found %d after-hours access events", len(after_hours))
    return after_hours
|
|
|
|
|
|
def detect_new_source_ips(events, known_ips=None):
    """Detect access from IP addresses not in the known baseline.

    Args:
        events: Event dicts carrying a ``source_ip`` entry.
        known_ips: Iterable of baseline addresses; ``None`` means an empty
            baseline, so every non-empty address counts as new.

    Returns:
        Annotated *copies* of the events whose ``source_ip`` is non-empty,
        absent from the baseline, and does not start with ``"AWS Internal"``.
        Each copy gains ``indicator`` and ``severity``; the input dicts are
        not modified, so annotations written by other detectors for the same
        event are not overwritten.
    """
    # Normalise once so membership tests are O(1) even if a list is passed.
    baseline = set(known_ips) if known_ips is not None else set()

    new_ip_events = []
    for event in events:
        ip = event["source_ip"]
        if not ip or ip in baseline or ip.startswith("AWS Internal"):
            continue
        new_ip_events.append({
            **event,
            "indicator": f"New source IP: {ip}",
            "severity": "high",
        })

    unique_new = len({e["source_ip"] for e in new_ip_events})
    logger.info("Found %d events from %d new source IPs", len(new_ip_events), unique_new)
    return new_ip_events
|
|
|
|
|
|
def detect_enumeration(events, threshold=20):
    """Detect ListBucket/ListObjects enumeration patterns.

    Args:
        events: Event dicts carrying ``event_name`` and ``user_arn``.
        threshold: Minimum number of listing events by one ``user_arn``
            that raises an alert.

    Returns:
        One alert dict per principal at or above the threshold, with the
        keys ``user_arn``, ``list_count``, ``severity`` and ``indicator``.
    """
    listing_calls = ("ListBucket", "ListObjects", "ListObjectsV2")

    user_listings = defaultdict(int)
    for event in events:
        if event["event_name"] in listing_calls:
            user_listings[event["user_arn"]] += 1

    alerts = [
        {
            "user_arn": user_arn,
            "list_count": count,
            "severity": "high",
            "indicator": "Bucket enumeration spike (reconnaissance)",
        }
        for user_arn, count in user_listings.items()
        if count >= threshold
    ]

    # Log the outcome like the other detectors in this file do.
    logger.info("Found %d enumeration alerts", len(alerts))
    return alerts
|
|
|
|
|
|
def build_access_baseline(events):
    """Build a statistical baseline of access patterns from the given events.

    Args:
        events: Event dicts carrying ``timestamp`` (ISO-8601 string, a
            trailing ``Z`` is accepted, or a ``datetime`` object),
            ``user_arn`` and ``source_ip``.

    Returns:
        A dict with:
            ``hourly_distribution``: event count per hour of day (UTC for
                offset-aware timestamps); events whose timestamp cannot be
                interpreted are left out of this histogram only.
            ``user_request_counts``: event count per ``user_arn``.
            ``known_ips``: sorted list of the distinct non-empty source IPs.
            ``total_events``: ``len(events)``.
    """
    hourly_counts = defaultdict(int)
    user_counts = defaultdict(int)
    ip_set = set()

    for event in events:
        ts = event["timestamp"]
        try:
            # Accept datetime objects as well as strings, matching what
            # detect_after_hours_access tolerates for the same field.
            if isinstance(ts, str):
                dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
            else:
                dt = ts
            # utctimetuple() normalises offset-aware values to UTC.
            hourly_counts[dt.utctimetuple().tm_hour] += 1
        except (ValueError, AttributeError):
            # Unusable timestamp: still count the event per user and IP.
            pass

        user_counts[event["user_arn"]] += 1
        if event["source_ip"]:
            ip_set.add(event["source_ip"])

    return {
        "hourly_distribution": dict(hourly_counts),
        "user_request_counts": dict(user_counts),
        # Sorted so the baseline is stable from run to run.
        "known_ips": sorted(ip_set),
        "total_events": len(events),
    }
|
|
|
|
|
|
def generate_report(events, bulk_alerts, after_hours, new_ips, enum_alerts, baseline):
    """Generate the cloud storage access analysis report.

    Args:
        events: All events that were analysed.
        bulk_alerts: Alerts from the bulk-download detector.
        after_hours: Events flagged as after-hours access.
        new_ips: Events flagged as coming from new source IPs.
        enum_alerts: Alerts from the enumeration detector.
        baseline: Mapping that may provide ``known_ips`` and ``total_events``.

    Returns:
        A dict holding the generation timestamp (timezone-aware UTC,
        ISO-8601), the alert lists, the after-hours and new-IP event counts,
        a baseline summary, and up to ten sample events for each of the
        after-hours and new-IP findings. A one-line summary is also printed
        to stdout.
    """
    report = {
        # datetime.utcnow() is deprecated and yields a naive value; an aware
        # UTC timestamp makes the report unambiguous about its time zone.
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "total_events_analyzed": len(events),
        "bulk_download_alerts": bulk_alerts,
        "after_hours_access": len(after_hours),
        "new_source_ip_events": len(new_ips),
        "enumeration_alerts": enum_alerts,
        "baseline_summary": {
            "known_ips": len(baseline.get("known_ips", [])),
            "total_baseline_events": baseline.get("total_events", 0),
        },
        "sample_after_hours": after_hours[:10],
        "sample_new_ips": new_ips[:10],
    }

    # New-IP findings count as a single alert regardless of how many events
    # matched; after-hours events are reported but not counted as alerts.
    total_alerts = len(bulk_alerts) + len(enum_alerts) + (1 if new_ips else 0)
    print(f"CLOUD STORAGE REPORT: {len(events)} events, {total_alerts} alerts")
    return report
|
|
|
|
|
|
def main():
    """Parse CLI arguments, run every detector, and write the JSON report.

    New-source-IP detection runs only when ``--known-ips-file`` is supplied.
    The baseline built from the events under analysis already contains every
    source IP in those events, so using it as the "known" set would make the
    check unable to flag anything; without an independent baseline the check
    is skipped instead.
    """
    parser = argparse.ArgumentParser(description="Cloud Storage Access Pattern Analyzer")
    parser.add_argument("--bucket", default="", help="S3 bucket name to analyze")
    parser.add_argument("--hours-back", type=int, default=24)
    parser.add_argument("--bulk-threshold", type=int, default=100)
    parser.add_argument("--known-ips-file", help="File with known IP baselines")
    parser.add_argument("--output", default="s3_access_report.json")
    args = parser.parse_args()

    events = query_cloudtrail_s3_events(args.bucket, args.hours_back)
    baseline = build_access_baseline(events)

    bulk_alerts = detect_bulk_downloads(events, args.bulk_threshold)
    after_hours = detect_after_hours_access(events)

    if args.known_ips_file:
        # One address per line; blank lines are ignored.
        with open(args.known_ips_file, encoding="utf-8") as f:
            known_ips = {line.strip() for line in f if line.strip()}
        new_ips = detect_new_source_ips(events, known_ips)
    else:
        logger.info("No --known-ips-file supplied; skipping new source IP detection")
        new_ips = []

    enum_alerts = detect_enumeration(events)

    report = generate_report(events, bulk_alerts, after_hours, new_ips, enum_alerts, baseline)
    with open(args.output, "w", encoding="utf-8") as f:
        # default=str stringifies any value json cannot encode natively
        # (for example datetime objects carried in event timestamps).
        json.dump(report, f, indent=2, default=str)
    logger.info("Report saved to %s", args.output)


if __name__ == "__main__":
    main()
|