Files
Anthropic-Cybersecurity-Skills/skills/hunting-for-webshell-activity/scripts/agent.py
T
mukul975 27c6414ca5 Add folder anatomy (scripts/agent.py + references/api-reference.md) for 648 cybersecurity skills
Complete skill folder anatomy across all cybersecurity skills:
- scripts/agent.py: 80-150 line Python agents using real libraries (impacket,
  boto3, azure-mgmt-*, kubernetes, pefile, yara, scapy, shodan, stix2, etc.)
- references/api-reference.md: real API documentation with method signatures
- LICENSE: MIT license for all skill folders
2026-03-10 21:02:12 +01:00

178 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""Agent for hunting web shell activity on web servers via process tree and log analysis."""
import json
import argparse
import re
from datetime import datetime
from collections import defaultdict
from pathlib import Path
# Lower-case name fragments that mark a process as a web/application server.
# The detectors below test them as substrings of the lower-cased parent image.
WEB_SERVER_PROCESSES = [
    "w3wp.exe",
    "httpd",
    "apache2",
    "nginx",
    "tomcat",
    "java",
    "php-cgi",
    "php-fpm",
    "node",
    "iisexpress",
]

# Lower-case names of interpreters and built-in utilities that are treated as
# a "shell spawn" when a web server process launches them.
SHELL_SPAWNS = [
    "cmd.exe",
    "powershell.exe",
    "pwsh.exe",
    "bash",
    "sh",
    "wscript.exe",
    "cscript.exe",
    "certutil.exe",
    "whoami.exe",
    "net.exe",
    "net1.exe",
    "ipconfig.exe",
    "systeminfo.exe",
    "tasklist.exe",
    "nslookup.exe",
]

# Regular expressions searched (case-insensitively) in each access-log line.
WEBSHELL_HTTP_PATTERNS = [
    # POST request whose target ends in a server-side script extension.
    r"POST\s+.*\.(asp|aspx|php|jsp|jspx)\s+",
    # Request parameters that carry a command.
    r"cmd=",
    r"exec=",
    r"command=",
    r"shell=",
    # Tool / family names.
    r"c99shell",
    r"r57shell",
    r"b374k",
    r"weevely",
    r"china\s*chopper",
    r"antsword",
]
def load_process_logs(log_path):
    """Load event records from a JSON-lines file.

    Every line is parsed on its own. Lines that are blank, are not valid
    JSON, or decode to something other than a JSON object are skipped, so the
    returned list only ever contains dicts and callers can safely use
    ``.get`` on each entry.

    Args:
        log_path: Path of the JSON-lines file to read.

    Returns:
        A list of dicts, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    entries = []
    # "utf-8-sig" drops a leading byte-order mark that would otherwise make
    # json.loads reject the first record; errors="replace" keeps one bad byte
    # from aborting the whole load.
    with open(log_path, encoding="utf-8-sig", errors="replace") as f:
        for line in f:
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Scalars and arrays are valid JSON but are not event records.
            if isinstance(record, dict):
                entries.append(record)
    return entries
def detect_webserver_child_shells(process_logs, web_servers=None,
                                  shell_names=None):
    """Detect shell processes spawned by web server processes.

    An entry is reported when its parent image contains one of the web server
    names and the executable name of its child image contains one of the
    shell names. Both comparisons are case-insensitive.

    Args:
        process_logs: Iterable of process-creation records (dicts). The
            parent is read from ``ParentImage`` or ``parent_process``, the
            child from ``Image`` or ``process_name`` and the command line
            from ``CommandLine`` or ``command_line``. Entries that are not
            dicts are ignored; missing or null fields count as empty strings.
        web_servers: Optional override for the web server name fragments;
            defaults to ``WEB_SERVER_PROCESSES``.
        shell_names: Optional override for the shell name fragments;
            defaults to ``SHELL_SPAWNS``.

    Returns:
        A list of finding dicts with the keys ``timestamp``, ``hostname``,
        ``parent_process``, ``child_process``, ``command_line`` (truncated to
        500 characters), ``user``, ``severity`` and ``technique``.
    """
    if web_servers is None:
        web_servers = WEB_SERVER_PROCESSES
    if shell_names is None:
        shell_names = SHELL_SPAWNS
    web_servers = [str(name).lower() for name in web_servers]
    shell_names = [str(name).lower() for name in shell_names]

    findings = []
    for entry in process_logs:
        if not isinstance(entry, dict):
            continue
        # "or ''" turns JSON null into an empty string instead of crashing.
        parent_raw = entry.get("ParentImage", entry.get("parent_process", ""))
        child_raw = entry.get("Image", entry.get("process_name", ""))
        parent = str(parent_raw or "").lower()
        child = str(child_raw or "").lower()

        # Compare shell names against the executable name only. Matching the
        # whole path would let a directory such as "/usr/share/" or
        # "C:\Users\sharon\" satisfy the "sh" entry and flag any process
        # launched from there.
        child_name = child.replace("\\", "/").rsplit("/", 1)[-1]

        is_web_parent = any(ws in parent for ws in web_servers)
        is_shell_child = any(sh in child_name for sh in shell_names)
        if not (is_web_parent and is_shell_child):
            continue

        cmd = entry.get("CommandLine", entry.get("command_line", ""))
        findings.append({
            "timestamp": entry.get("UtcTime", entry.get("timestamp", "")),
            "hostname": entry.get("Computer", entry.get("hostname", "")),
            "parent_process": parent,
            "child_process": child,
            "command_line": str(cmd or "")[:500],
            "user": entry.get("User", ""),
            "severity": "CRITICAL",
            "technique": "T1505.003",
        })
    return findings
def analyze_web_access_logs(access_log_path, patterns=None):
    """Scan a web access log for webshell indicators.

    Each line is tested against the patterns in order; the first pattern that
    matches (case-insensitively) produces one finding for that line.

    Args:
        access_log_path: Path of the access log to read.
        patterns: Optional iterable of regular-expression strings; defaults
            to ``WEBSHELL_HTTP_PATTERNS``.

    Returns:
        A list of finding dicts with the keys ``line_number`` (1-based),
        ``source_ip`` (the first whitespace-delimited token of the line, or
        ``""`` when the line starts with whitespace), ``log_entry`` (stripped
        and truncated to 500 characters), ``pattern_matched`` and
        ``severity``.

    Raises:
        OSError: If the file cannot be opened.
    """
    if patterns is None:
        patterns = WEBSHELL_HTTP_PATTERNS
    # Compile once instead of re-resolving every pattern for every line.
    compiled = [(text, re.compile(text, re.IGNORECASE)) for text in patterns]

    findings = []
    # Request lines are client-controlled and may contain bytes that are not
    # valid UTF-8; errors="replace" keeps such a line from aborting the scan.
    with open(access_log_path, encoding="utf-8", errors="replace") as f:
        for line_number, line in enumerate(f, 1):
            matched = next(
                (text for text, regex in compiled if regex.search(line)),
                None,
            )
            if matched is None:
                continue
            ip_match = re.match(r"^(\S+)", line)
            findings.append({
                "line_number": line_number,
                "source_ip": ip_match.group(1) if ip_match else "",
                "log_entry": line.strip()[:500],
                "pattern_matched": matched,
                "severity": "HIGH",
            })
    return findings
def detect_file_creation_in_webroot(file_events, webroot_paths=None):
    """Detect script files created inside web server directories.

    An event is reported when its path contains one of the web root fragments
    and ends with a server-side script extension. The comparison ignores case
    and treats ``\\`` and ``/`` as the same separator, so the forward-slash
    fragment ``inetpub/wwwroot`` also matches ``C:\\inetpub\\wwwroot\\...``.

    Args:
        file_events: Iterable of file-creation records (dicts). The path is
            read from ``TargetFilename`` or ``file_path``. Entries that are
            not dicts are ignored; a missing or null path counts as empty.
        webroot_paths: Optional list of path fragments that identify a web
            root; a built-in list is used when omitted.

    Returns:
        A list of finding dicts with the keys ``timestamp``, ``file_path``
        (the lower-cased path as logged), ``process``, ``hostname``,
        ``severity`` and ``reason``.
    """
    if webroot_paths is None:
        webroot_paths = [
            "/var/www", "/opt/lampp/htdocs", "inetpub/wwwroot",
            "/usr/share/nginx/html", "/srv/www",
        ]
    script_extensions = (
        ".php", ".asp", ".aspx", ".jsp", ".jspx", ".cgi", ".cfm",
    )
    # Normalise the fragments the same way as the event paths below, so that
    # caller-supplied roots with upper case or backslashes still match.
    roots = [str(root).replace("\\", "/").lower() for root in webroot_paths]

    findings = []
    for event in file_events:
        if not isinstance(event, dict):
            continue
        raw_path = event.get("TargetFilename", event.get("file_path", ""))
        filepath = str(raw_path or "").lower()
        # Separator-normalised copy used for matching only; the reported
        # path keeps the separators it was logged with.
        comparable = filepath.replace("\\", "/")

        in_webroot = any(root in comparable for root in roots)
        is_script = filepath.endswith(script_extensions)
        if in_webroot and is_script:
            findings.append({
                "timestamp": event.get("UtcTime", event.get("timestamp", "")),
                "file_path": filepath,
                "process": event.get("Image", event.get("process_name", "")),
                "hostname": event.get("Computer", event.get("hostname", "")),
                "severity": "CRITICAL",
                "reason": "script_created_in_webroot",
            })
    return findings
def detect_post_exploitation(process_logs, web_servers=None):
    """Detect reconnaissance commands launched by web server processes.

    Only entries whose parent image contains one of the web server names are
    considered. Their command line is tested against a fixed list of
    reconnaissance patterns in order, and the first match decides the
    reported category.

    Args:
        process_logs: Iterable of process-creation records (dicts). The
            parent is read from ``ParentImage`` or ``parent_process`` and the
            command line from ``CommandLine`` or ``command_line``. Entries
            that are not dicts are ignored; missing or null fields count as
            empty strings.
        web_servers: Optional override for the web server name fragments;
            defaults to ``WEB_SERVER_PROCESSES``.

    Returns:
        A list of finding dicts with the keys ``timestamp``, ``command``
        (truncated to 300 characters), ``category``, ``parent`` and
        ``severity``.
    """
    if web_servers is None:
        web_servers = WEB_SERVER_PROCESSES
    web_servers = [str(name).lower() for name in web_servers]

    # Compiled once; the order matters because the first match wins.
    recon_patterns = [
        (re.compile(pattern, re.IGNORECASE), category)
        for pattern, category in (
            (r"whoami", "user_discovery"),
            (r"ipconfig|ifconfig", "network_config"),
            (r"net\s+(user|group|localgroup)", "account_enum"),
            (r"systeminfo", "system_info"),
            (r"tasklist|ps\s+aux", "process_enum"),
            (r"netstat\s+-an", "connection_enum"),
            (r"dir\s+/s|find\s+/|ls\s+-la", "file_enum"),
        )
    ]

    findings = []
    for entry in process_logs:
        if not isinstance(entry, dict):
            continue
        # "or ''" turns JSON null into an empty string instead of crashing.
        parent_raw = entry.get("ParentImage", entry.get("parent_process", ""))
        parent = str(parent_raw or "").lower()
        if not any(ws in parent for ws in web_servers):
            continue

        cmd_raw = entry.get("CommandLine", entry.get("command_line", ""))
        cmd = str(cmd_raw or "")
        for regex, category in recon_patterns:
            if regex.search(cmd):
                findings.append({
                    "timestamp": entry.get("UtcTime", entry.get("timestamp", "")),
                    "command": cmd[:300],
                    "category": category,
                    "parent": parent,
                    "severity": "HIGH",
                })
                break
    return findings
def main(argv=None):
    """Run the selected hunts and write a JSON report.

    Each of ``--process-log``, ``--access-log`` and ``--file-events`` is
    optional; only the hunts whose input was supplied are run. A summary line
    per hunt is printed and the combined findings are written to
    ``--output``.

    Args:
        argv: Optional list of command-line arguments; ``sys.argv[1:]`` is
            used when omitted.
    """
    # Local import so this function does not rely on a file-level change.
    from datetime import timezone

    parser = argparse.ArgumentParser(description="Webshell Activity Hunter")
    parser.add_argument("--process-log", help="JSON lines process creation log")
    parser.add_argument("--access-log", help="Web server access log")
    parser.add_argument("--file-events", help="JSON lines file creation events")
    parser.add_argument("--output", default="webshell_hunt_report.json")
    args = parser.parse_args(argv)

    # datetime.utcnow() is deprecated and returns a naive value; an aware
    # timestamp carries its "+00:00" offset into the report.
    report = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "findings": {},
    }

    if args.process_log:
        logs = load_process_logs(args.process_log)
        shells = detect_webserver_child_shells(logs)
        report["findings"]["shell_spawns"] = shells
        print(f"[+] Web server shell spawns: {len(shells)}")
        recon = detect_post_exploitation(logs)
        report["findings"]["post_exploitation"] = recon
        print(f"[+] Post-exploitation commands: {len(recon)}")

    if args.access_log:
        hits = analyze_web_access_logs(args.access_log)
        report["findings"]["access_log_hits"] = hits
        print(f"[+] Access log webshell indicators: {len(hits)}")

    if args.file_events:
        # File-creation events use the same JSON-lines format as process logs.
        events = load_process_logs(args.file_events)
        files = detect_file_creation_in_webroot(events)
        report["findings"]["webroot_files"] = files
        print(f"[+] Scripts created in webroot: {len(files)}")

    # Explicit encoding so the report does not depend on the host locale.
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2, default=str)
    print(f"[+] Report saved to {args.output}")


if __name__ == "__main__":
    main()