mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-16 16:03:17 +03:00
c47eed6a64
- Fix 25 shell=True subprocess calls with list-based commands - Fix 49 verify=False in defensive skills (env-var override) - Add timeout to 231 HTTP/subprocess/socket calls - Fix 6 SQL injection patterns with whitelist validation - Replace 8 __import__() with standard imports - Remove 701 unused imports across 442 files - Add authorized-testing disclaimers to all offensive skills - Complete 11 incomplete skill directories - Expand 10 stub SKILL.md files with full content - Fix 2 YAML parse errors in frontmatter - Fix 5 pre-existing syntax errors - Convert 22 hardcoded paths/ports to environment variables - Back up 21 redundant skill pairs to .bak - Fix 2 global declaration errors - 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE) - 0 compile errors across all 724 agent.py files
183 lines
6.3 KiB
Python
183 lines
6.3 KiB
Python
#!/usr/bin/env python3
"""Agent for performing malware triage with YARA.

Compiles and applies YARA rules to classify malware samples,
perform batch scanning, and generate triage reports.
"""
|
|
|
|
import hashlib
import json
import sys
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path

import yara
|
|
|
|
|
|
class YaraTriageAgent:
    """Batch malware triage and classification using YARA rules.

    Intended flow: call ``compile_rules()`` once, scan with ``scan_file()``
    or ``scan_directory()``, then call ``generate_report()`` to write the
    JSON report and print a console summary.
    """

    # File extensions accepted for an explicitly named rule file.
    RULE_SUFFIXES = (".yar", ".yara")
    # Caps applied to the string hits recorded for each matching rule.
    MAX_STRINGS_PER_MATCH = 10
    MAX_STRING_PREVIEW = 80

    def __init__(self, output_dir):
        """Create the agent and make sure *output_dir* exists.

        Args:
            output_dir: Directory that will receive the JSON report; it is
                created (including parents) when missing.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # Compiled rule set; stays None until compile_rules() succeeds.
        self.rules = None
        # One result dict per scanned file, appended by scan_directory().
        self.results = []

    def compile_rules(self, rule_paths):
        """Compile YARA rules from file paths or directories.

        Args:
            rule_paths: Iterable of rule files and/or directories. A single
                ``str``/``Path`` is also accepted. Directories are searched
                recursively for ``*.yar`` and ``*.yara`` files.

        Returns:
            The number of rule files handed to the YARA compiler.

        Raises:
            ValueError: If no rule file was found in *rule_paths*.
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(rule_paths, (str, Path)):
            rule_paths = [rule_paths]

        filepaths = {}
        for path in rule_paths:
            p = Path(path)
            if p.is_file() and p.suffix in self.RULE_SUFFIXES:
                self._register_rule_file(filepaths, p)
            elif p.is_dir():
                for pattern in ("*.yar", "*.yara"):
                    # Sorted so namespace assignment is reproducible.
                    for rule_file in sorted(p.rglob(pattern)):
                        if rule_file.is_file():
                            self._register_rule_file(filepaths, rule_file)

        if not filepaths:
            raise ValueError(f"No YARA rule files found in: {rule_paths}")

        self.rules = yara.compile(filepaths=filepaths)
        return len(filepaths)

    @staticmethod
    def _register_rule_file(filepaths, rule_file):
        """Add *rule_file* to *filepaths* under a unique namespace.

        The namespace is the file stem. When a different file already uses
        that stem, a numeric suffix (``_2``, ``_3``, ...) is appended so the
        earlier file is not silently replaced.
        """
        location = str(rule_file)
        namespace = rule_file.stem
        counter = 2
        while namespace in filepaths and filepaths[namespace] != location:
            namespace = f"{rule_file.stem}_{counter}"
            counter += 1
        filepaths[namespace] = location

    def scan_file(self, filepath):
        """Scan a single file against the compiled YARA rules.

        Args:
            filepath: Path of the file to scan.

        Returns:
            A result dict (``filename``, ``path``, ``sha256``, ``md5``,
            ``size``, ``matches``, ``match_count``, ``classification``), or
            ``None`` when *filepath* is not a regular file. The
            classification is the upper-cased namespace of the first
            matching rule, or ``"UNKNOWN"`` when nothing matched.

        Raises:
            RuntimeError: If no rules have been compiled yet.
        """
        filepath = Path(filepath)
        if not filepath.is_file():
            return None
        if self.rules is None:
            raise RuntimeError(
                "No YARA rules compiled; call compile_rules() first"
            )

        data = filepath.read_bytes()
        matches = self.rules.match(data=data)
        match_details = [self._describe_match(match) for match in matches]

        classification = "UNKNOWN"
        if match_details:
            first = match_details[0]
            classification = first.get("namespace", "DETECTED").upper()

        return {
            "filename": filepath.name,
            "path": str(filepath),
            "sha256": hashlib.sha256(data).hexdigest(),
            "md5": hashlib.md5(data).hexdigest(),
            "size": len(data),
            "matches": match_details,
            "match_count": len(matches),
            "classification": classification,
        }

    def _describe_match(self, match):
        """Convert one YARA match object into a JSON-friendly dict."""
        return {
            "rule": match.rule,
            "namespace": match.namespace,
            "tags": match.tags,
            "meta": match.meta,
            "strings": self._describe_strings(match.strings),
        }

    def _describe_strings(self, strings):
        """Return up to MAX_STRINGS_PER_MATCH string hits as dicts.

        Matched bytes are decoded as UTF-8 (invalid bytes replaced) and
        truncated to MAX_STRING_PREVIEW characters.
        """
        described = []
        for offset, identifier, raw in self._iter_string_hits(strings or ()):
            if len(described) >= self.MAX_STRINGS_PER_MATCH:
                break
            text = raw.decode("utf-8", errors="replace")
            described.append({
                "identifier": identifier,
                "offset": hex(offset),
                "data": text[: self.MAX_STRING_PREVIEW],
            })
        return described

    @staticmethod
    def _iter_string_hits(strings):
        """Yield ``(offset, identifier, data)`` for every string hit.

        Handles both shapes of ``match.strings``: plain
        ``(offset, identifier, data)`` tuples, and objects exposing
        ``identifier`` plus ``instances`` (each with ``offset`` and
        ``matched_data``) as returned by newer yara-python releases (4.3+).
        """
        for entry in strings:
            if isinstance(entry, tuple):
                offset, identifier, raw = entry
                yield offset, identifier, raw
            else:
                for instance in entry.instances:
                    yield instance.offset, entry.identifier, instance.matched_data

    def scan_directory(self, sample_dir, recursive=True):
        """Scan all non-empty files in a directory.

        Files that cannot be read are reported on stderr and skipped so one
        bad sample does not abort the whole batch.

        Args:
            sample_dir: Directory containing the samples.
            recursive: Descend into sub-directories when true.

        Returns:
            ``self.results``, extended with one entry per scanned file.
        """
        sample_path = Path(sample_dir)
        candidates = sample_path.rglob("*") if recursive else sample_path.glob("*")

        for filepath in candidates:
            try:
                if not filepath.is_file() or filepath.stat().st_size == 0:
                    continue
                result = self.scan_file(filepath)
            except OSError as exc:
                print(f"Warning: skipping {filepath}: {exc}", file=sys.stderr)
                continue
            if result:
                self.results.append(result)

        return self.results

    def get_classification_summary(self):
        """Summarize scan results by classification.

        Returns:
            Dict mapping classification to file count, most frequent first.
        """
        summary = defaultdict(int)
        for result in self.results:
            summary[result["classification"]] += 1
        ranked = sorted(summary.items(), key=lambda item: item[1], reverse=True)
        return dict(ranked)

    def get_top_rules(self, limit=20):
        """Get the most frequently matching rules.

        Args:
            limit: Maximum number of rules to return.

        Returns:
            Dict mapping rule name to match count, most frequent first.
        """
        rule_counts = defaultdict(int)
        for result in self.results:
            for match in result["matches"]:
                rule_counts[match["rule"]] += 1
        ranked = sorted(rule_counts.items(), key=lambda item: item[1], reverse=True)
        return dict(ranked[:limit])

    def generate_report(self):
        """Generate the triage report.

        Writes ``yara_triage_report.json`` into the output directory and
        prints a summary to stdout.

        Returns:
            The report dict that was written to disk.
        """
        classified = [r for r in self.results if r["classification"] != "UNKNOWN"]
        total = len(self.results)

        # Same naive-UTC ISO format as before, without the deprecated
        # datetime.utcnow().
        scan_date = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

        report = {
            "scan_date": scan_date,
            "total_scanned": total,
            "classified": len(classified),
            "unknown": total - len(classified),
            # max(..., 1) avoids dividing by zero when nothing was scanned.
            "classification_rate": round(len(classified) / max(total, 1) * 100, 1),
            "classification_summary": self.get_classification_summary(),
            "top_matching_rules": self.get_top_rules(),
            "detected_samples": [
                {
                    "filename": r["filename"],
                    "sha256": r["sha256"],
                    "classification": r["classification"],
                    "rules_matched": [m["rule"] for m in r["matches"]],
                }
                for r in classified
            ],
        }

        report_path = self.output_dir / "yara_triage_report.json"
        with open(report_path, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)

        self._print_summary(report)
        return report

    @staticmethod
    def _print_summary(report):
        """Print the human-readable summary of *report* to stdout."""
        print("YARA Triage Results")
        print("=" * 41)
        print(f"Scanned: {report['total_scanned']}")
        print(f"Classified: {report['classified']} ({report['classification_rate']}%)")
        print(f"Unknown: {report['unknown']}")
        print("\nClassification Summary:")
        for cls, count in report["classification_summary"].items():
            print(f" {cls}: {count}")
        print("\nTop Rules:")
        for rule, count in list(report["top_matching_rules"].items())[:10]:
            print(f" {rule}: {count} matches")
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Usage: ``agent.py <rules_path> <samples_dir> [output_dir]``.

    Compiles the rules at ``rules_path``, scans every file under
    ``samples_dir`` and writes the report into ``output_dir`` (default
    ``./triage_output``). Exits with status 1 on missing arguments, when
    ``samples_dir`` is not a directory, or when no rule files are found.
    """
    if len(sys.argv) < 3:
        print("Usage: agent.py <rules_path> <samples_dir> [output_dir]")
        print(" rules_path: YARA rule file or directory of .yar files")
        print(" samples_dir: Directory of files to scan")
        sys.exit(1)

    rules_path = sys.argv[1]
    samples_dir = sys.argv[2]
    output_dir = sys.argv[3] if len(sys.argv) > 3 else "./triage_output"

    # Checked before any output is created: a wrong path would otherwise
    # produce an empty "Scanned: 0" report with no hint of the mistake.
    if not Path(samples_dir).is_dir():
        print(f"Error: samples directory not found: {samples_dir}", file=sys.stderr)
        sys.exit(1)

    agent = YaraTriageAgent(output_dir)
    try:
        rule_count = agent.compile_rules([rules_path])
    except ValueError as exc:
        # compile_rules() raises ValueError when no rule files are found.
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)
    print(f"Compiled {rule_count} rule files")

    agent.scan_directory(samples_dir)
    agent.generate_report()


if __name__ == "__main__":
    main()
|