Files
Anthropic-Cybersecurity-Skills/skills/performing-malware-triage-with-yara/scripts/agent.py
T
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

183 lines
6.3 KiB
Python

#!/usr/bin/env python3
"""Agent for performing malware triage with YARA.
Compiles and applies YARA rules to classify malware samples,
perform batch scanning, and generate triage reports.
"""
import hashlib
import json
import sys
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path

import yara
class YaraTriageAgent:
    """Batch malware triage and classification using YARA rules.

    Intended flow: call ``compile_rules`` once, then ``scan_file`` and/or
    ``scan_directory``, then ``generate_report`` to write the JSON report
    and print a summary.
    """

    # File extensions treated as YARA rule files by compile_rules().
    RULE_SUFFIXES = (".yar", ".yara")
    # Maximum number of string hits recorded per matching rule.
    MAX_STRINGS_PER_MATCH = 10
    # Matched bytes are decoded and truncated to this many characters.
    MAX_STRING_PREVIEW = 80

    def __init__(self, output_dir):
        """Create the agent and make sure ``output_dir`` exists.

        Args:
            output_dir: Directory the JSON report is written to; created
                (including parents) if missing.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # Compiled rule set; stays None until compile_rules() succeeds.
        self.rules = None
        # Per-file result dicts accumulated by scan_directory().
        self.results = []

    @classmethod
    def _collect_rule_files(cls, rule_paths):
        """Return ``{namespace: path}`` for every rule file under ``rule_paths``.

        The namespace is the file stem. When two different files share a
        stem, later ones get a numeric suffix (``name_2``, ``name_3``, ...)
        instead of silently replacing the earlier file. A file reached more
        than once (e.g. passed explicitly and via its directory) is only
        registered once.
        """
        # Accept a single path as well as an iterable of paths; iterating a
        # bare string would otherwise walk it character by character.
        if isinstance(rule_paths, (str, Path)):
            rule_paths = [rule_paths]

        candidates = []
        for path in rule_paths:
            p = Path(path)
            if p.is_file():
                if p.suffix in cls.RULE_SUFFIXES:
                    candidates.append(p)
            elif p.is_dir():
                found = []
                for suffix in cls.RULE_SUFFIXES:
                    found.extend(f for f in p.rglob(f"*{suffix}") if f.is_file())
                # Sort so namespace suffixes are assigned deterministically
                # regardless of filesystem enumeration order.
                candidates.extend(sorted(found))

        filepaths = {}
        seen = set()
        for rule_file in candidates:
            resolved = rule_file.resolve()
            if resolved in seen:
                continue
            seen.add(resolved)
            namespace = rule_file.stem
            counter = 2
            while namespace in filepaths:
                namespace = f"{rule_file.stem}_{counter}"
                counter += 1
            filepaths[namespace] = str(rule_file)
        return filepaths

    def compile_rules(self, rule_paths):
        """Compile YARA rules from file paths or directories.

        Args:
            rule_paths: Iterable of rule files / directories (a single path
                is also accepted). Directories are searched recursively for
                ``.yar`` and ``.yara`` files.

        Returns:
            Number of rule files compiled.

        Raises:
            ValueError: If no rule file is found.
        """
        filepaths = self._collect_rule_files(rule_paths)
        if not filepaths:
            raise ValueError(f"No YARA rule files found in: {rule_paths}")
        self.rules = yara.compile(filepaths=filepaths)
        return len(filepaths)

    @staticmethod
    def _iter_string_hits(strings):
        """Yield ``(offset, identifier, data)`` for each string hit.

        Handles both shapes of ``match.strings``: the legacy list of
        ``(offset, identifier, data)`` tuples and the object form used by
        newer yara-python releases, where each entry has ``identifier`` and
        ``instances`` (each instance having ``offset`` and ``matched_data``).
        """
        for entry in strings:
            if isinstance(entry, tuple):
                yield entry[0], entry[1], entry[2]
            else:
                for instance in entry.instances:
                    yield instance.offset, entry.identifier, instance.matched_data

    def _describe_match(self, match):
        """Convert one YARA match object into a JSON-friendly dict."""
        hits = []
        for offset, identifier, data in self._iter_string_hits(match.strings or ()):
            if len(hits) >= self.MAX_STRINGS_PER_MATCH:
                break
            hits.append({
                "identifier": identifier,
                "offset": hex(offset),
                "data": data.decode("utf-8", errors="replace")[: self.MAX_STRING_PREVIEW],
            })
        return {
            "rule": match.rule,
            "namespace": match.namespace,
            "tags": match.tags,
            "meta": match.meta,
            "strings": hits,
        }

    def scan_file(self, filepath):
        """Scan a single file against the compiled YARA rules.

        Args:
            filepath: Path of the file to scan.

        Returns:
            A result dict (name, path, hashes, size, matches, match count,
            classification), or ``None`` if ``filepath`` is not a file.
            The classification is the upper-cased namespace of the first
            match, or ``"UNKNOWN"`` when nothing matched.

        Raises:
            RuntimeError: If no rules have been compiled yet.
        """
        filepath = Path(filepath)
        if not filepath.is_file():
            return None
        if self.rules is None:
            raise RuntimeError("No YARA rules compiled; call compile_rules() first")

        data = filepath.read_bytes()
        matches = self.rules.match(data=data)
        match_details = [self._describe_match(m) for m in matches]

        classification = "UNKNOWN"
        if match_details:
            classification = (match_details[0].get("namespace") or "DETECTED").upper()

        return {
            "filename": filepath.name,
            "path": str(filepath),
            "sha256": hashlib.sha256(data).hexdigest(),
            "md5": hashlib.md5(data).hexdigest(),
            "size": len(data),
            "matches": match_details,
            "match_count": len(matches),
            "classification": classification,
        }

    def scan_directory(self, sample_dir, recursive=True):
        """Scan every non-empty file in a directory.

        Files that cannot be read are reported on stderr and skipped so a
        single bad sample does not abort the whole batch.

        Args:
            sample_dir: Directory containing the samples.
            recursive: Descend into sub-directories when true.

        Returns:
            ``self.results``, extended with one entry per scanned file.
        """
        sample_path = Path(sample_dir)
        glob_fn = sample_path.rglob if recursive else sample_path.glob
        for filepath in glob_fn("*"):
            try:
                if not filepath.is_file() or filepath.stat().st_size == 0:
                    continue
                result = self.scan_file(filepath)
            except OSError as exc:
                print(f"[!] Skipping {filepath}: {exc}", file=sys.stderr)
                continue
            if result:
                self.results.append(result)
        return self.results

    def get_classification_summary(self):
        """Return ``{classification: count}`` ordered by descending count."""
        summary = defaultdict(int)
        for result in self.results:
            summary[result["classification"]] += 1
        return dict(sorted(summary.items(), key=lambda x: x[1], reverse=True))

    def get_top_rules(self, limit=20):
        """Return the ``limit`` most frequently matching rules as ``{rule: count}``."""
        rule_counts = defaultdict(int)
        for result in self.results:
            for match in result["matches"]:
                rule_counts[match["rule"]] += 1
        ranked = sorted(rule_counts.items(), key=lambda x: x[1], reverse=True)
        return dict(ranked[:limit])

    def generate_report(self):
        """Write ``yara_triage_report.json`` to the output directory and print a summary.

        Returns:
            The report dict that was written. ``scan_date`` is a
            timezone-aware UTC timestamp in ISO-8601 form.
        """
        classified = [r for r in self.results if r["classification"] != "UNKNOWN"]
        unknown = [r for r in self.results if r["classification"] == "UNKNOWN"]
        report = {
            "scan_date": datetime.now(timezone.utc).isoformat(),
            "total_scanned": len(self.results),
            "classified": len(classified),
            "unknown": len(unknown),
            # max(..., 1) avoids a division by zero when nothing was scanned.
            "classification_rate": round(
                len(classified) / max(len(self.results), 1) * 100, 1
            ),
            "classification_summary": self.get_classification_summary(),
            "top_matching_rules": self.get_top_rules(),
            "detected_samples": [
                {
                    "filename": r["filename"],
                    "sha256": r["sha256"],
                    "classification": r["classification"],
                    "rules_matched": [m["rule"] for m in r["matches"]],
                }
                for r in classified
            ],
        }
        report_path = self.output_dir / "yara_triage_report.json"
        with open(report_path, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)

        print("YARA Triage Results")
        print(f"={'=' * 40}")
        print(f"Scanned: {report['total_scanned']}")
        print(f"Classified: {report['classified']} ({report['classification_rate']}%)")
        print(f"Unknown: {report['unknown']}")
        print("\nClassification Summary:")
        for cls, count in report["classification_summary"].items():
            print(f" {cls}: {count}")
        print("\nTop Rules:")
        for rule, count in list(report["top_matching_rules"].items())[:10]:
            print(f" {rule}: {count} matches")
        return report
def main():
    """Command-line entry point: compile rules, scan samples, write the report.

    Reads ``<rules_path> <samples_dir> [output_dir]`` from ``sys.argv``.
    Prints usage and exits with status 1 when fewer than two arguments
    are supplied; the output directory defaults to ``./triage_output``.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        usage_lines = (
            "Usage: agent.py <rules_path> <samples_dir> [output_dir]",
            " rules_path: YARA rule file or directory of .yar files",
            " samples_dir: Directory of files to scan",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    rules_path, samples_dir, *optional = cli_args
    output_dir = optional[0] if optional else "./triage_output"

    triage = YaraTriageAgent(output_dir)
    compiled = triage.compile_rules([rules_path])
    print(f"Compiled {compiled} rule files")
    triage.scan_directory(samples_dir)
    triage.generate_report()


if __name__ == "__main__":
    main()