mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-10 21:24:56 +03:00
267 lines
8.5 KiB
Python
267 lines
8.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
YARA Rule Development and Testing Framework
|
|
|
|
Assists in creating, testing, and optimizing YARA rules
|
|
for malware detection.
|
|
|
|
Requirements:
|
|
pip install yara-python pefile
|
|
|
|
Usage:
|
|
python process.py --analyze sample.exe
|
|
python process.py --test rule.yar --samples ./malware --clean ./goodware
|
|
python process.py --analyze sample.exe --generate --name MalwareX --output rule.yar
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
import time
|
|
from collections import Counter
|
|
from pathlib import Path
|
|
|
|
try:
|
|
import yara
|
|
except ImportError:
|
|
print("ERROR: yara-python not installed. Run: pip install yara-python")
|
|
sys.exit(1)
|
|
|
|
try:
|
|
import pefile
|
|
except ImportError:
|
|
pefile = None
|
|
|
|
|
|
class YaraRuleBuilder:
    """Build and test YARA rules.

    Typical flow: ``analyze_sample`` collects candidate strings and imports
    from one file, ``generate_rule`` turns them into rule text, and
    ``test_rule`` measures detection / false-positive rates of a rule.

    Attributes:
        candidate_strings: list of ``(string, score)`` tuples from the most
            recent ``analyze_sample`` call, highest score first (max 30).
        candidate_hex: initialised empty and never populated by this class.
        imports: imported function names found in the most recently
            analysed sample (empty when ``pefile`` is unavailable or the
            file could not be parsed as a PE).
    """

    # Printable-ASCII runs of at least 8 characters.
    _ASCII_RE = re.compile(rb'[\x20-\x7e]{8,}')
    # The same runs encoded as UTF-16-LE (each character followed by NUL).
    _WIDE_RE = re.compile(rb'(?:[\x20-\x7e]\x00){8,}')
    _CAMEL_CASE_RE = re.compile(r'[A-Z][a-z]+[A-Z]')

    # Case-insensitive substrings; each one found adds 10 to a string's score.
    _SUSPICIOUS_KEYWORDS = (
        'http', 'https', 'ftp', 'cmd.exe', 'powershell',
        'mutex', 'pipe', 'password', 'encrypt', 'decrypt',
        'inject', 'hook', 'shell', 'backdoor', 'keylog',
        'screenshot', 'clipboard', 'download', 'upload',
        'sandbox', 'vmware', 'virtualbox', 'debug',
    )

    # Imports listed under "suspicious_imports" in the analysis report.
    _REPORTED_IMPORTS = frozenset({
        'VirtualAlloc', 'VirtualAllocEx',
        'WriteProcessMemory', 'CreateRemoteThread',
        'NtUnmapViewOfSection', 'OpenProcess',
        'CryptEncrypt', 'InternetOpenA',
    })

    # Imports that are turned into ``$api*`` strings in generated rules.
    _RULE_IMPORTS = frozenset({
        'VirtualAlloc', 'VirtualAllocEx',
        'WriteProcessMemory', 'CreateRemoteThread',
    })

    def __init__(self):
        self.candidate_strings = []
        self.candidate_hex = []
        self.imports = []

    @classmethod
    def _score_string(cls, text):
        """Return the heuristic score used to rank a candidate string."""
        lowered = text.lower()
        score = 10 * sum(1 for kw in cls._SUSPICIOUS_KEYWORDS if kw in lowered)
        if len(text) > 20:
            score += 5
        if cls._CAMEL_CASE_RE.search(text):
            score += 3
        return score

    @staticmethod
    def _escape_yara_string(text):
        """Escape *text* for use inside a double-quoted YARA text string."""
        return (
            text.replace('\\', '\\\\')
            .replace('"', '\\"')
            .replace('\n', '\\n')
            .replace('\r', '\\r')
            .replace('\t', '\\t')
        )

    def _extract_imports(self, filepath):
        """Return the de-duplicated import names of a PE file (best effort).

        Returns an empty list when ``pefile`` is not installed or the file
        cannot be parsed; a parse failure is reported on stderr rather than
        raised, because non-PE samples are a normal input.
        """
        if not pefile:
            return []
        names = []
        pe = None
        try:
            pe = pefile.PE(filepath)
            for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
                for imp in entry.imports:
                    if imp.name:
                        names.append(imp.name.decode('utf-8', errors='replace'))
        except Exception as exc:  # pefile raises assorted errors on odd input
            print(f"[!] PE import parsing failed for {filepath}: {exc}",
                  file=sys.stderr)
        finally:
            if pe is not None:
                pe.close()  # release the file mapping held by pefile
        # dict.fromkeys removes duplicates while keeping first-seen order.
        return list(dict.fromkeys(names))

    def analyze_sample(self, filepath):
        """Extract candidate patterns from a malware sample.

        Replaces ``candidate_strings`` and ``imports`` with the results for
        *filepath* (state from a previous call is discarded).

        Args:
            filepath: path of the file to analyse; read fully into memory.

        Returns:
            dict with ``total_ascii`` and ``total_wide`` (number of raw
            string hits, duplicates included), ``top_candidates`` (up to 10
            unique ``(string, score)`` pairs) and ``suspicious_imports``.

        Raises:
            OSError: if *filepath* cannot be opened or read.
        """
        with open(filepath, 'rb') as f:
            data = f.read()

        ascii_strings = [m.decode('ascii') for m in self._ASCII_RE.findall(data)]
        wide_strings = [m.decode('utf-16-le') for m in self._WIDE_RE.findall(data)]

        # Score each distinct string once: repeated strings would otherwise
        # occupy several rule slots with identical content.
        unique_strings = dict.fromkeys(ascii_strings + wide_strings)
        scored = [(s, self._score_string(s)) for s in unique_strings]
        scored.sort(key=lambda item: -item[1])  # stable: ties keep file order
        self.candidate_strings = scored[:30]

        self.imports = self._extract_imports(filepath)

        return {
            "total_ascii": len(ascii_strings),
            "total_wide": len(wide_strings),
            "top_candidates": scored[:10],
            "suspicious_imports": [
                name for name in self.imports if name in self._REPORTED_IMPORTS
            ],
        }

    def generate_rule(self, name, author="analyst", description=""):
        """Generate YARA rule text from the analysed patterns.

        Uses up to 8 of the top candidate strings (only those with a score
        above 0) and up to 4 imports from the rule import list. The
        condition always requires an ``MZ`` header and ``filesize < 5MB``.

        Args:
            name: rule name; characters other than ASCII letters, digits and
                underscore are replaced by ``_`` and a leading digit is
                prefixed with ``_`` so the identifier is valid YARA.
            author: value of the ``author`` meta field.
            description: value of the ``description`` meta field; defaults
                to ``"Detects <name>"`` when empty.

        Returns:
            The rule as a multi-line string. The ``strings:`` section is
            omitted when there is nothing to put in it, since YARA rejects
            an empty one.
        """
        rule_name = re.sub(r'[^A-Za-z0-9_]', '_', name)
        if not rule_name or rule_name[0].isdigit():
            rule_name = f'_{rule_name}'

        # Top candidate strings; the index keeps the candidate's rank.
        str_entries = [
            f'$str{i} = "{self._escape_yara_string(s)}" ascii wide'
            for i, (s, score) in enumerate(self.candidate_strings[:8])
            if score > 0
        ]

        # Import-based strings, de-duplicated in first-seen order.
        rule_imports = [
            imp for imp in dict.fromkeys(self.imports)
            if imp in self._RULE_IMPORTS
        ]
        api_entries = [
            f'$api{i} = "{imp}" ascii'
            for i, imp in enumerate(rule_imports[:4])
        ]

        condition_parts = ['uint16(0) == 0x5A4D', 'filesize < 5MB']
        if str_entries:
            # Never demand more strings than the rule defines, otherwise
            # the condition can never be satisfied.
            threshold = min(len(str_entries), max(2, len(str_entries) // 2))
            condition_parts.append(f'{threshold} of ($str*)')
        if api_entries:
            condition_parts.append(
                f'{max(1, len(api_entries) - 1)} of ($api*)'
            )

        meta_description = self._escape_yara_string(
            description or f'Detects {name}'
        )
        lines = [
            f'rule {rule_name} {{',
            '    meta:',
            f'        description = "{meta_description}"',
            f'        author = "{self._escape_yara_string(author)}"',
            f'        date = "{time.strftime("%Y-%m-%d")}"',
            '        tlp = "WHITE"',
        ]
        string_entries = str_entries + api_entries
        if string_entries:
            lines.append('')
            lines.append('    strings:')
            lines.extend(f'        {entry}' for entry in string_entries)
        lines.extend([
            '',
            '    condition:',
            f'        {" and ".join(condition_parts)}',
            '}',
        ])
        return '\n'.join(lines)

    @staticmethod
    def _scan_directory(rules, directory, results, expect_match):
        """Scan every file under *directory* and tally the outcome.

        Updates *results* in place. Files that raise during scanning are
        counted under ``scan_errors`` and listed in ``details`` instead of
        being dropped silently.
        """
        if expect_match:
            hit_key, miss_key = "true_positives", "false_negatives"
        else:
            hit_key, miss_key = "false_positives", "true_negatives"

        for path in Path(directory).rglob('*'):
            if not path.is_file():
                continue
            try:
                matched = bool(rules.match(str(path)))
            except Exception as exc:  # keep scanning the remaining files
                results["scan_errors"] += 1
                results["details"].append(
                    {"file": str(path), "result": "SCAN_ERROR",
                     "error": str(exc)}
                )
                continue

            results[hit_key if matched else miss_key] += 1
            # Only misclassified files are listed individually.
            if expect_match and not matched:
                results["details"].append(
                    {"file": str(path), "result": "FALSE_NEGATIVE"}
                )
            elif matched and not expect_match:
                results["details"].append(
                    {"file": str(path), "result": "FALSE_POSITIVE"}
                )

    def test_rule(self, rule_path_or_text, sample_dir, clean_dir=None):
        """Test a YARA rule for detection and false positive rates.

        Args:
            rule_path_or_text: path to a rule file, or rule source text when
                no file exists at that path.
            sample_dir: directory scanned recursively; every file in it is
                expected to match.
            clean_dir: optional directory scanned recursively; no file in it
                is expected to match.

        Returns:
            dict with the four confusion-matrix counters, ``scan_errors``,
            ``scan_time`` (seconds for both scans), ``details`` (one entry
            per misclassified or unscannable file), plus ``detection_rate``
            and ``fp_rate`` in percent when the respective directory
            contained at least one scannable file.

        Raises:
            Whatever ``yara.compile`` raises for an invalid rule.
        """
        if os.path.isfile(rule_path_or_text):
            rules = yara.compile(filepath=rule_path_or_text)
        else:
            rules = yara.compile(source=rule_path_or_text)

        results = {
            "true_positives": 0,
            "false_negatives": 0,
            "false_positives": 0,
            "true_negatives": 0,
            "scan_errors": 0,
            "scan_time": 0,
            "details": [],
        }

        start = time.perf_counter()
        self._scan_directory(rules, sample_dir, results, expect_match=True)
        if clean_dir:
            self._scan_directory(rules, clean_dir, results, expect_match=False)
        results["scan_time"] = time.perf_counter() - start

        total_samples = results["true_positives"] + results["false_negatives"]
        if total_samples > 0:
            results["detection_rate"] = round(
                results["true_positives"] / total_samples * 100, 2
            )

        total_clean = results["false_positives"] + results["true_negatives"]
        if total_clean > 0:
            results["fp_rate"] = round(
                results["false_positives"] / total_clean * 100, 4
            )

        return results
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Supported actions (taken from ``sys.argv``):
      * ``--analyze FILE``  print a JSON analysis of FILE.
      * ``--generate``      print a rule built from the current analysis
                            (combine with ``--analyze``); ``--output`` also
                            writes it to a file.
      * ``--test RULE --samples DIR [--clean DIR]``  print JSON test results.
        Ignored when ``--generate`` is also given.

    The help text is printed only when no action was requested.
    """
    parser = argparse.ArgumentParser(
        description="YARA Rule Development Framework"
    )
    parser.add_argument("--analyze", help="Analyze sample for YARA patterns")
    parser.add_argument("--generate", action="store_true",
                        help="Generate rule from analysis")
    parser.add_argument("--name", default="MalwareDetection",
                        help="Rule name")
    parser.add_argument("--test", help="Test YARA rule file")
    parser.add_argument("--samples", help="Malware samples directory")
    parser.add_argument("--clean", help="Clean files directory")
    parser.add_argument("--output", help="Output rule file")

    args = parser.parse_args()
    builder = YaraRuleBuilder()
    # Tracks whether any action ran, so that a successful --analyze is not
    # followed by the usage text.
    handled = False

    if args.analyze:
        analysis = builder.analyze_sample(args.analyze)
        print(json.dumps(analysis, indent=2, default=str))
        handled = True

    if args.generate:
        rule = builder.generate_rule(args.name)
        print(f"\n{rule}")
        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(rule)
            print(f"[+] Rule saved to {args.output}")
        handled = True
    elif args.test:
        if not args.samples:
            # Exits with status 2 and a usage message on stderr.
            parser.error("--test requires --samples")
        results = builder.test_rule(args.test, args.samples, args.clean)
        print(json.dumps(results, indent=2))
        handled = True

    if not handled:
        parser.print_help()
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|