mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-07-05 15:29:01 +03:00
372 lines
16 KiB
Markdown
372 lines
16 KiB
Markdown
---
|
|
name: exploiting-excessive-data-exposure-in-api
|
|
description: >
|
|
Tests APIs for excessive data exposure where endpoints return more data than the client
|
|
application needs, relying on the frontend to filter sensitive fields. The tester intercepts
|
|
API responses and analyzes them for leaked PII, internal identifiers, debug information,
|
|
or sensitive business data that the UI does not display but the API transmits. This maps to
|
|
OWASP API3:2023 Broken Object Property Level Authorization. Activates for requests involving
|
|
API data leakage testing, excessive data exposure, response filtering bypass, or API over-fetching.
|
|
domain: cybersecurity
|
|
subdomain: api-security
|
|
tags: [api-security, owasp, data-exposure, rest-security, pii-leakage]
|
|
version: 1.0.0
|
|
author: mahipal
|
|
license: MIT
|
|
---
|
|
# Exploiting Excessive Data Exposure in API
|
|
|
|
## When to Use
|
|
|
|
- Testing APIs where the frontend displays a subset of data but the API response includes additional fields
|
|
- Assessing mobile application APIs where responses are designed for multiple client types and may contain excess data
|
|
- Identifying PII leakage in API responses that include email addresses, phone numbers, SSNs, or payment data not shown in the UI
|
|
- Testing GraphQL APIs where clients can request arbitrary fields including sensitive attributes
|
|
- Evaluating APIs after microservice refactoring where internal service-to-service data leaks into public endpoints
|
|
|
|
**Do not use** without written authorization. Data exposure testing involves capturing and analyzing potentially sensitive personal data.
|
|
|
|
## Prerequisites
|
|
|
|
- Written authorization specifying target API endpoints and scope
|
|
- Burp Suite Professional or mitmproxy configured as intercepting proxy
|
|
- Two test accounts at different privilege levels (regular user and admin)
|
|
- Browser developer tools or mobile proxy setup for traffic capture
|
|
- Python 3.10+ with `requests` and `json` libraries
|
|
- API documentation (OpenAPI spec) for comparison against actual responses
|
|
|
|
## Workflow
|
|
|
|
### Step 1: Response Schema Discovery
|
|
|
|
Compare documented API responses with actual responses:
|
|
|
|
```python
|
|
import requests
|
|
import json
|
|
|
|
BASE_URL = "https://target-api.example.com/api/v1"
|
|
headers = {"Authorization": "Bearer <user_token>", "Content-Type": "application/json"}
|
|
|
|
# Fetch a resource and analyze all returned fields
|
|
endpoints_to_test = [
|
|
("GET", "/users/me", None),
|
|
("GET", "/users/me/orders", None),
|
|
("GET", "/products", None),
|
|
("GET", "/users/me/settings", None),
|
|
("GET", "/transactions", None),
|
|
]
|
|
|
|
for method, path, body in endpoints_to_test:
|
|
resp = requests.request(method, f"{BASE_URL}{path}", headers=headers, json=body)
|
|
if resp.status_code == 200:
|
|
data = resp.json()
|
|
# Recursively extract all field names
|
|
def extract_fields(obj, prefix=""):
|
|
fields = []
|
|
if isinstance(obj, dict):
|
|
for k, v in obj.items():
|
|
full_key = f"{prefix}.{k}" if prefix else k
|
|
fields.append(full_key)
|
|
fields.extend(extract_fields(v, full_key))
|
|
elif isinstance(obj, list) and obj:
|
|
fields.extend(extract_fields(obj[0], f"{prefix}[]"))
|
|
return fields
|
|
|
|
all_fields = extract_fields(data)
|
|
print(f"\n{method} {path} - {len(all_fields)} fields returned:")
|
|
for f in sorted(all_fields):
|
|
print(f" {f}")
|
|
```
|
|
|
|
### Step 2: Sensitive Data Pattern Detection
|
|
|
|
Scan API responses for sensitive data patterns:
|
|
|
|
```python
|
|
import re
|
|
|
|
SENSITIVE_PATTERNS = {
|
|
"email": r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
|
|
"phone": r'(\+?1?\s?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4})',
|
|
"ssn": r'\b\d{3}-\d{2}-\d{4}\b',
|
|
"credit_card": r'\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})\b',
|
|
"password_hash": r'\$2[aby]?\$\d{2}\$[./A-Za-z0-9]{53}',
|
|
"api_key": r'(?:api[_-]?key|apikey)["\s:=]+["\']?([a-zA-Z0-9_\-]{20,})',
|
|
"internal_ip": r'\b(?:10\.\d{1,3}|172\.(?:1[6-9]|2\d|3[01])|192\.168)\.\d{1,3}\.\d{1,3}\b',
|
|
"aws_key": r'AKIA[0-9A-Z]{16}',
|
|
"jwt_token": r'eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+',
|
|
"uuid": r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
|
|
}
|
|
|
|
SENSITIVE_FIELD_NAMES = [
|
|
"password", "password_hash", "secret", "token", "ssn", "social_security",
|
|
"credit_card", "card_number", "cvv", "pin", "private_key", "api_key",
|
|
"internal_id", "debug", "trace", "stack_trace", "created_by_ip",
|
|
"last_login_ip", "salt", "session_id", "refresh_token", "mfa_secret",
|
|
"date_of_birth", "bank_account", "routing_number", "tax_id"
|
|
]
|
|
|
|
def scan_response(endpoint, response_text):
|
|
findings = []
|
|
# Check for sensitive data patterns in values
|
|
for pattern_name, pattern in SENSITIVE_PATTERNS.items():
|
|
matches = re.findall(pattern, response_text)
|
|
if matches:
|
|
findings.append({
|
|
"endpoint": endpoint,
|
|
"type": "sensitive_value",
|
|
"pattern": pattern_name,
|
|
"count": len(matches),
|
|
"sample": matches[0][:20] + "..." if len(matches[0]) > 20 else matches[0]
|
|
})
|
|
|
|
# Check for sensitive field names
|
|
response_lower = response_text.lower()
|
|
for field in SENSITIVE_FIELD_NAMES:
|
|
if f'"{field}"' in response_lower or f"'{field}'" in response_lower:
|
|
findings.append({
|
|
"endpoint": endpoint,
|
|
"type": "sensitive_field",
|
|
"field_name": field
|
|
})
|
|
|
|
return findings
|
|
|
|
# Scan all endpoint responses
|
|
for method, path, body in endpoints_to_test:
|
|
resp = requests.request(method, f"{BASE_URL}{path}", headers=headers, json=body)
|
|
if resp.status_code == 200:
|
|
findings = scan_response(f"{method} {path}", resp.text)
|
|
for f in findings:
|
|
print(f"[FINDING] {f['endpoint']}: {f['type']} - {f.get('pattern', f.get('field_name'))}")
|
|
```
|
|
|
|
### Step 3: Compare UI Display vs API Response
|
|
|
|
```python
|
|
# Fields the UI shows (observed from the frontend application)
|
|
ui_displayed_fields = {
|
|
"/users/me": {"name", "email", "avatar_url", "role"},
|
|
"/users/me/orders": {"order_id", "date", "status", "total"},
|
|
"/products": {"id", "name", "price", "image_url", "description"},
|
|
}
|
|
|
|
# Fields the API actually returns
|
|
for method, path, body in endpoints_to_test:
|
|
resp = requests.request(method, f"{BASE_URL}{path}", headers=headers, json=body)
|
|
if resp.status_code == 200:
|
|
data = resp.json()
|
|
if isinstance(data, list):
|
|
actual_fields = set(data[0].keys()) if data else set()
|
|
elif isinstance(data, dict):
|
|
# Handle paginated responses
|
|
items_key = next((k for k in data if isinstance(data[k], list)), None)
|
|
if items_key and data[items_key]:
|
|
actual_fields = set(data[items_key][0].keys())
|
|
else:
|
|
actual_fields = set(data.keys())
|
|
else:
|
|
continue
|
|
|
|
expected = ui_displayed_fields.get(path, set())
|
|
excess = actual_fields - expected
|
|
if excess:
|
|
print(f"\n{method} {path} - EXCESS FIELDS (not shown in UI):")
|
|
for field in sorted(excess):
|
|
print(f" - {field}")
|
|
```
|
|
|
|
### Step 4: Test User Object Exposure in Related Endpoints
|
|
|
|
```python
|
|
# Many APIs embed full user objects in responses for orders, comments, etc.
|
|
endpoints_with_user_objects = [
|
|
"/orders", # Each order may include full seller/buyer profile
|
|
"/comments", # Comments may include full author profile
|
|
"/reviews", # Reviews may expose reviewer details
|
|
"/transactions", # Transactions may include counterparty info
|
|
"/team/members", # Team listing may expose excessive member data
|
|
]
|
|
|
|
for path in endpoints_with_user_objects:
|
|
resp = requests.get(f"{BASE_URL}{path}", headers=headers)
|
|
if resp.status_code == 200:
|
|
text = resp.text
|
|
# Check for user data leakage in nested objects
|
|
user_fields_found = []
|
|
for field in ["password_hash", "last_login_ip", "mfa_enabled", "phone_number",
|
|
"date_of_birth", "ssn", "internal_notes", "salary", "address"]:
|
|
if f'"{field}"' in text:
|
|
user_fields_found.append(field)
|
|
if user_fields_found:
|
|
print(f"[EXCESSIVE] {path} exposes user fields: {user_fields_found}")
|
|
```
|
|
|
|
### Step 5: GraphQL Over-Fetching Analysis
|
|
|
|
```python
|
|
# GraphQL allows clients to request any available field
|
|
GRAPHQL_URL = f"{BASE_URL}/graphql"
|
|
|
|
# Introspection query to discover all fields on User type
|
|
introspection = {
|
|
"query": """
|
|
{
|
|
__type(name: "User") {
|
|
fields {
|
|
name
|
|
type {
|
|
name
|
|
kind
|
|
}
|
|
}
|
|
}
|
|
}
|
|
"""
|
|
}
|
|
|
|
resp = requests.post(GRAPHQL_URL, headers=headers, json=introspection)
|
|
if resp.status_code == 200:
|
|
fields = resp.json().get("data", {}).get("__type", {}).get("fields", [])
|
|
print("Available User fields via GraphQL:")
|
|
for f in fields:
|
|
sensitivity = "SENSITIVE" if f["name"] in SENSITIVE_FIELD_NAMES else "normal"
|
|
print(f" {f['name']} ({f['type']['name']}) [{sensitivity}]")
|
|
|
|
# Try to query sensitive fields
|
|
sensitive_query = {
|
|
"query": """
|
|
query {
|
|
users {
|
|
id
|
|
email
|
|
passwordHash
|
|
socialSecurityNumber
|
|
internalNotes
|
|
lastLoginIp
|
|
mfaSecret
|
|
apiKey
|
|
}
|
|
}
|
|
"""
|
|
}
|
|
resp = requests.post(GRAPHQL_URL, headers=headers, json=sensitive_query)
|
|
if resp.status_code == 200 and "errors" not in resp.json():
|
|
print("[CRITICAL] GraphQL exposes sensitive user fields without restriction")
|
|
```
|
|
|
|
### Step 6: Debug and Internal Data Leakage
|
|
|
|
```python
|
|
# Test for debug information in responses
|
|
debug_headers_to_check = [
|
|
"X-Debug-Token", "X-Debug-Info", "Server", "X-Powered-By",
|
|
"X-Request-Id", "X-Correlation-Id", "X-Backend-Server",
|
|
"X-Runtime", "X-Version", "X-Build-Version"
|
|
]
|
|
|
|
resp = requests.get(f"{BASE_URL}/users/me", headers=headers)
|
|
for h in debug_headers_to_check:
|
|
if h.lower() in {k.lower(): v for k, v in resp.headers.items()}:
|
|
print(f"[INFO LEAK] Header {h}: {resp.headers.get(h)}")
|
|
|
|
# Test error responses for stack traces
|
|
error_payloads = [
|
|
("GET", "/users/invalid-id-format", None),
|
|
("POST", "/orders", {"invalid": "payload"}),
|
|
("GET", "/users/-1", None),
|
|
("GET", "/users/0", None),
|
|
]
|
|
|
|
for method, path, body in error_payloads:
|
|
resp = requests.request(method, f"{BASE_URL}{path}", headers=headers, json=body)
|
|
if resp.status_code >= 400:
|
|
text = resp.text.lower()
|
|
if any(kw in text for kw in ["stack trace", "traceback", "at com.", "at org.",
|
|
"file \"", "line ", "exception", "sql", "query"]):
|
|
print(f"[DEBUG LEAK] {method} {path} -> {resp.status_code}: Contains stack trace or query info")
|
|
```
|
|
|
|
## Key Concepts
|
|
|
|
| Term | Definition |
|
|
|------|------------|
|
|
| **Excessive Data Exposure** | API returns more data fields than the client needs, relying on frontend filtering to hide sensitive information from users |
|
|
| **Over-Fetching** | Requesting or receiving more data than needed for a specific operation, common in REST APIs that return fixed response schemas |
|
|
| **Response Filtering** | Client-side filtering of API response data to display only relevant fields, which provides zero security since the full response is interceptable |
|
|
| **Object Property Level Authorization** | OWASP API3:2023 - ensuring that users can only read/write object properties they are authorized to access |
|
|
| **PII Leakage** | Unintended exposure of Personally Identifiable Information in API responses including names, emails, addresses, SSNs, or financial data |
|
|
| **Schema Validation** | Enforcing that API responses conform to a defined schema, stripping unauthorized fields before transmission |
|
|
|
|
## Tools & Systems
|
|
|
|
- **Burp Suite Professional**: Intercept API responses and use the Comparer tool to diff expected vs actual response schemas
|
|
- **mitmproxy**: Scriptable proxy for automated response analysis with Python-based content inspection scripts
|
|
- **OWASP ZAP**: Passive scanner detects information disclosure in headers, error messages, and response bodies
|
|
- **Postman**: Compare documented response schemas against actual API responses using test scripts
|
|
- **jq**: Command-line JSON processor for extracting and analyzing specific fields from API responses
|
|
|
|
## Common Scenarios
|
|
|
|
### Scenario: Mobile Banking API Data Exposure Assessment
|
|
|
|
**Context**: A mobile banking application's API returns full account objects to the mobile client, which only displays account nickname and balance. The API is accessed by both iOS and Android apps and a web portal.
|
|
|
|
**Approach**:
|
|
1. Configure mitmproxy on a test device and authenticate as the test user
|
|
2. Capture all API responses during a complete user session (login, view accounts, transfer, logout)
|
|
3. Analyze `GET /api/v1/accounts` response: UI shows 4 fields but API returns 23 fields
|
|
4. Discover that the API returns `routing_number`, `account_holder_ssn_last4`, `internal_risk_score`, `kyc_verification_status`, and `linked_external_accounts` - none shown in UI
|
|
5. Analyze `GET /api/v1/transactions` response: API returns `merchant_id`, `terminal_id`, `authorization_code`, `processor_response` fields not needed by the client
|
|
6. Check `GET /api/v1/users/me`: API returns `last_login_ip`, `mfa_backup_codes_remaining`, `account_officer_name`, and `credit_score_band`
|
|
7. Test error responses: `POST /api/v1/transfers` with invalid payload returns SQL table name in error message
|
|
|
|
**Pitfalls**:
|
|
- Only checking top-level fields and missing sensitive data in deeply nested objects
|
|
- Not testing paginated responses where subsequent pages may include different fields
|
|
- Ignoring response headers that may leak server version, backend technology, or internal routing information
|
|
- Missing data exposure in error responses which often contain stack traces, SQL queries, or internal paths
|
|
- Assuming that HTTPS encryption prevents data exposure (it protects in transit, not from the authenticated client)
|
|
|
|
## Output Format
|
|
|
|
```
|
|
## Finding: Excessive Data Exposure in Account and Transaction APIs
|
|
|
|
**ID**: API-DATA-001
|
|
**Severity**: High (CVSS 7.1)
|
|
**OWASP API**: API3:2023 - Broken Object Property Level Authorization
|
|
**Affected Endpoints**:
|
|
- GET /api/v1/accounts
|
|
- GET /api/v1/transactions
|
|
- GET /api/v1/users/me
|
|
|
|
**Description**:
|
|
The API returns full database objects to the client, including sensitive fields
|
|
that are not displayed in the mobile application UI. The mobile app filters
|
|
these fields client-side, but they are fully accessible by intercepting the
|
|
API response. This exposes SSN fragments, internal risk scores, and KYC
|
|
verification data for any authenticated user.
|
|
|
|
**Excess Fields Discovered**:
|
|
- /accounts: routing_number, account_holder_ssn_last4, internal_risk_score,
|
|
kyc_verification_status, linked_external_accounts (18 excess fields total)
|
|
- /transactions: merchant_id, terminal_id, authorization_code,
|
|
processor_response (12 excess fields total)
|
|
- /users/me: last_login_ip, mfa_backup_codes_remaining, credit_score_band
|
|
|
|
**Impact**:
|
|
An authenticated user can extract sensitive financial data, internal risk
|
|
assessments, and PII for their own account that the application is not
|
|
intended to reveal. Combined with BOLA vulnerabilities, this data could
|
|
be extracted for all users.
|
|
|
|
**Remediation**:
|
|
1. Implement server-side response filtering using DTOs/view models that only include fields needed by the client
|
|
2. Use GraphQL field-level authorization or REST response schemas per endpoint per role
|
|
3. Remove sensitive fields from API responses at the serialization layer
|
|
4. Implement response schema validation in the API gateway to strip undocumented fields
|
|
5. Add automated tests that verify response schemas match documentation
|
|
```
|