Python:curl 转 Python 脚本

curl → Python requests 转换器

复制代码
import shlex
import json
import re
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class CurlCommand:
    """Structured form of a parsed curl command line.

    Instances are filled in by ``parse_curl`` and read by ``generate_python``.
    """

    method: str = "GET"  # HTTP method; parse_curl stores the -X value upper-cased
    url: str = ""  # request URL
    headers: dict = field(default_factory=dict)  # header name -> value
    data: Optional[str] = None  # body from -d/--data/--data-ascii/--data-urlencode
    data_raw: Optional[str] = None  # body from --data-raw
    data_binary: Optional[str] = None  # body from --data-binary
    form: list = field(default_factory=list)  # raw -F/--form items such as "name=value"
    cookies: dict = field(default_factory=dict)  # cookie name -> value
    auth: Optional[str] = None  # raw -u/--user value
    proxy: Optional[str] = None  # -x/--proxy value
    timeout: Optional[int] = None  # -m/--max-time value
    connect_timeout: Optional[int] = None  # --connect-timeout value
    verify: bool = True  # cleared by -k/--insecure
    allow_redirects: bool = True  # cleared by --no-location
    user_agent: Optional[str] = None  # from -A/--user-agent or a User-Agent header
    compressed: bool = False  # set by --compressed


def _preprocess_curl(command: str) -> str:
    """预处理 curl 命令:去除 bash 特殊语法"""
    # 处理反引号包裹的 URL:`https://...` → https://...
    command = re.sub(r'`([^`]+)`', r'\1', command)

    # 处理 bash $'...' ANSI-C 引号,用 shlex.quote 安全重新引用
    command = re.sub(
        r"\$'((?:[^'\\]|\\.)*)'",
        lambda m: shlex.quote(_unescape_bash(m.group(1))),
        command,
    )

    return command


def _unescape_bash(s: str) -> str:
    """解析 bash ANSI-C 转义序列"""
    escapes = {
        "\\a": "\a", "\\b": "\b", "\\e": "\x1b", "\\f": "\f",
        "\\n": "\n", "\\r": "\r", "\\t": "\t", "\\v": "\v",
        "\\\\": "\\", "\\'": "'", '\\"': '"',
    }
    result = []
    i = 0
    while i < len(s):
        if s[i] == "\\" and i + 1 < len(s):
            two = s[i:i + 2]
            if two in escapes:
                result.append(escapes[two])
                i += 2
                continue
        result.append(s[i])
        i += 1
    return "".join(result)


def tokenize(curl_command: str) -> list:
    """Split a curl command line into a list of argument tokens.

    Steps: join backslash-continued lines, drop the leading ``curl`` program
    name, rewrite bash-only syntax via ``_preprocess_curl``, then split with
    ``shlex`` in POSIX mode. If ``shlex`` rejects the text (for example an
    unbalanced quote) the lenient ``_fallback_tokenize`` is used instead.

    Args:
        curl_command: The command as pasted, optionally spanning several lines.

    Returns:
        The argument tokens, without the ``curl`` program name.
    """
    command = curl_command.strip()

    # Join continuation lines first so that "curl \<newline>-H ..." also
    # exposes a plain "curl " prefix.
    command = re.sub(r'\\\s*\n\s*', ' ', command)

    # Drop the program name once, whatever whitespace follows it (space, tab,
    # newline, CRLF) and also when it is the whole command.
    prefix = re.match(r"curl(?:\s+|$)", command)
    if prefix:
        command = command[prefix.end():]

    command = _preprocess_curl(command)

    try:
        return shlex.split(command, posix=True)
    except ValueError:
        return _fallback_tokenize(command)


def _fallback_tokenize(command: str) -> list:
    """Windows 下 shlex 失败的降级 tokenizer"""
    tokens = []
    current = ""
    in_single = False
    in_double = False
    i = 0

    while i < len(command):
        ch = command[i]
        if in_single:
            if ch == "'":
                in_single = False
            else:
                current += ch
        elif in_double:
            if ch == '"':
                in_double = False
            elif ch == '\\' and i + 1 < len(command):
                i += 1
                current += command[i]
            else:
                current += ch
        elif ch == "'":
            in_single = True
        elif ch == '"':
            in_double = True
        elif ch in (' ', '\t'):
            if current:
                tokens.append(current)
                current = ""
        else:
            current += ch
        i += 1

    if current:
        tokens.append(current)
    return tokens


# Options whose value is the following token (or, for short options, the rest
# of the same token as in "-XPOST"). The first group is interpreted by
# parse_curl; the second group is listed only so that the value is skipped
# instead of being mistaken for the URL.
_CURL_VALUE_OPTIONS = frozenset({
    "-X", "--request", "-H", "--header",
    "-d", "--data", "--data-ascii", "--data-raw", "--data-binary",
    "--data-urlencode", "-F", "--form", "-b", "--cookie", "-u", "--user",
    "-x", "--proxy", "-m", "--max-time", "--connect-timeout",
    "-A", "--user-agent", "-e", "--referer", "--url",

    "-o", "--output", "-c", "--cookie-jar", "-C", "--continue-at",
    "-D", "--dump-header", "-E", "--cert", "-K", "--config",
    "-r", "--range", "-T", "--upload-file", "-U", "--proxy-user",
    "-w", "--write-out", "-y", "--speed-time", "-Y", "--speed-limit",
    "-z", "--time-cond", "--cacert", "--capath", "--key", "--retry",
    "--retry-delay", "--retry-max-time", "--max-redirs", "--resolve",
    "--connect-to", "--interface", "--limit-rate",
})
_CURL_SHORT_VALUE_FLAGS = frozenset(
    opt[1] for opt in _CURL_VALUE_OPTIONS if len(opt) == 2
)


def _expand_short_options(token: str) -> list:
    """Split a short-option cluster such as ``-sSL`` or ``-XPOST`` into tokens."""
    expanded = []
    for pos in range(1, len(token)):
        flag = token[pos]
        expanded.append("-" + flag)
        if flag in _CURL_SHORT_VALUE_FLAGS:
            # Everything after a value-taking flag is that flag's value.
            attached = token[pos + 1:]
            if attached:
                expanded.append(attached)
            break
    return expanded


def _append_data(existing, new: str) -> str:
    """Join a further data option to earlier ones with ``&``, as curl does."""
    return f"{existing}&{new}" if existing else new


def _urlencode_data_item(item: str) -> str:
    """URL-encode the content part of one ``--data-urlencode`` argument."""
    from urllib.parse import quote

    name, sep, content = item.partition("=")
    if not sep:
        # "@file" / "name@file" refer to files that cannot be read here.
        return item if "@" in item else quote(item, safe="")
    encoded = quote(content, safe="")
    return f"{name}={encoded}" if name else encoded


def _parse_seconds(value: str):
    """Parse a timeout in seconds; whole numbers stay ``int``."""
    seconds = float(value)
    return int(seconds) if seconds.is_integer() else seconds


def parse_curl(curl_command: str) -> CurlCommand:
    """Parse a curl command line into a ``CurlCommand``.

    Handles long options, short options, short-option clusters (``-sSL``) and
    attached values (``-XPOST``). Repeated data options are joined with ``&``.
    With ``-G``/``--get`` the collected data is moved into the URL query
    string. Options that take a value but have no counterpart here have their
    value skipped. When no method was chosen with ``-X`` or ``-I`` and a body
    or form is present, the method becomes ``POST``.

    Args:
        curl_command: The command as pasted (may span several lines).

    Returns:
        The populated ``CurlCommand``.

    Raises:
        ValueError: If a timeout value is not a number.
    """
    tokens = tokenize(curl_command)
    result = CurlCommand()
    explicit_method = False   # True once -X or -I fixed the method
    send_as_query = False     # True when -G/--get was given

    i = 0
    while i < len(tokens):
        token = tokens[i]

        # "-sSL" / "-XPOST": replace the cluster by its parts and re-read.
        if len(token) > 2 and token[0] == "-" and token[1] != "-":
            tokens[i:i + 1] = _expand_short_options(token)
            continue

        value = None
        if token in _CURL_VALUE_OPTIONS:
            i += 1
            if i >= len(tokens):
                break  # option given without its value: nothing left to parse
            value = tokens[i]

        if token in ("-X", "--request"):
            result.method = value.upper()
            explicit_method = True
        elif token in ("-H", "--header"):
            _parse_header(result, value)
        elif token in ("-d", "--data", "--data-ascii"):
            result.data = _append_data(result.data, value)
        elif token == "--data-raw":
            result.data_raw = _append_data(result.data_raw, value)
        elif token == "--data-binary":
            result.data_binary = _append_data(result.data_binary, value)
        elif token == "--data-urlencode":
            result.data = _append_data(result.data, _urlencode_data_item(value))
        elif token in ("-F", "--form"):
            result.form.append(value)
        elif token in ("-b", "--cookie"):
            _parse_cookies(result, value)
        elif token in ("-u", "--user"):
            result.auth = value
        elif token in ("-x", "--proxy"):
            result.proxy = value
        elif token in ("-m", "--max-time"):
            result.timeout = _parse_seconds(value)
        elif token == "--connect-timeout":
            result.connect_timeout = _parse_seconds(value)
        elif token in ("-A", "--user-agent"):
            result.user_agent = value
        elif token in ("-e", "--referer"):
            result.headers["Referer"] = value
        elif token == "--url":
            result.url = value.strip().strip('`').strip()
        elif token in ("-k", "--insecure"):
            result.verify = False
        elif token in ("-L", "--location"):
            # Nothing to do: CurlCommand.allow_redirects already defaults to True.
            pass
        elif token == "--no-location":
            result.allow_redirects = False
        elif token in ("-I", "--head"):
            result.method = "HEAD"
            explicit_method = True
        elif token in ("-G", "--get"):
            send_as_query = True
        elif token == "--compressed":
            result.compressed = True
        elif not token.startswith("-"):
            candidate = token.strip().strip('`').strip()
            if candidate:
                result.url = candidate
        # Any other option is ignored.

        i += 1

    if send_as_query:
        # -G: the data travels in the query string instead of a body.
        parts = [p for p in (result.data, result.data_raw, result.data_binary) if p]
        if parts:
            joiner = "&" if "?" in result.url else "?"
            result.url += joiner + "&".join(parts)
        result.data = result.data_raw = result.data_binary = None

    # curl switches to POST by itself when a body is given without -X.
    if not explicit_method and _has_body(result):
        result.method = "POST"

    return result


def _has_body(cmd: CurlCommand) -> bool:
    """Return True when any data option or at least one form item was given."""
    return (
        cmd.data is not None
        or cmd.data_raw is not None
        or cmd.data_binary is not None
        or len(cmd.form) > 0
    )


def _is_binary(s: str) -> bool:
    for ch in s:
        o = ord(ch)
        if o == 0 or (o < 9 and o != 0) or (14 <= o <= 31) or o == 127:
            return True
    return False


def _format_bytes(s: str) -> str:
    return repr(s.encode("utf-8", errors="backslashreplace"))


def _format_data_value(s: str) -> str:
    """Format a request body as Python source that evaluates to exactly *s*.

    * Data containing binary control characters becomes a ``bytes`` literal.
    * Multi-line data becomes a parenthesised sequence of adjacent string
      literals, one per line, so no newline is added or lost and backslashes
      and quotes are escaped by ``repr``.
    * Anything else is formatted by ``_format_value``.

    Args:
        s: The body text.

    Returns:
        The Python expression as source text.
    """
    if _is_binary(s):
        return _format_bytes(s)
    if '\n' in s:
        # keepends=True guarantees the pieces concatenate back to s.
        pieces = s.splitlines(keepends=True)
        body = "\n".join(f"    {piece!r}" for piece in pieces)
        return "(\n" + body + "\n)"
    return _format_value(s)


def _parse_header(result: CurlCommand, header_str: str):
    """Store one ``'Name: value'`` header string on *result*.

    A string without a colon is ignored. ``Content-Type`` is stored under
    that exact spelling, ``User-Agent`` goes to ``result.user_agent``, and a
    ``Cookie`` header is split into ``result.cookies``; every other header is
    stored under the name as written.
    """
    name, colon, raw_value = header_str.partition(":")
    if not colon:
        return

    name = name.strip()
    # Surrounding whitespace and stray backticks are removed from the value.
    value = raw_value.strip().strip('`').strip()
    lowered = name.lower()

    if lowered == "content-type":
        result.headers["Content-Type"] = value
        return
    if lowered == "user-agent":
        result.user_agent = value
        return
    if lowered == "cookie":
        _parse_cookie_string(result, value)
        return
    result.headers[name] = value


def _parse_cookies(result: CurlCommand, cookie_str: str):
    """Parse a ``-b``/``--cookie`` value; a value without ``=`` is ignored."""
    if "=" not in cookie_str:
        return
    _parse_cookie_string(result, cookie_str)


def _parse_cookie_string(result: CurlCommand, cookie_str: str):
    """Split ``'key=value; key2=value2'`` and store each pair in ``result.cookies``.

    Segments without ``=`` are skipped; names and values are stripped of
    surrounding whitespace.
    """
    for segment in cookie_str.split(";"):
        name, equals, content = segment.strip().partition("=")
        if equals:
            result.cookies[name.strip()] = content.strip()


def generate_python(curl: CurlCommand) -> str:
    """Generate Python ``requests`` source code for a parsed curl command.

    The output defines one variable per non-empty part of the request
    (``headers``, ``cookies``, ``data``/``json_data``, ``files``, ``auth``,
    ``proxies``, ``timeout``), then performs the call and prints the status
    code and body.

    Notable choices:

    * A ``-d`` body is sent with ``json=`` when the Content-Type mentions
      ``json`` and the body parses as JSON; otherwise with ``data=``.
    * ``-F`` items always produce a multipart request: file parts are emitted
      as ``open(path, 'rb')`` calls, and when there are only plain fields they
      are emitted as ``(None, value)`` entries of ``files``.
    * ``--no-location`` yields ``allow_redirects=False``.
    * When both timeouts are set they are emitted as a
      ``(connect, total)`` tuple.
    * Methods without a ``requests`` shortcut go through ``requests.request``.

    Args:
        curl: The parsed command.

    Returns:
        The generated source code as one string.
    """
    lines = ["import requests"]
    var_lines = []
    kwargs = []
    body_var = None

    all_headers = dict(curl.headers)
    if curl.user_agent and "User-Agent" not in all_headers:
        all_headers["User-Agent"] = curl.user_agent
    if curl.compressed and "Accept-Encoding" not in all_headers:
        all_headers["Accept-Encoding"] = "gzip, deflate, br"

    if all_headers:
        var_lines.append(f"headers = {_format_dict(all_headers)}")
        kwargs.append("headers=headers")

    if curl.cookies:
        var_lines.append(f"cookies = {_format_dict(curl.cookies)}")
        kwargs.append("cookies=cookies")

    content_type = all_headers.get("Content-Type", "")

    if curl.data_binary is not None:
        var_lines.append(f"data = {_format_data_value(curl.data_binary)}")
        body_var = "data"
    elif curl.data_raw is not None:
        var_lines.append(f"data = {_format_data_value(curl.data_raw)}")
        body_var = "data"
    elif curl.data is not None:
        parsed = None
        is_json = False
        if "json" in content_type:
            try:
                parsed = json.loads(curl.data)
                is_json = True
            except ValueError:
                # Declared as JSON but not parseable: send it as plain data.
                pass
        if is_json:
            var_lines.append(f"json_data = {_format_python_value(parsed)}")
            kwargs.append("json=json_data")
        else:
            var_lines.append(f"data = {_format_data_value(curl.data)}")
            body_var = "data"
    elif curl.form:
        files, form_data = _parse_form(curl.form)
        if files:
            # The values are already Python expressions (open(...) calls) and
            # must be written verbatim, not quoted as strings.
            file_items = [
                f"    {_format_value(name)}: {expr}"
                for name, expr in files.items()
            ]
            var_lines.append("files = {\n" + ",\n".join(file_items) + "\n}")
            kwargs.append("files=files")
            if form_data:
                var_lines.append(f"data = {_format_dict(form_data)}")
                body_var = "data"
        elif form_data:
            # Without a file part, data= would be sent url-encoded; a
            # (None, value) entry keeps the request multipart like curl -F.
            field_items = [
                f"    {_format_value(name)}: (None, {_format_value(value)})"
                for name, value in form_data.items()
            ]
            var_lines.append("files = {\n" + ",\n".join(field_items) + "\n}")
            kwargs.append("files=files")

    if curl.auth:
        if ":" in curl.auth:
            user, pwd = curl.auth.split(":", 1)
            var_lines.append(f"auth = ({_format_value(user)}, {_format_value(pwd)})")
        else:
            var_lines.append(f"auth = ({_format_value(curl.auth)}, '')")
        kwargs.append("auth=auth")

    if curl.proxy:
        proxy_dict = {"http": curl.proxy, "https": curl.proxy}
        var_lines.append(f"proxies = {_format_dict(proxy_dict)}")
        kwargs.append("proxies=proxies")

    if not curl.verify:
        kwargs.append("verify=False")

    if not curl.allow_redirects:
        kwargs.append("allow_redirects=False")

    total, connect = curl.timeout, curl.connect_timeout
    if total and connect:
        # requests accepts a (connect, read) tuple; curl's --max-time bounds
        # the whole transfer, so this is the closest available mapping.
        var_lines.append(f"timeout = ({connect}, {total})")
        kwargs.append("timeout=timeout")
    elif total or connect:
        var_lines.append(f"timeout = {total or connect}")
        kwargs.append("timeout=timeout")

    if body_var:
        kwargs.append(f"{body_var}={body_var}")

    lines.extend(var_lines)
    if var_lines:
        lines.append("")

    verb = curl.method.lower()
    url_literal = _format_value(curl.url)
    if verb in ("get", "post", "put", "patch", "delete", "head", "options"):
        call_head = f"requests.{verb}({url_literal}"
    else:
        # No shortcut function exists for other methods.
        call_head = f"requests.request({_format_value(curl.method)}, {url_literal}"

    if kwargs:
        kwargs_str = ",\n    ".join(kwargs)
        lines.append(f"response = {call_head}, {kwargs_str})")
    else:
        lines.append(f"response = {call_head})")
    lines.append("print(response.status_code)")
    lines.append("print(response.text)")

    return "\n".join(lines)


def _parse_form(form_items: list) -> tuple:
    """解析 -F/--form 参数,返回 (files_dict, data_dict)"""
    files = {}
    data = {}
    for item in form_items:
        if "=" in item:
            key, value = item.split("=", 1)
            if value.startswith("@"):
                files[key] = f"open({_format_value(value[1:])}, 'rb')"
            else:
                data[key] = value
    return files, data


def _format_python_value(obj) -> str:
    """将 Python 对象格式化为代码字面量"""
    if isinstance(obj, dict):
        if not obj:
            return "{}"
        items = []
        for k, v in obj.items():
            items.append(f"    {_format_value(str(k))}: {_format_python_value(v)}")
        return "{\n" + ",\n".join(items) + "\n}"
    elif isinstance(obj, list):
        if not obj:
            return "[]"
        items = [f"    {_format_python_value(v)}" for v in obj]
        return "[\n" + ",\n".join(items) + "\n]"
    elif isinstance(obj, bool):
        return str(obj)
    elif isinstance(obj, int):
        return str(obj)
    elif isinstance(obj, float):
        return str(obj)
    elif obj is None:
        return "None"
    else:
        return _format_value(str(obj))


def _format_value(value: str) -> str:
    """将字符串格式化为 Python 字面量"""
    if not value:
        return '""'
    if "'" in value and '"' not in value:
        return f'"{value}"'
    if '"' in value and "'" not in value:
        return f"'{value}'"
    escaped = value.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def _format_dict(d: dict) -> str:
    """将 dict 格式化为 Python 代码"""
    if not d:
        return "{}"
    items = []
    for k, v in d.items():
        items.append(f"    {_format_value(k)}: {_format_value(v)}")
    return "{\n" + ",\n".join(items) + "\n}"


def convert(curl_command: str) -> str:
    """Main entry point: turn a curl command into Python ``requests`` code."""
    return generate_python(parse_curl(curl_command))


if __name__ == "__main__":
    import sys

    # Built-in sample commands, converted when no argument is given.
    test_cases = [
        # Plain GET
        'curl https://httpbin.org/get',

        # GET with a header
        'curl -H "Authorization: Bearer token123" https://httpbin.org/get',

        # POST with a JSON body
        'curl -X POST https://httpbin.org/post -H "Content-Type: application/json" -d \'{"name": "test", "value": 123}\'',

        # POST with url-encoded form data
        'curl -X POST https://httpbin.org/post -d "username=admin&password=secret"',

        # Basic auth
        'curl -u user:pass https://httpbin.org/basic-auth/user/pass',

        # Cookies
        'curl -b "session=abc123" https://httpbin.org/cookies',

        # Skip TLS verification and follow redirects
        'curl -k -L https://example.com',

        # Proxy
        'curl -x http://proxy:8080 https://httpbin.org/ip',

        # Timeouts
        'curl -m 30 --connect-timeout 10 https://httpbin.org/delay/5',

        # HEAD request
        'curl -I https://httpbin.org',

        # Compressed response
        'curl --compressed https://httpbin.org/gzip',

        # Several headers
        'curl -H "Accept: application/json" -H "X-Custom: value" https://httpbin.org/get',
    ]

    banner = "=" * 60
    print(banner)
    print("  curl → Python requests  转换器")
    print(banner)

    cli_args = sys.argv[1:]
    if cli_args:
        # Command-line mode: all arguments together form one curl command.
        source = " ".join(cli_args)
        print("\n📥 输入 curl 命令:")
        print("   " + source)
        print("\n🐍 生成的 Python 代码:")
        print()
        try:
            print(convert(source))
        except Exception as err:
            print(f"❌ 转换失败: {err}")
    else:
        # Demo mode: convert every built-in sample.
        divider = "─" * 60
        for number, cmd in enumerate(test_cases, 1):
            preview = cmd[:80]
            if len(cmd) > 80:
                preview += "..."
            print("\n" + divider)
            print(f"测试 #{number}:")
            print(f"  curl: {preview}")
            print("  输出:")
            try:
                generated = convert(cmd)
                for row in generated.split("\n"):
                    print("    " + row)
            except Exception as err:
                print(f"    ❌ 错误: {err}")

        print("\n" + banner)
        print("✅ 所有内置测试完成")
        print("💡 用法: python curl_to_python.py '<curl命令>'")

import tkinter as tk
from tkinter import ttk, scrolledtext
from curl_to_python import convert

try:
    import pyperclip
    HAS_PYPERCLIP = True
except ImportError:
    HAS_PYPERCLIP = False


class CurlToPythonApp:
    """Tkinter window that converts a pasted curl command into Python code.

    The window holds an input text box, a button row (convert / copy / clear)
    with a status label, and an output text box showing the generated code.
    """

    def __init__(self, root) -> None:
        """Build all widgets inside *root* and register the key bindings."""
        self.root = root
        self.root.title("curl → Python requests 转换器")
        self.root.geometry("900x700")
        self.root.minsize(600, 450)

        style = ttk.Style()
        style.theme_use("clam")

        # Widgets are packed top to bottom in creation order.
        main_frame = ttk.Frame(root, padding=12)
        main_frame.pack(fill=tk.BOTH, expand=True)

        ttk.Label(main_frame, text="curl → Python requests", font=("Microsoft YaHei", 14, "bold")).pack(anchor=tk.W)

        ttk.Label(main_frame, text="粘贴 curl 命令(支持多行 \\ 续行, bash $'...' 引号, 反引号URL):",
                  font=("Microsoft YaHei", 9)).pack(anchor=tk.W, pady=(8, 2))

        # Input area for the curl command.
        self.input_text = scrolledtext.ScrolledText(main_frame, height=12, font=("Consolas", 10), wrap=tk.NONE)
        self.input_text.pack(fill=tk.BOTH, expand=True, pady=(0, 4))

        # Button row; the status label sits at its right edge.
        btn_frame = ttk.Frame(main_frame)
        btn_frame.pack(fill=tk.X, pady=(0, 8))

        self.convert_btn = ttk.Button(btn_frame, text="转换 →", command=self.do_convert)
        self.convert_btn.pack(side=tk.LEFT, padx=(0, 6))

        self.copy_btn = ttk.Button(btn_frame, text="复制结果", command=self.do_copy)
        self.copy_btn.pack(side=tk.LEFT, padx=(0, 6))

        self.clear_btn = ttk.Button(btn_frame, text="清空", command=self.do_clear)
        self.clear_btn.pack(side=tk.LEFT)

        self.status_label = ttk.Label(btn_frame, text="", foreground="gray")
        self.status_label.pack(side=tk.RIGHT)

        ttk.Label(main_frame, text="生成的 Python 代码:", font=("Microsoft YaHei", 9)).pack(anchor=tk.W, pady=(4, 2))

        # Output area for the generated code.
        self.output_text = scrolledtext.ScrolledText(main_frame, height=20, font=("Consolas", 10), wrap=tk.NONE)
        self.output_text.pack(fill=tk.BOTH, expand=True)

        # Keyboard shortcuts: Ctrl+Enter converts, Ctrl+C copies the output.
        self.root.bind("<Control-Return>", lambda e: self.do_convert())
        # NOTE(review): this binding is on the root window, so it presumably
        # also fires when Ctrl+C is pressed inside a text box and may replace
        # a copied selection with the whole output -- confirm.
        self.root.bind("<Control-c>", lambda e: self.do_copy())
        self.root.protocol("WM_DELETE_WINDOW", self.on_close)

        self.status_label.config(text="就绪 --- 按 Ctrl+Enter 快速转换", foreground="gray")

    def do_convert(self) -> None:
        """Convert the input text and show the result, or report the error.

        On success the output box is replaced with the generated code. On
        failure only the status label changes; the output box is left as is.
        """
        curl_cmd = self.input_text.get("1.0", tk.END).strip()
        if not curl_cmd:
            self.status_label.config(text="请先粘贴 curl 命令", foreground="red")
            return

        try:
            result = convert(curl_cmd)
            self.output_text.delete("1.0", tk.END)
            self.output_text.insert("1.0", result)
            self.status_label.config(text="转换成功 ✓", foreground="green")
        except Exception as e:
            self.status_label.config(text=f"转换失败: {e}", foreground="red")

    def do_copy(self) -> None:
        """Copy the whole output text to the clipboard.

        Uses ``pyperclip`` when it was importable, otherwise the Tk clipboard.
        """
        result = self.output_text.get("1.0", tk.END).strip()
        if not result:
            self.status_label.config(text="没有可复制的内容", foreground="red")
            return
        if HAS_PYPERCLIP:
            pyperclip.copy(result)
        else:
            self.root.clipboard_clear()
            self.root.clipboard_append(result)
        self.status_label.config(text="已复制到剪贴板 ✓", foreground="green")

    def do_clear(self) -> None:
        """Empty both text boxes and reset the status label."""
        self.input_text.delete("1.0", tk.END)
        self.output_text.delete("1.0", tk.END)
        self.status_label.config(text="已清空", foreground="gray")

    def on_close(self) -> None:
        """Handle the window-close request by destroying the root window."""
        self.root.destroy()


def main():
    """Create the Tk root window, attach the converter UI and run the event loop."""
    window = tk.Tk()
    _app = CurlToPythonApp(window)  # held so the UI object lives as long as the loop
    window.mainloop()


if __name__ == "__main__":
    main()
相关推荐
charlie1145141911 小时前
Linux 字符设备驱动:cdev、设备号与设备模型
linux·开发语言·驱动开发·c
handler011 小时前
Linux 内核剖析:进程优先级、上下文切换与 O(1) 调度算法
linux·运维·c语言·开发语言·c++·笔记·算法
FQNmxDG4S1 小时前
Java泛型编程:类型擦除与泛型方法的应用场景
java·开发语言·python
bzmK1DTbd2 小时前
JDBC编程规范:PreparedStatement与事务管理
数据库·python·eclipse
我星期八休息2 小时前
IT疑难杂症诊疗室:AI时代工程师Superpowers进化论
linux·开发语言·数据结构·人工智能·python·散列表
代码小书生2 小时前
math,一个基础的 Python 库!
人工智能·python·算法
热心网友俣先生2 小时前
2026年第二十三届五一数学建模竞赛C题超详细解题思路+各问题可用模型推荐+部分模型结果展示
c语言·开发语言·数学建模
01漫游者2 小时前
JavaScript函数与对象增强知识
开发语言·javascript·ecmascript
IGAn CTOU2 小时前
Java高级开发进阶教程之系列
java·开发语言