Crawler Case 5: Baidu Translate, English-Chinese Translation

Target page: https://fanyi.baidu.com/
Goal: translate English to Chinese and Chinese to English

Save the following code as js_code.js

javascript
function fn(t) {
    // Babel spread helper: it is called as e(...) below but was missing from the
    // original paste. It is only reached when the input contains surrogate pairs.
    function e(t, n) {
        (null == n || n > t.length) && (n = t.length);
        for (var i = 0, r = new Array(n); i < n; i++)
            r[i] = t[i];
        return r
    }
    function n(t, e) {
        for (var n = 0; n < e.length - 2; n += 3) {
            var r = e.charAt(n + 2);
            r = "a" <= r ? r.charCodeAt(0) - 87 : Number(r),
            r = "+" === e.charAt(n + 1) ? t >>> r : t << r,
            t = "+" === e.charAt(n) ? t + r & 4294967295 : t ^ r
        }
        return t
    }
    var r = '320305.131321201' // gtk value hardcoded from the page; the code below falls back to window.gtk when r is null
    var o, i = t.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === i) {
        var a = t.length;
        a > 30 && (t = "".concat(t.substr(0, 10)).concat(t.substr(Math.floor(a / 2) - 5, 10)).concat(t.substr(-10, 10)))
    } else {
        for (var s = t.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), c = 0, u = s.length, l = []; c < u; c++)
            "" !== s[c] && l.push.apply(l, function(t) {
                if (Array.isArray(t))
                    return e(t)
            }(o = s[c].split("")) || function(t) {
                if ("undefined" != typeof Symbol && null != t[Symbol.iterator] || null != t["@@iterator"])
                    return Array.from(t)
            }(o) || function(t, n) {
                if (t) {
                    if ("string" == typeof t)
                        return e(t, n);
                    var r = Object.prototype.toString.call(t).slice(8, -1);
                    return "Object" === r && t.constructor && (r = t.constructor.name),
                    "Map" === r || "Set" === r ? Array.from(t) : "Arguments" === r || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r) ? e(t, n) : void 0
                }
            }(o) || function() {
                throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")
            }()),
            c !== u - 1 && l.push(i[c]);
        var p = l.length;
        p > 30 && (t = l.slice(0, 10).join("") + l.slice(Math.floor(p / 2) - 5, Math.floor(p / 2) + 5).join("") + l.slice(-10).join(""))
    }
    // String.fromCharCode(103/116/107) spells "gtk"; h splits the token into the two numeric halves f and m
    for (var d = "".concat(String.fromCharCode(103)).concat(String.fromCharCode(116)).concat(String.fromCharCode(107)), h = (null !== r ? r : (r = window[d] || "") || "").split("."), f = Number(h[0]) || 0, m = Number(h[1]) || 0, g = [], y = 0, v = 0; v < t.length; v++) {
        var _ = t.charCodeAt(v);
        _ < 128 ? g[y++] = _ : (_ < 2048 ? g[y++] = _ >> 6 | 192 : (55296 == (64512 & _) && v + 1 < t.length && 56320 == (64512 & t.charCodeAt(v + 1)) ? (_ = 65536 + ((1023 & _) << 10) + (1023 & t.charCodeAt(++v)),
        g[y++] = _ >> 18 | 240,
        g[y++] = _ >> 12 & 63 | 128) : g[y++] = _ >> 12 | 224,
        g[y++] = _ >> 6 & 63 | 128),
        g[y++] = 63 & _ | 128)
    }
    // w decodes to "+-a^+6" and k to "+-3^+b+-f"
    for (var b = f, w = "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(97)) + "".concat(String.fromCharCode(94)).concat(String.fromCharCode(43)).concat(String.fromCharCode(54)), k = "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(51)) + "".concat(String.fromCharCode(94)).concat(String.fromCharCode(43)).concat(String.fromCharCode(98)) + "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(102)), x = 0; x < g.length; x++)
        b = n(b += g[x], w);
    return b = n(b, k),
    (b ^= m) < 0 && (b = 2147483648 + (2147483647 & b)),
    "".concat((b %= 1e6).toString(), ".").concat(b ^ f)
}
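
For reference, the sign algorithm above can also be ported to pure Python, which removes the js2py dependency. The sketch below mirrors the JS for ordinary BMP text (the surrogate-pair branch is omitted) and hardcodes the same gtk value, 320305.131321201; it is an assumption that this value is still what the live page serves.

```python
def _int32(x):
    """Coerce to JavaScript's signed 32-bit integer semantics."""
    x &= 0xFFFFFFFF
    return x - 0x100000000 if x >= 0x80000000 else x

def _mix(t, rule):
    # Port of the inner helper n(t, e): the rule string is read in triples of
    # (combine op, shift direction, shift amount)
    for i in range(0, len(rule) - 2, 3):
        c = rule[i + 2]
        amt = ord(c) - 87 if c >= 'a' else int(c)
        # '+' in position i+1 means unsigned right shift (>>>), else left shift
        s = (t & 0xFFFFFFFF) >> amt if rule[i + 1] == '+' else _int32(t << amt)
        # '+' in position i means 32-bit add, else xor
        t = _int32(t + s) if rule[i] == '+' else _int32(t ^ s)
    return t

def py_sign(word, gtk='320305.131321201'):
    # Same truncation as the JS for long input (valid for BMP text only)
    if len(word) > 30:
        mid = len(word) // 2
        word = word[:10] + word[mid - 5:mid + 5] + word[-10:]
    f, m = (int(x) for x in gtk.split('.'))
    b = f
    for byte in word.encode('utf-8'):   # the JS loop is a manual UTF-8 encoder
        b = _mix(b + byte, '+-a^+6')
    b = _mix(b, '+-3^+b+-f')
    b ^= m
    if b < 0:
        b = 2147483648 + (b & 2147483647)
    b %= 1000000
    return f'{b}.{b ^ f}'
```

By construction the part after the dot is always the part before it xor-ed with the first half of gtk, which is a quick way to sanity-check any port against the js2py output.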

Save the following code as main.py

python
import requests
import js2py 
import time
import os


cur_path = os.path.dirname(__file__)

def get_sign(word):
    with open(os.path.join(cur_path, 'js_code.js'), 'r', encoding='utf-8') as f:
        js_code = f.read()
    fn = js2py.eval_js(js_code)  # runs the JS directly, no local Node.js installation needed
    return fn(word)

def get_langdetect(word):
    url = 'https://fanyi.baidu.com/langdetect'
    data = {
        'query': word
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
    }
    resp_json_data = requests.post(url, data=data, headers=headers).json()
    return resp_json_data['lan']
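
Note that langdetect can return codes other than zh and en (for example jp or kor). A small helper can make the direction choice explicit; this is a sketch in which anything that is not Chinese falls back to en -> zh:

```python
def pick_direction(detected_lan):
    # Chinese input is translated to English; everything else falls back to en -> zh
    if detected_lan == 'zh':
        return 'zh', 'en'
    return 'en', 'zh'
```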

def get_cookie():
    with open(os.path.join(cur_path, 'cookie.txt'), 'r', encoding='utf-8') as fp:
        cookie = fp.read()  # copy the Cookie header from your browser's dev tools and save it as cookie.txt next to this script
    return cookie
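
The script passes the raw header string through unchanged. If you would rather hand requests individual cookies via its cookies= parameter, a 'k1=v1; k2=v2' string can be split into a dict; a minimal sketch (the cookie names shown are placeholders):

```python
def cookie_to_dict(raw):
    # Split 'k1=v1; k2=v2' into a dict. Baidu cookie values can themselves
    # contain '=', so split each pair only on the first '='.
    return dict(part.strip().split('=', 1) for part in raw.split(';') if '=' in part)
```

With this, the request could be made as requests.post(url, data=data, headers=headers, cookies=cookie_to_dict(get_cookie())).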

if __name__ == '__main__':
    word = input('Enter the word to translate: ')
    lan = get_langdetect(word)
    if lan == 'zh':
        from_lan, to_lan = 'zh', 'en'
    else:
        # langdetect may return other codes; fall back to en -> zh
        from_lan, to_lan = 'en', 'zh'
    fanyi_url = 'https://fanyi.baidu.com/v2transapi?from=' + from_lan + '&to=' + to_lan
    data = {
        'from': from_lan,
        'to': to_lan,
        'query': word,
        'transtype': 'realtime',
        'simple_means_flag': 3,
        'sign': get_sign(word),
        'token': '43c3f0262c18388a7bb9ded33a789a10',  # the token must match your cookie; grab the current one from the translate page source
        'domain': 'common',
        'ts': int(time.time() * 1000)
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
        'Cookie': get_cookie()
    }
    resp = requests.post(url=fanyi_url, data=data, headers=headers)
    resp_json = resp.json()
    if 'trans_result' in resp_json:
        print(f"Translation of [{word}]: ", resp_json['trans_result']['data'][0]['dst'])
    else:
        # Baidu returns HTTP 200 even when the sign, token, or cookie is stale,
        # so check the body rather than the status code
        print("Request failed: ", resp_json)