最近碰到一个需求:需要批量把外文文本翻译成中文,于是写了一个 Python 脚本。
step1:
python
#!/usr/bin/env python3
"""
文件翻译脚本
功能:读取本地文本文件,将内容翻译成中文,保存为新文件
"""
import re
import json
import urllib.parse
import urllib.request
from pathlib import Path
def is_chinese(text):
    """Report whether *text* contains at least one Chinese character.

    Only code points in the range U+4E00 to U+9FFF are recognised.
    Falsy input (``None``, ``""``) yields ``False``.
    """
    han_pattern = r'[\u4e00-\u9fff]'
    return bool(text) and re.search(han_pattern, text) is not None
def _collect_translations(data):
    """Flatten a decoded translation payload into a list of unique candidates.

    The order is: main translation first, then alternative translations,
    then dictionary entries. Duplicates are dropped.
    """
    results = []

    # 1. Main translation: concatenate the first element of every segment.
    if data and isinstance(data[0], list):
        main_translation = "".join(
            str(part[0])
            for part in data[0]
            if isinstance(part, list) and part and part[0]
        )
        if main_translation:
            results.append(main_translation)

    def add_unique_strings(groups):
        # Each group is expected to carry its list of candidate strings at index 1.
        for group in groups:
            if not (isinstance(group, list) and len(group) > 1
                    and isinstance(group[1], list)):
                continue
            for candidate in group[1]:
                if isinstance(candidate, str) and candidate and candidate not in results:
                    results.append(candidate)

    # 2. Alternative translations.
    if len(data) > 5 and isinstance(data[5], list):
        add_unique_strings(data[5])

    # 3. Dictionary results.
    if len(data) > 1 and isinstance(data[1], list):
        add_unique_strings(data[1])

    return results


def translate_text(text, target_lang='zh-CN', timeout=10):
    """Translate a single piece of text via the Google Translate web endpoint.

    Args:
        text: The text to translate.
        target_lang: Target language code (default ``'zh-CN'``).
        timeout: Seconds to wait on the network request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list of candidate translations, best first.
        * ``[]`` when *text* is empty or whitespace only.
        * ``[text]`` when the target is ``'zh-CN'`` and *text* already
          contains Chinese characters.
        * ``["未找到翻译结果"]`` when the response holds no usable candidate.
        * ``["翻译错误: <message>"]`` when the request or parsing fails;
          errors are reported in-band rather than raised.
    """
    if not text or not text.strip():
        return []

    # Already Chinese: return it unchanged.
    if target_lang == 'zh-CN' and is_chinese(text):
        return [text]

    # Build the query with urlencode so every value, including target_lang,
    # is percent-escaped. "dt" is repeated to request several result sections.
    query = urllib.parse.urlencode(
        [
            ("client", "gtx"),
            ("sl", "auto"),
            ("tl", target_lang),
            ("dt", "t"),
            ("dt", "at"),
            ("dt", "bd"),
            ("q", text),
        ],
        quote_via=urllib.parse.quote,
    )
    url = f"https://translate.googleapis.com/translate_a/single?{query}"

    try:
        req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        with urllib.request.urlopen(req, timeout=timeout) as response:
            data = json.loads(response.read().decode('utf-8'))
        results = _collect_translations(data)
        return results if results else ["未找到翻译结果"]
    except Exception as e:
        # Best-effort boundary: any network or parsing failure becomes a message.
        return [f"翻译错误: {str(e)}"]
def translate_file_content(content, target_lang='zh-CN'):
"""
翻译文件内容
按段落或行处理,保持原文结构
"""
if not content:
return ""
# 按行分割,保持原有结构
lines = content.splitlines(keepends=False)
translated_lines = []
for i, line in enumerate(lines):
line = line.rstrip() # 去掉右侧空白
if not line: # 空行保留
translated_lines.append("")
continue
# 跳过已经是中文的行
if target_lang == 'zh-CN' and is_chinese(line):
translated_lines.append(line)
print(f"第 {i + 1} 行: 已是中文,跳过翻译")
continue
# 翻译该行
print(f"翻译第 {i + 1}/{len(lines)} 行: {line[:50]}..." if len(
line) > 50 else f"翻译第 {i + 1}/{len(lines)} 行: {line}")
try:
results = translate_text(line, target_lang)
if results and results[0]:
translated_lines.append(results[0])
if len(results) > 1:
print(f" -> 备选结果: {', '.join(results[1:3])}")
else:
translated_lines.append(line) # 翻译失败,保留原文
print(f" -> 翻译失败,保留原文")
except Exception as e:
print(f" -> 翻译错误: {e}")
translated_lines.append(line) # 出错时保留原文
return "\n".join(translated_lines)
def translate_file(file_path, target_lang='zh-CN'):
    """Read a text file, translate its content and save the result alongside it.

    The output goes to ``<stem>_translated<suffix>`` in the same directory
    as the input. Progress and statistics are printed to stdout.

    Args:
        file_path: Path of the UTF-8 text file to translate.
        target_lang: Target language code (default ``'zh-CN'``).

    Returns:
        ``True`` when the translated file was written. ``False`` when the
        path is missing or not a regular file, the file is empty or
        whitespace only, the file is not valid UTF-8, or any other error
        occurs. Errors are printed, not raised.
    """
    path = Path(file_path)
    if not path.exists():
        print(f"错误: 文件不存在 - {file_path}")
        return False
    if not path.is_file():
        print(f"错误: 不是有效的文件 - {file_path}")
        return False

    # Output file sits next to the input file.
    new_file_path = path.with_name(f"{path.stem}_translated{path.suffix}")

    try:
        print(f"正在读取文件: {file_path}")
        # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM. With
        # plain 'utf-8' the BOM stays in the text, defeats the empty-file
        # check and is sent along with the first line.
        with open(path, 'r', encoding='utf-8-sig') as f:
            content = f.read()
        print(f"文件大小: {len(content)} 字符")

        if not content.strip():
            print("文件内容为空")
            return False

        print(f"开始翻译,目标语言: {target_lang}")
        print("-" * 50)
        translated_content = translate_file_content(content, target_lang)
        print("-" * 50)
        print("翻译完成!")

        print(f"正在保存文件: {new_file_path}")
        with open(new_file_path, 'w', encoding='utf-8') as f:
            f.write(translated_content)

        # Statistics count non-blank lines only.
        original_count = sum(1 for row in content.splitlines() if row.strip())
        translated_count = sum(
            1 for row in translated_content.splitlines() if row.strip()
        )
        print("\n翻译统计:")
        print(f" - 原文行数: {original_count}")
        print(f" - 译文行数: {translated_count}")
        print(f" - 保存位置: {new_file_path}")
        return True
    except UnicodeDecodeError:
        print("错误: 文件编码不是UTF-8,请确保文件使用UTF-8编码")
        return False
    except Exception as e:
        print(f"错误: {e}")
        return False
def main(file_path=None):
    """Program entry point: pick the input file and run the translation.

    Args:
        file_path: Path of the file to translate. When ``None``, the
            built-in default path is used.

    If the chosen file does not exist, up to five ``*.txt`` files from the
    downloads folder are listed and the user is asked to pick one. An
    empty or invalid answer cancels the run.
    """
    default_file = r"D:\Users\wangrusheng\Downloads\za.txt"
    downloads_dir = Path(r"D:\Users\wangrusheng\Downloads")
    if file_path is None:
        file_path = default_file

    print("=" * 60)
    print("文件翻译工具 v1.0")
    print("功能: 读取文本文件,翻译成中文")
    print("=" * 60)

    if not Path(file_path).exists():
        print("警告: 指定文件不存在")
        print(f"文件路径: {file_path}")

        # Fall back to the text files in the downloads folder (first five only).
        candidates = list(downloads_dir.glob("*.txt"))[:5]
        if not candidates:
            print("未找到任何txt文件,请检查文件路径")
            return

        print("\n在下载文件夹中找到以下txt文件:")
        for index, candidate in enumerate(candidates, 1):
            print(f" {index}. {candidate.name}")

        # The prompt shows the real number of choices, not a fixed "1-5".
        choice = input(
            f"\n请选择要翻译的文件编号(1-{len(candidates)}),或按回车退出: "
        ).strip()
        # isdecimal() accepts only characters that int() can parse;
        # isdigit() also accepts e.g. "²", which makes int() raise.
        if not (choice.isdecimal() and 1 <= int(choice) <= len(candidates)):
            print("已取消")
            return
        file_path = str(candidates[int(choice) - 1])
        print(f"已选择: {file_path}")

    print(f"\n开始处理文件: {file_path}")
    if translate_file(file_path):
        print("\n✓ 翻译完成!")
    else:
        print("\n✗ 翻译失败")


if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the built-in path.
    main(sys.argv[1] if len(sys.argv) > 1 else None)
亲测可用,效果很好,欢迎体验,有需要可自取。
end