
需要安装edge_tts和FFMPEG
pip install edge_tts
FFMPEG安装参考https://blog.csdn.net/Natsuago/article/details/143231558
python
import edge_tts
import asyncio
import os
import re
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import threading
import sys
# ===================== 配置FFmpeg路径(自动检测或手动配置) =====================
# 优先使用系统环境变量中的ffmpeg,如果没有则使用指定路径
FFMPEG_PATH = os.environ.get("FFMPEG_PATH", r"D:\Program Files\ffmpeg-7.1.1-essentials_build\bin\ffmpeg.exe")
FFPROBE_PATH = os.environ.get("FFPROBE_PATH", r"D:\Program Files\ffmpeg-7.1.1-essentials_build\bin\ffprobe.exe")
# 验证FFmpeg路径
def check_ffmpeg():
"""检查FFmpeg是否可用"""
if not os.path.exists(FFMPEG_PATH):
messagebox.showwarning("警告", f"FFmpeg未找到:{FFMPEG_PATH}\n请检查路径配置!")
return False
return True
# 配置FFmpeg环境变量
os.environ["FFMPEG_PATH"] = FFMPEG_PATH
os.environ["FFPROBE_PATH"] = FFPROBE_PATH
# ===================== 扩展版语音音色列表 =====================
VOICE_OPTIONS = {
# ==================== 中文普通话 - 女声 ====================
"【中文】晓晓 (默认)": "zh-CN-XiaoxiaoNeural",
"【中文】小艺": "zh-CN-XiaoyiNeural",
"【中文】小燕": "zh-CN-XiaoyanNeural",
"【中文】小云": "zh-CN-XiaoyunNeural",
"【中文】小梦": "zh-CN-XiaomengNeural",
"【中文】小希": "zh-CN-XiaoxiNeural",
"【中文】小雅": "zh-CN-XiaoyaNeural",
"【中文】小宁": "zh-CN-XiaoningNeural",
"【中文】小莉": "zh-CN-LiNeural",
"【中文】小月": "zh-CN-YueNeural",
# ==================== 中文普通话 - 男声 ====================
"【中文】云希": "zh-CN-YunxiNeural",
"【中文】云健": "zh-CN-YunjianNeural",
"【中文】云扬": "zh-CN-YunyangNeural",
"【中文】晓辰": "zh-CN-XiaochenNeural",
"【中文】晓宇": "zh-CN-XiaoyuNeural",
"【中文】小伟": "zh-CN-WeiNeural",
"【中文】小鹏": "zh-CN-PengNeural",
"【中文】小川": "zh-CN-ChuanNeural",
# ==================== 中文方言 ====================
"【方言】粤语-小玲": "zh-HK-XiaolingNeural",
"【方言】粤语-大雄": "zh-HK-DaanNeural",
"【方言】台湾话-晓萱": "zh-TW-HsiaoHsuanNeural",
"【方言】台湾话-晓东": "zh-TW-HsiaoTungNeural",
# ==================== 英文 - 美式 ====================
"【英文】Aria (女声)": "en-US-AriaNeural",
"【英文】Jenny (女声)": "en-US-JennyNeural",
"【英文】Michelle (女声)": "en-US-MichelleNeural",
"【英文】Sara (女声)": "en-US-SaraNeural",
"【英文】Brandon (男声)": "en-US-BrandonNeural",
"【英文】Christopher (男声)": "en-US-ChristopherNeural",
"【英文】Eric (男声)": "en-US-EricNeural",
"【英文】Guy (男声)": "en-US-GuyNeural",
# ==================== 英文 - 英式 ====================
"【英文】Libby (英式女声)": "en-GB-LibbyNeural",
"【英文】Maisie (英式女声)": "en-GB-MaisieNeural",
"【英文】Ryan (英式男声)": "en-GB-RyanNeural",
"【英文】Thomas (英式男声)": "en-GB-ThomasNeural",
# ==================== 其他特色音色 ====================
"【英文】小女孩-Emma": "en-US-EmmaNeural",
"【英文】小男孩-Brian": "en-US-BrianNeural",
"【中文】情感女声-晓晨": "zh-CN-XiaochenNeural",
"【中文】轻柔女声-晓雪": "zh-CN-XiaoxueNeural"
}
def get_valid_filename(text):
"""将文本转换为合法的文件名"""
# 移除Windows非法字符
filename = re.sub(r'[\\/:*?"<>|]', '', text)
# 移除空白字符并限制长度
filename = filename.strip()
if len(filename) > 50:
filename = filename[:47] + "..."
if not filename:
filename = "默认语音"
return f"{filename}.mp3"
async def generate_voice(text, voice, rate, volume, pitch, save_path):
"""异步生成语音文件"""
try:
# 创建Communicate实例
communicate = edge_tts.Communicate(
text=text,
voice=voice,
rate=rate,
volume=volume,
pitch=pitch
)
# 生成并保存文件
await communicate.save(save_path)
return True
except Exception as e:
return str(e)
def run_async_task(loop, coro):
"""在指定事件循环中运行异步任务"""
return loop.run_until_complete(coro)
def on_generate():
"""生成语音按钮点击事件(优化版)"""
# 检查FFmpeg
if not check_ffmpeg():
return
# 获取输入文本
text = text_input.get("1.0", tk.END).strip()
if not text:
messagebox.showwarning("警告", "请输入要转换的文本!")
return
# 获取选中的音色
voice_name = voice_var.get()
voice_code = VOICE_OPTIONS[voice_name]
# 获取语速/音量/音调(转为整数,确保格式正确)
try:
rate_value = int(rate_slider.get())
volume_value = int(volume_slider.get())
pitch_value = int(pitch_slider.get())
rate = f"{rate_value:+d}%" # 确保有正负号
volume = f"{volume_value:+d}%"
pitch = f"{pitch_value:+d}Hz"
except:
messagebox.showerror("错误", "参数转换失败,请检查滑块设置!")
return
# 生成合法文件名
filename = get_valid_filename(text[:100]) # 只取前100个字符作为文件名
# 选择保存路径
save_path = filedialog.asksaveasfilename(
defaultextension=".mp3",
initialfile=filename,
filetypes=[("MP3文件", "*.mp3"), ("所有文件", "*.*")],
title="保存语音文件"
)
if not save_path:
return
# 禁用按钮防止重复点击
generate_btn.config(state="disabled")
generate_btn.config(text="正在生成...")
root.update()
try:
# 在新线程中运行异步任务,避免GUI冻结
def generate_thread():
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
result = loop.run_until_complete(generate_voice(
text, voice_code, rate, volume, pitch, save_path
))
# 回到主线程更新UI
def show_result():
generate_btn.config(state="normal")
generate_btn.config(text="生成语音文件")
if result is True:
messagebox.showinfo("成功", f"语音文件已生成:\n{save_path}")
# 可选:打开文件所在目录
if messagebox.askyesno("提示", "是否打开文件所在目录?"):
os.startfile(os.path.dirname(save_path))
else:
messagebox.showerror("错误", f"生成失败:\n{result}")
root.after(0, show_result)
# 启动生成线程
thread = threading.Thread(target=generate_thread, daemon=True)
thread.start()
except Exception as e:
generate_btn.config(state="normal")
generate_btn.config(text="生成语音文件")
messagebox.showerror("错误", f"程序异常:{str(e)}")
def clear_text():
"""清空文本输入框"""
text_input.delete("1.0", tk.END)
def load_text_file():
"""从文件加载文本"""
file_path = filedialog.askopenfilename(
filetypes=[("文本文件", "*.txt"), ("所有文件", "*.*")],
title="选择文本文件"
)
if file_path:
try:
with open(file_path, "r", encoding="utf-8") as f:
text = f.read()
text_input.delete("1.0", tk.END)
text_input.insert("1.0", text)
except Exception as e:
messagebox.showerror("错误", f"读取文件失败:{str(e)}")
# ===================== 创建GUI界面 =====================
def create_gui():
global root, text_input, voice_var, rate_slider, volume_slider, pitch_slider, generate_btn
root = tk.Tk()
root.title("Edge TTS 语音生成工具 v2.2")
root.geometry("700x600")
root.resizable(True, True)
# 设置字体
default_font = ("微软雅黑", 9)
root.option_add("*Font", default_font)
# 1. 标题和说明
title_label = ttk.Label(root, text="Edge TTS 语音生成工具 (扩展音色版)", font=("微软雅黑", 12, "bold"))
title_label.pack(pady=10)
# 2. 文本输入区域(带滚动条)
frame_text = ttk.Frame(root)
frame_text.pack(padx=20, pady=5, fill=tk.BOTH, expand=True)
ttk.Label(frame_text, text="请输入要转换的文本:").anchor(tk.W)
text_scroll = ttk.Scrollbar(frame_text)
text_input = tk.Text(frame_text, width=80, height=10, wrap=tk.WORD,
yscrollcommand=text_scroll.set, font=("微软雅黑", 10))
text_scroll.config(command=text_input.yview)
text_input.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
text_scroll.pack(side=tk.RIGHT, fill=tk.Y)
# 文本操作按钮
frame_text_buttons = ttk.Frame(root)
frame_text_buttons.pack(padx=20, pady=5, fill=tk.X)
ttk.Button(frame_text_buttons, text="清空文本", command=clear_text).pack(side=tk.LEFT, padx=5)
ttk.Button(frame_text_buttons, text="加载文本文件", command=load_text_file).pack(side=tk.LEFT, padx=5)
# 3. 参数设置区域
frame_params = ttk.LabelFrame(root, text="语音参数设置")
frame_params.pack(padx=20, pady=10, fill=tk.X)
# 3.1 音色选择(加宽下拉框以显示完整音色名称)
frame_voice = ttk.Frame(frame_params)
frame_voice.pack(padx=20, pady=8, fill=tk.X)
ttk.Label(frame_voice, text="语音音色:", width=10).pack(side=tk.LEFT)
voice_var = tk.StringVar(value=list(VOICE_OPTIONS.keys())[0])
voice_combobox = ttk.Combobox(frame_voice, textvariable=voice_var,
values=list(VOICE_OPTIONS.keys()), state="readonly", width=45)
voice_combobox.pack(side=tk.LEFT, padx=5)
# 3.2 语速调节(默认值设置为10)
frame_rate = ttk.Frame(frame_params)
frame_rate.pack(padx=20, pady=5, fill=tk.X)
ttk.Label(frame_rate, text="语速:", width=10).pack(side=tk.LEFT)
rate_slider = ttk.Scale(frame_rate, from_=-100, to=100, orient=tk.HORIZONTAL, length=450)
rate_slider.set(10) # 关键修改:默认语速设置为10
rate_slider.pack(side=tk.LEFT, padx=5)
rate_label = ttk.Label(frame_rate, text="10%", width=5) # 默认显示10%
rate_label.pack(side=tk.LEFT)
def update_rate_label(event):
rate_label.config(text=f"{int(rate_slider.get())}%")
rate_slider.bind("<Motion>", update_rate_label)
rate_slider.bind("<ButtonRelease>", update_rate_label)
# 3.3 音量调节
frame_volume = ttk.Frame(frame_params)
frame_volume.pack(padx=20, pady=5, fill=tk.X)
ttk.Label(frame_volume, text="音量:", width=10).pack(side=tk.LEFT)
volume_slider = ttk.Scale(frame_volume, from_=-100, to=100, orient=tk.HORIZONTAL, length=450)
volume_slider.set(0)
volume_slider.pack(side=tk.LEFT, padx=5)
volume_label = ttk.Label(frame_volume, text="0%", width=5)
volume_label.pack(side=tk.LEFT)
def update_volume_label(event):
volume_label.config(text=f"{int(volume_slider.get())}%")
volume_slider.bind("<Motion>", update_volume_label)
volume_slider.bind("<ButtonRelease>", update_volume_label)
# 3.4 音调调节
frame_pitch = ttk.Frame(frame_params)
frame_pitch.pack(padx=20, pady=5, fill=tk.X)
ttk.Label(frame_pitch, text="音调:", width=10).pack(side=tk.LEFT)
pitch_slider = ttk.Scale(frame_pitch, from_=-50, to=50, orient=tk.HORIZONTAL, length=450)
pitch_slider.set(0)
pitch_slider.pack(side=tk.LEFT, padx=5)
pitch_label = ttk.Label(frame_pitch, text="0Hz", width=5)
pitch_label.pack(side=tk.LEFT)
def update_pitch_label(event):
pitch_label.config(text=f"{int(pitch_slider.get())}Hz")
pitch_slider.bind("<Motion>", update_pitch_label)
pitch_slider.bind("<ButtonRelease>", update_pitch_label)
# 4. 生成按钮
frame_button = ttk.Frame(root)
frame_button.pack(padx=20, pady=20)
generate_btn = ttk.Button(frame_button, text="生成语音文件", command=on_generate,
width=25, style="Accent.TButton")
generate_btn.pack(pady=10)
# 5. 状态栏
status_var = tk.StringVar(value="就绪 | 扩展音色库已加载 | 语速默认值:10% | 支持中文/英文/方言")
status_bar = ttk.Label(root, textvariable=status_var, relief=tk.SUNKEN, anchor=tk.W)
status_bar.pack(side=tk.BOTTOM, fill=tk.X)
# 检查FFmpeg
root.after(100, check_ffmpeg)
root.mainloop()
# 适配Windows系统的异步策略
if sys.platform == 'win32':
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
# 启动GUI
if __name__ == "__main__":
create_gui()