目录:
-
- [📋 项目概述](#📋 项目概述)
- [📋实现流程](#📋实现流程)
- [🎼 完整实现代码](#🎼 完整实现代码)
- [🎯 RNN歌词生成核心原理讲解](#🎯 RNN歌词生成核心原理讲解)
-
- [1. RNN/LSTM的工作原理](#1. RNN/LSTM的工作原理)
- [2. 文本生成的关键技术](#2. 文本生成的关键技术)
-
- [2.1 序列到序列(Seq2Seq)](#2.1 序列到序列(Seq2Seq))
- [2.2 温度采样(Temperature Sampling)](#2.2 温度采样(Temperature Sampling))
- [3. 数据处理流程](#3. 数据处理流程)
- [🛠️ 进阶优化方案](#🛠️ 进阶优化方案)
-
- [1. 使用注意力机制](#1. 使用注意力机制)
- [2. Transformer模型](#2. Transformer模型)
- [3. 条件生成](#3. 条件生成)
- [🚀 实际应用扩展](#🚀 实际应用扩展)
-
- [1. Flask Web应用](#1. Flask Web应用)
- [2. 实时歌词生成API](#2. 实时歌词生成API)
- [3. 与音乐结合](#3. 与音乐结合)
📋 项目概述
我们将创建一个基于 RNN(LSTM)的 AI 歌词生成器,可以学习周杰伦、林俊杰等歌手的歌词风格,并生成新的歌词。
📋实现流程
```text
1. 数据收集 → 2. 数据预处理 → 3. 文本向量化 → 4. 构建RNN模型
                                                    ↓
5. 训练模型 → 6. 歌词生成 → 7. 模型优化 → 8. 部署应用
```
🎼 完整实现代码
python
# ==================== 0. 环境配置 ====================
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import re
import os
import json
from collections import Counter
import random
import warnings
# Suppress every Python warning for the rest of the process (this also hides
# deprecation notices raised by the libraries imported above).
warnings.filterwarnings('ignore')
# Print the installed TensorFlow version.
print("TensorFlow版本:", tf.__version__)
# ==================== 1. 数据收集与预处理 ====================
class LyricsDataCollector:
    """Collect song lyrics and normalise them into plain training text.

    Cleaned songs accumulate in ``self.lyrics``, one string per song.
    """

    def __init__(self):
        self.lyrics = []

    def load_from_file(self, filepath):
        """Load songs from a UTF-8 text file in which blank lines separate songs.

        Each song is passed through :meth:`clean_lyrics`; songs whose cleaned
        text is 20 characters or shorter are dropped.

        Args:
            filepath: Path of the text file to read.

        Raises:
            OSError: If the file cannot be opened.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
        # Treat any run of blank (or whitespace-only) lines as one separator
        # and skip empty chunks, so the reported song count is not inflated by
        # extra blank lines or by an empty file.
        songs = [
            song
            for song in re.split(r'\n\s*\n', content.strip())
            if song.strip()
        ]
        for song in songs:
            clean_lyrics = self.clean_lyrics(song)
            if len(clean_lyrics) > 20:  # skip fragments too short to be useful
                self.lyrics.append(clean_lyrics)
        print(f"成功加载 {len(songs)} 首歌,有效歌词 {len(self.lyrics)} 首")

    def load_from_api(self, artist_name):
        """Append built-in sample lyrics (placeholder for a real lyrics API).

        No network request is made: the same three hard-coded samples are
        cleaned and appended regardless of ``artist_name``, which is only
        used in the progress message.
        """
        print(f"模拟加载 {artist_name} 的歌词数据...")
        sample_lyrics = [
            "说不上为什么 我变得很主动\n"
            "若爱上一个人 什么都会值得去做\n"
            "我想大声宣布 对你依依不舍\n"
            "连隔壁邻居都猜到我现在的感受",
            "窗外的麻雀 在电线杆上多嘴\n"
            "你说这一句 很有夏天的感觉\n"
            "手中的铅笔 在纸上来来回回\n"
            "我用几行字形容你是我的谁",
            "为你弹奏萧邦的夜曲 纪念我死去的爱情\n"
            "跟夜风一样的声音 心碎得很好听\n"
            "手在键盘敲很轻 我给的思念很小心\n"
            "你埋葬的地方叫幽冥"
        ]
        for lyric in sample_lyrics:
            self.lyrics.append(self.clean_lyrics(lyric))

    def clean_lyrics(self, text):
        """Return ``text`` reduced to CJK characters, ASCII letters and single spaces.

        Steps, in order:
          1. remove bracketed tags such as LRC timestamps ``[00:12.34]``;
          2. replace every character that is not a CJK ideograph
             (U+4E00-U+9FA5), an ASCII letter, a digit or whitespace with a
             space (this removes Chinese punctuation too);
          3. remove digits;
          4. collapse whitespace runs (including newlines) to one space and
             strip both ends.
        """
        # The brackets must be escaped: the previous pattern r'$$.*?$$' only
        # matched the empty string at the end of the text, so tags survived.
        text = re.sub(r'\[.*?\]', '', text)
        text = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9\s]', ' ', text)
        text = re.sub(r'\d+', '', text)
        # Collapse whitespace last so the removals above cannot leave
        # double spaces behind.
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    def get_statistics(self):
        """Summarise the collected lyrics.

        Returns:
            dict: Empty when nothing has been loaded; otherwise the keys
            ``total_songs``, ``total_characters``, ``average_length``,
            ``unique_characters`` and ``vocabulary`` (list of distinct
            characters, counting the space used to join songs).
        """
        if not self.lyrics:
            return {}
        total_chars = sum(len(lyric) for lyric in self.lyrics)
        avg_length = total_chars / len(self.lyrics)
        all_text = ' '.join(self.lyrics)
        unique_chars = set(all_text)
        return {
            'total_songs': len(self.lyrics),
            'total_characters': total_chars,
            'average_length': avg_length,
            'unique_characters': len(unique_chars),
            'vocabulary': list(unique_chars)
        }

    def save_dataset(self, filename='lyrics_dataset.txt'):
        """Write every cleaned song to ``filename``, each followed by a blank line."""
        with open(filename, 'w', encoding='utf-8') as f:
            for lyric in self.lyrics:
                f.write(lyric + '\n\n')
        print(f"数据集已保存到 {filename}")
# ==================== 2. 文本向量化 ====================
class LyricsVectorizer:
    """Turn raw lyric text into one-hot encoded character windows."""

    def __init__(self, seq_length=50, step=3):
        self.seq_length = seq_length  # characters per input window
        self.step = step  # stride between consecutive windows
        self.char_to_idx = {}  # character -> integer index
        self.idx_to_char = {}  # integer index -> character
        self.vocab_size = 0  # number of distinct characters

    def create_vocabulary(self, text):
        """Build the character/index mappings from ``text``.

        Characters are indexed by descending frequency; ties keep the order
        of first appearance.

        Returns:
            tuple: ``(char_to_idx, idx_to_char)``.
        """
        ranked = Counter(text).most_common()
        vocab = [symbol for symbol, _count in ranked]
        self.char_to_idx = {symbol: position for position, symbol in enumerate(vocab)}
        self.idx_to_char = dict(enumerate(vocab))
        self.vocab_size = len(vocab)
        print(f"词汇表大小: {self.vocab_size}")
        print(f"最常见字符: {vocab[:10]}")
        return self.char_to_idx, self.idx_to_char

    def text_to_sequences(self, text):
        """Slide a ``seq_length`` window over ``text`` in strides of ``step``.

        Returns:
            tuple: ``(sequences, next_chars)`` where ``next_chars[i]`` is the
            character that follows ``sequences[i]`` in ``text``. Both lists
            are empty when ``text`` is not longer than ``seq_length``.
        """
        window = self.seq_length
        starts = range(0, len(text) - window, self.step)
        sequences = [text[start:start + window] for start in starts]
        next_chars = [text[start + window] for start in starts]
        print(f"生成 {len(sequences)} 个训练序列")
        return sequences, next_chars

    def vectorize_sequences(self, sequences, next_chars):
        """One-hot encode the windows and their target characters.

        Returns:
            tuple: ``(X, y)`` boolean arrays of shape
            ``(n, seq_length, vocab_size)`` and ``(n, vocab_size)``.

        Raises:
            KeyError: If a character is missing from the vocabulary.
        """
        n_samples = len(sequences)
        X = np.zeros((n_samples, self.seq_length, self.vocab_size), dtype=np.bool_)
        y = np.zeros((n_samples, self.vocab_size), dtype=np.bool_)
        lookup = self.char_to_idx
        for row, window in enumerate(sequences):
            for pos, symbol in enumerate(window):
                X[row, pos, lookup[symbol]] = 1
            y[row, lookup[next_chars[row]]] = 1
        return X, y

    def save_mappings(self, filepath='mappings.json'):
        """Write the mappings, vocabulary size and window length to a JSON file."""
        payload = {
            'char_to_idx': self.char_to_idx,
            'idx_to_char': {int(index): symbol for index, symbol in self.idx_to_char.items()},
            'vocab_size': self.vocab_size,
            'seq_length': self.seq_length,
        }
        with open(filepath, 'w', encoding='utf-8') as handle:
            handle.write(json.dumps(payload, ensure_ascii=False, indent=2))
        print(f"映射已保存到 {filepath}")

    def load_mappings(self, filepath='mappings.json'):
        """Restore the state written by :meth:`save_mappings`.

        JSON object keys are strings, so the keys of ``idx_to_char`` are
        converted back to ``int``. ``seq_length`` falls back to 50 when the
        file does not contain it.

        Returns:
            LyricsVectorizer: ``self``, to allow chaining.
        """
        with open(filepath, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
        self.char_to_idx = payload['char_to_idx']
        self.idx_to_char = {
            int(index): symbol for index, symbol in payload['idx_to_char'].items()
        }
        self.vocab_size = payload['vocab_size']
        self.seq_length = payload.get('seq_length', 50)
        return self
# ==================== 3. RNN模型构建 ====================
class LyricsGeneratorRNN:
    """Stacked-LSTM next-character model together with its training helpers."""

    def __init__(self, vocab_size, seq_length):
        self.vocab_size = vocab_size
        self.seq_length = seq_length
        self.model = None  # set by build_model()
        self.history = None  # set by train()

    def build_model(self, embedding_dim=256, lstm_units=512):
        """Create and compile the network, store it on ``self.model`` and return it.

        The network is three LSTM layers of ``lstm_units``, ``lstm_units // 2``
        and ``lstm_units // 4`` units, each followed by dropout (0.3, 0.2,
        0.1), and a softmax layer over the vocabulary. The input is one-hot
        with shape ``(seq_length, vocab_size)``; no embedding layer is used,
        so ``embedding_dim`` currently has no effect.
        """
        network = keras.Sequential()
        network.add(
            layers.LSTM(
                lstm_units,
                return_sequences=True,
                input_shape=(self.seq_length, self.vocab_size),
            )
        )
        network.add(layers.Dropout(0.3))
        network.add(layers.LSTM(lstm_units // 2, return_sequences=True))
        network.add(layers.Dropout(0.2))
        # The last recurrent layer emits only its final state.
        network.add(layers.LSTM(lstm_units // 4))
        network.add(layers.Dropout(0.1))
        network.add(layers.Dense(self.vocab_size, activation='softmax'))

        network.compile(
            optimizer=keras.optimizers.Adam(learning_rate=0.001),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )
        self.model = network
        print("模型构建完成:")
        network.summary()
        return network

    def train(self, X, y, epochs=100, batch_size=128, validation_split=0.1):
        """Fit ``self.model`` on ``(X, y)`` and return the Keras history object.

        Three callbacks watch ``val_loss``: early stopping (patience 10,
        restoring the best weights), learning-rate halving on plateau
        (patience 5, floor 1e-5), and a checkpoint that keeps the best model
        in ``best_model.h5``. ``build_model`` must have been called first.
        """
        early_stop = keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=10, restore_best_weights=True
        )
        lr_decay = keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001
        )
        checkpoint = keras.callbacks.ModelCheckpoint(
            'best_model.h5', monitor='val_loss', save_best_only=True
        )
        print("开始训练...")
        print(f"数据形状: X={X.shape}, y={y.shape}")
        self.history = self.model.fit(
            X,
            y,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            callbacks=[early_stop, lr_decay, checkpoint],
            verbose=1,
        )
        return self.history

    def plot_training_history(self):
        """Show accuracy and loss curves (training vs. validation) side by side.

        Prints a notice and returns without plotting when the model has not
        been trained yet.
        """
        if self.history is None:
            print("没有训练历史")
            return
        records = self.history.history
        fig, axes = plt.subplots(1, 2, figsize=(12, 4))
        # (train key, validation key, train label, validation label, title, y label)
        panels = (
            ('accuracy', 'val_accuracy', '训练准确率', '验证准确率', '模型准确率', '准确率'),
            ('loss', 'val_loss', '训练损失', '验证损失', '模型损失', '损失'),
        )
        for axis, (train_key, val_key, train_label, val_label, title, y_label) in zip(axes, panels):
            axis.plot(records[train_key], label=train_label)
            axis.plot(records[val_key], label=val_label)
            axis.set_title(title)
            axis.set_xlabel('Epoch')
            axis.set_ylabel(y_label)
            axis.legend()
            axis.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()
# ==================== 4. 歌词生成器 ====================
class LyricsGenerator:
    """Generate lyrics one character at a time from a next-character model.

    ``model`` must provide ``predict(x, verbose=0)`` returning one
    probability row per input row. ``vectorizer`` must provide
    ``seq_length``, ``vocab_size``, ``char_to_idx`` and ``idx_to_char``.
    """

    # Characters after which format_lyrics may break a line: the space and
    # both the ASCII and the full-width forms of common punctuation.
    _BREAK_CHARS = frozenset(',。;!? ,;!?')

    def __init__(self, model, vectorizer):
        self.model = model
        self.vectorizer = vectorizer

    def sample_with_temperature(self, preds, temperature=1.0):
        """Draw an index from ``preds`` after temperature re-scaling.

        Args:
            preds: Probabilities over the vocabulary.
            temperature: Values below 1 sharpen the distribution, values
                above 1 flatten it; ``<= 0`` selects the arg-max
                deterministically.

        Returns:
            int: The sampled index.
        """
        preds = np.asarray(preds).astype('float64')
        if temperature <= 0:
            return int(np.argmax(preds))
        # log(0) is -inf for zero-probability entries; exp() maps it back to
        # 0, so the divide warning is expected and safe to silence.
        with np.errstate(divide='ignore'):
            logits = np.log(preds) / temperature
        # Shift so the largest logit is 0: at very low temperatures every
        # exp() would otherwise underflow to 0 and the normalisation below
        # would produce NaN.
        logits = logits - np.max(logits)
        exp_preds = np.exp(logits)
        probs = exp_preds / np.sum(exp_preds)
        draw = np.random.multinomial(1, probs, 1)
        return int(np.argmax(draw))

    def generate_lyrics(self, seed_text, length=200, temperature=0.8):
        """Extend ``seed_text`` by ``length`` sampled characters.

        The model always sees the most recent ``seq_length`` characters.
        A shorter context is right-aligned (zero rows on the left) and grows
        as characters are generated; a longer seed is truncated to its tail
        for prediction but returned in full. Seed characters missing from
        the vocabulary are encoded as all-zero rows.

        Returns:
            str: ``seed_text`` followed by the generated characters.
        """
        seq_length = self.vectorizer.seq_length
        vocab_size = self.vectorizer.vocab_size
        char_to_idx = self.vectorizer.char_to_idx
        idx_to_char = self.vectorizer.idx_to_char

        window = seed_text[-seq_length:]
        pieces = [seed_text]
        for _ in range(length):
            x_pred = np.zeros((1, seq_length, vocab_size))
            offset = seq_length - len(window)
            for t, char in enumerate(window):
                idx = char_to_idx.get(char)
                if idx is not None:
                    x_pred[0, offset + t, idx] = 1
            preds = self.model.predict(x_pred, verbose=0)[0]
            next_index = self.sample_with_temperature(preds, temperature)
            next_char = idx_to_char[next_index]
            pieces.append(next_char)
            # Slide the context: append the new character, keep the tail.
            window = (window + next_char)[-seq_length:]
        return ''.join(pieces)

    def format_lyrics(self, text, line_length=10):
        """Break ``text`` into lines at spaces or punctuation.

        A line ends at the first break character seen once the line holds at
        least ``line_length`` characters, so ``line_length`` is a minimum,
        not a maximum. Each line is stripped of surrounding whitespace.
        """
        lines = []
        current_line = ""
        for char in text:
            current_line += char
            if len(current_line) >= line_length and char in self._BREAK_CHARS:
                lines.append(current_line.strip())
                current_line = ""
        if current_line:
            lines.append(current_line.strip())
        return '\n'.join(lines)

    def generate_song(self, seed_text=None, length=300, temperature=0.7):
        """Generate lyrics and return them formatted and raw.

        When ``seed_text`` is ``None`` a seed of ``seq_length`` random
        vocabulary characters is used.

        Returns:
            tuple: ``(formatted_lyrics, raw_lyrics)``.
        """
        if seed_text is None:
            all_chars = list(self.vectorizer.char_to_idx.keys())
            seed_text = ''.join(
                random.choice(all_chars) for _ in range(self.vectorizer.seq_length)
            )
        print(f"种子文本: {seed_text}")
        print(f"温度参数: {temperature}")
        raw_lyrics = self.generate_lyrics(seed_text, length, temperature)
        formatted_lyrics = self.format_lyrics(raw_lyrics)
        return formatted_lyrics, raw_lyrics
# ==================== 5. 完整工作流 ====================
def _prompt_number(prompt, cast, default):
    """Read a number from stdin, returning ``default`` on blank or invalid input."""
    raw = input(prompt).strip()
    if not raw:
        return default
    try:
        return cast(raw)
    except ValueError:
        print(f"⚠ 无效输入 '{raw}',使用默认值 {default}")
        return default


def main_workflow():
    """Run the full demo: collect data, train, generate lyrics and save them.

    Side effects: creates ``chinese_lyrics.txt`` when it does not exist,
    writes ``lyrics_dataset.txt``, ``mappings.json`` and
    ``generated_song.txt``, trains the model, and reads the seed text,
    length and temperature from stdin.

    Returns:
        tuple: ``(model, vectorizer, lyrics_gen)``.

    Raises:
        ValueError: If no usable lyrics were loaded, or the corpus is not
            longer than one training window.
    """
    print("="*60)
    print("🎵 AI歌词生成器 - RNN实现")
    print("="*60)

    # 1. Data collection
    print("\n1. 📥 数据收集阶段")
    collector = LyricsDataCollector()
    if os.path.exists('chinese_lyrics.txt'):
        collector.load_from_file('chinese_lyrics.txt')
    else:
        # No local corpus: fall back to the built-in samples and cache them.
        print("⚠ 未找到歌词文件,使用模拟数据...")
        collector.load_from_api("周杰伦")
        collector.load_from_api("林俊杰")
        with open('chinese_lyrics.txt', 'w', encoding='utf-8') as f:
            for lyric in collector.lyrics:
                f.write(lyric + '\n\n')

    stats = collector.get_statistics()
    if not stats:
        # get_statistics() returns {} when nothing was loaded; fail with a
        # clear message instead of a KeyError on the lookups below.
        raise ValueError("没有可用的歌词数据,无法训练")
    print("\n📊 数据统计:")
    print(f" 歌曲数量: {stats['total_songs']}")
    print(f" 总字符数: {stats['total_characters']}")
    print(f" 平均长度: {stats['average_length']:.1f}")
    print(f" 唯一字符数: {stats['unique_characters']}")
    collector.save_dataset()

    # 2. Pre-processing and vectorisation
    print("\n\n2. 🔧 数据预处理阶段")
    with open('lyrics_dataset.txt', 'r', encoding='utf-8') as f:
        full_text = f.read().replace('\n', ' ')
    vectorizer = LyricsVectorizer(seq_length=50, step=3)
    if len(full_text) <= vectorizer.seq_length:
        # Fewer characters than one window would yield zero training samples.
        raise ValueError(
            f"歌词文本过短({len(full_text)} 个字符),"
            f"至少需要 {vectorizer.seq_length + 1} 个字符"
        )
    char_to_idx, idx_to_char = vectorizer.create_vocabulary(full_text)
    sequences, next_chars = vectorizer.text_to_sequences(full_text)
    X, y = vectorizer.vectorize_sequences(sequences, next_chars)
    print("\n📊 训练数据形状:")
    print(f" X: {X.shape}")
    print(f" y: {y.shape}")
    vectorizer.save_mappings()

    # 3. Model construction and training
    print("\n\n3. 🧠 模型构建与训练阶段")
    generator = LyricsGeneratorRNN(
        vocab_size=vectorizer.vocab_size,
        seq_length=vectorizer.seq_length
    )
    model = generator.build_model(lstm_units=256)
    print("\n⏳ 开始训练模型...")
    # Few epochs keep the demo short; real training may need 100+.
    generator.train(
        X, y,
        epochs=30,
        batch_size=64,
        validation_split=0.2
    )
    generator.plot_training_history()

    # 4. Sample generation
    print("\n\n4. 🎤 歌词生成阶段")
    lyrics_gen = LyricsGenerator(model, vectorizer)
    seed_examples = [
        "窗外的麻雀",    # Jay Chou style
        "说不上为什么",  # "Simple Love" style
        "雨纷纷旧故里",  # traditional Chinese style
        "爱情来的太快",  # modern style
        "月光洒在窗前"   # lyrical style
    ]
    print("\n🔥 生成示例歌词:")
    print("-"*40)
    for i, seed in enumerate(seed_examples[:3]):
        print(f"\n示例 {i+1} - 种子: '{seed}'")
        print("-"*30)
        # Compare the same seed at several temperatures.
        for temp in [0.2, 0.5, 0.8, 1.2]:
            lyrics, _ = lyrics_gen.generate_song(
                seed_text=seed,
                length=150,
                temperature=temp
            )
            print(f"\n温度 {temp}:")
            print(lyrics[:200] + "...")
            print("-"*30)

    # 5. Full song with user-chosen parameters
    print("\n\n5. 🎼 生成完整歌曲")
    user_seed = input("\n请输入种子文本(按回车使用默认): ").strip()
    if not user_seed:
        user_seed = random.choice(seed_examples)
    print(f"\n使用种子文本: '{user_seed}'")
    # An invalid number must not abort the workflow here: training is already
    # done, and an exception would skip the caller's final model save.
    length = _prompt_number("请输入歌词长度(默认300): ", int, 300)
    temperature = _prompt_number("请输入温度参数(默认0.7): ", float, 0.7)
    print("\n" + "="*60)
    print("🎵 生成的歌曲 🎵")
    print("="*60)
    song_lyrics, raw_lyrics = lyrics_gen.generate_song(
        seed_text=user_seed,
        length=length,
        temperature=temperature
    )
    print("\n" + song_lyrics)

    # 6. Save the generated song
    print("\n\n6. 💾 保存结果")
    with open('generated_song.txt', 'w', encoding='utf-8') as f:
        f.write("="*60 + "\n")
        f.write("🎵 AI生成的歌词\n")
        f.write("="*60 + "\n\n")
        f.write(f"种子文本: {user_seed}\n")
        f.write(f"温度参数: {temperature}\n")
        f.write(f"生成长度: {length}\n\n")
        f.write("歌词内容:\n")
        f.write("="*40 + "\n")
        f.write(song_lyrics)
        f.write("\n" + "="*40 + "\n")
    print("✓ 歌词已保存到 generated_song.txt")

    # 7. Extra demonstrations
    print("\n\n7. 🔬 高级功能演示")
    # 7.1 One seed per style
    print("\n🔍 不同风格的歌词生成:")
    styles = {
        "浪漫": "月光下你的笑容",
        "悲伤": "雨夜里的思念",
        "励志": "梦想在心中燃烧",
        "古风": "江山如画剑如虹",
        "嘻哈": "节奏在血液流淌"
    }
    for style_name, style_seed in styles.items():
        print(f"\n{style_name}风格:")
        lyrics, _ = lyrics_gen.generate_song(
            seed_text=style_seed,
            length=100,
            temperature=0.7
        )
        print(lyrics[:150] + "...")

    # 7.2 Batch generation from random 10-character seeds
    print("\n📊 批量生成示例:")
    vocab_chars = list(char_to_idx)  # build once instead of per character
    for i in range(3):
        random_seed = ''.join(random.choice(vocab_chars) for _ in range(10))
        lyrics, _ = lyrics_gen.generate_song(
            seed_text=random_seed,
            length=80,
            temperature=0.6
        )
        print(f"\n批量 {i+1}: {lyrics}")
    return model, vectorizer, lyrics_gen
# ==================== 6. 运行主程序 ====================
if __name__ == "__main__":
    # Script entry point: run the workflow, save the final model and list the
    # produced files. Any exception is reported with its traceback instead of
    # propagating.
    try:
        model, vectorizer, generator = main_workflow()
        final_model_path = 'lyrics_generator_final.h5'
        model.save(final_model_path)
        summary_lines = (
            "\n✅ 模型已保存为 'lyrics_generator_final.h5'",
            "\n🎉 AI歌词生成器完成!",
            "📁 生成的文件:",
            " - lyrics_dataset.txt # 处理后的歌词数据",
            " - mappings.json # 字符映射",
            " - best_model.h5 # 最佳模型",
            " - lyrics_generator_final.h5 # 最终模型",
            " - generated_song.txt # 生成的歌词",
        )
        for summary_line in summary_lines:
            print(summary_line)
    except Exception as e:
        print(f"\n❌ 发生错误: {e}")
        import traceback
        traceback.print_exc()
🎯 RNN歌词生成核心原理讲解
1. RNN/LSTM的工作原理
python
# 传统RNN的问题:梯度消失/爆炸
# LSTM的解决方案:三个门控机制
# 遗忘门:决定忘记什么信息
# 输入门:决定存储什么信息
# 输出门:决定输出什么信息
2. 文本生成的关键技术
2.1 序列到序列(Seq2Seq)
python
输入序列: ["窗", "外", "的", "麻", "雀"]
输出序列: ["在", "电", "线", "杆", "上", "多", "嘴"]
2.2 温度采样(Temperature Sampling)
python
def sample_with_temperature(preds, temperature=1.0):
    """Control the randomness of the generated text."""
    # Low temperature  -> more deterministic output, more repetition.
    # High temperature -> more random output, more "creative".
    # temperature == 1 -> the model's original probability distribution.
    # NOTE(review): this snippet is an explanatory stub with no executable
    # body; it returns None.
3. 数据处理流程
```text
原始歌词 → 清洗 → 按字符切分 → 创建词汇表 → one-hot编码
                                              ↓
创建序列 → 滑动窗口 → 训练数据 → RNN训练
```
🛠️ 进阶优化方案
1. 使用注意力机制
python
class AttentionLayer(layers.Layer):
    """Additive (Bahdanau-style) attention over a sequence of feature vectors.

    Scores each time step with ``V(tanh(W1(features) + W2(hidden)))``,
    normalises the scores with a softmax over the time axis and returns the
    weighted sum of the features.
    """

    def __init__(self, units, **kwargs):
        # Forward standard Layer arguments such as ``name``.
        super().__init__(**kwargs)
        self.W1 = layers.Dense(units)
        self.W2 = layers.Dense(units)
        self.V = layers.Dense(1)

    def call(self, features, hidden):
        """Return ``(context_vector, attention_weights)``.

        Assumes ``features`` is ``(batch, time, dim)`` and ``hidden`` is
        ``(batch, hidden_dim)`` — TODO confirm against the caller.
        """
        # Add a time axis so the query broadcasts against every time step.
        hidden_with_time_axis = tf.expand_dims(hidden, 1)
        score = self.V(
            tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))
        )
        # Normalise over the time axis so each sequence's weights sum to 1.
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = tf.reduce_sum(attention_weights * features, axis=1)
        return context_vector, attention_weights
2. Transformer模型
python
# 使用Transformer替代RNN
class _TokenAndPositionEmbedding(layers.Layer):
    """Sum of a token embedding and a learned position embedding."""

    def __init__(self, seq_length, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=seq_length, output_dim=embed_dim)

    def call(self, x):
        positions = tf.range(start=0, limit=tf.shape(x)[-1], delta=1)
        return self.token_emb(x) + self.pos_emb(positions)


def build_transformer_model(vocab_size, seq_length, embed_dim=256,
                            num_heads=8, dff=512, dropout_rate=0.1):
    """Build a one-block Transformer encoder that predicts the next character.

    Keras has no ``layers.Transformer`` layer, so the encoder block is
    assembled from ``MultiHeadAttention``, a two-layer feed-forward network,
    residual connections and layer normalisation.

    Args:
        vocab_size: Number of distinct tokens; also the output width.
        seq_length: Number of token ids per input sequence.
        embed_dim: Width of the token and position embeddings.
        num_heads: Number of attention heads.
        dff: Hidden width of the feed-forward network.
        dropout_rate: Dropout rate used in the attention layer and after
            each sub-layer.

    Returns:
        keras.Model: An uncompiled model mapping integer token ids of shape
        ``(batch, seq_length)`` to a softmax over the vocabulary.
    """
    inputs = keras.Input(shape=(seq_length,), dtype="int32")
    x = _TokenAndPositionEmbedding(seq_length, vocab_size, embed_dim)(inputs)

    # Self-attention sub-layer with residual connection.
    attention = layers.MultiHeadAttention(
        num_heads=num_heads,
        key_dim=embed_dim // num_heads,
        dropout=dropout_rate,
    )(x, x)
    x = layers.LayerNormalization(epsilon=1e-6)(
        x + layers.Dropout(dropout_rate)(attention)
    )

    # Position-wise feed-forward sub-layer with residual connection.
    feed_forward = layers.Dense(dff, activation="relu")(x)
    feed_forward = layers.Dense(embed_dim)(feed_forward)
    x = layers.LayerNormalization(epsilon=1e-6)(
        x + layers.Dropout(dropout_rate)(feed_forward)
    )

    x = layers.GlobalAveragePooling1D()(x)
    outputs = layers.Dense(vocab_size, activation='softmax')(x)
    return keras.Model(inputs, outputs)
3. 条件生成
python
# 根据风格、情感生成歌词
def generate_with_style(seed_text, style="romantic", emotion="happy"):
    """Sketch of lyric generation conditioned on a style and an emotion.

    NOTE(review): `encode_style`, `encode_emotion`, `concatenate`,
    `seed_embedding` and `model` are not defined in the visible part of this
    file, and `seed_text` is never used in the body. This reads as
    pseudo-code and will raise NameError unless those names exist elsewhere.
    """
    # Encode the style and the emotion as vectors.
    style_vector = encode_style(style)
    emotion_vector = encode_emotion(emotion)
    # Merge them into the model input.
    combined_input = concatenate([seed_embedding, style_vector, emotion_vector])
    # Run the model on the combined input and return its prediction.
    return model.predict(combined_input)
🚀 实际应用扩展
1. Flask Web应用
python
# app.py
from flask import Flask, render_template, request, jsonify
from tensorflow import keras

app = Flask(__name__)
model = keras.models.load_model('lyrics_generator_final.h5')

# Upper bound on generated characters per request. Every character costs one
# model prediction, so an unbounded value would let one request tie up the
# server.
MAX_LENGTH = 1000

# NOTE(review): `generator` is not defined in this snippet. Presumably it
# should be a LyricsGenerator built from `model` and the vectorizer restored
# from mappings.json — confirm and construct it here.


@app.route('/')
def index():
    """Serve the front-end page."""
    return render_template('index.html')


@app.route('/generate', methods=['POST'])
def generate():
    """Generate lyrics from a JSON body.

    Reads ``seed`` (default ``''``) and ``length`` (default 200) from the
    body; other fields are ignored. Responds with HTTP 400 when the body is
    not a JSON object or ``length`` is not an integer in
    ``1..MAX_LENGTH``.
    """
    # silent=True yields None instead of raising on a missing or malformed body.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'success': False, 'error': 'JSON object required'}), 400
    seed = payload.get('seed', '')
    try:
        length = int(payload.get('length', 200))
    except (TypeError, ValueError):
        return jsonify({'success': False, 'error': 'length must be an integer'}), 400
    if not 1 <= length <= MAX_LENGTH:
        return jsonify({
            'success': False,
            'error': f'length must be between 1 and {MAX_LENGTH}'
        }), 400
    lyrics = generator.generate_lyrics(seed, length)
    return jsonify({
        'success': True,
        'lyrics': lyrics,
        'seed': seed
    })
2. 实时歌词生成API
python
# api.py - FastAPI implementation
from datetime import datetime

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

# NOTE(review): `generator` is not defined in this snippet. The handler below
# assumes it is a LyricsGenerator, whose generate_song() returns
# (formatted_lyrics, raw_lyrics) — confirm and construct it here.


class GenerationRequest(BaseModel):
    """Request body of ``POST /generate-lyrics``."""
    seed_text: str = "月光"
    temperature: float = 0.7
    length: int = 100
    style: str = "default"  # accepted but not used by the handler


@app.post("/generate-lyrics")
async def generate_lyrics(request: GenerationRequest):
    """Generate lyrics and return them with the request parameters and a timestamp."""
    # Unpack the pair so "lyrics" holds the formatted text rather than a
    # two-element array.
    formatted_lyrics, _raw_lyrics = generator.generate_song(
        seed_text=request.seed_text,
        length=request.length,
        temperature=request.temperature
    )
    return {
        "lyrics": formatted_lyrics,
        "parameters": request.dict(),
        "timestamp": datetime.now().isoformat()
    }
3. 与音乐结合
python
class MusicLyricsGenerator:
    """Produce lyrics whose segment lengths follow a melody's note durations."""

    def generate_with_melody(self, melody_track):
        """Return one lyric segment per note, joined by single spaces.

        NOTE(review): `extract_melody_features` and `self.generate_segment`
        are not defined in the visible part of this file; presumably they are
        supplied elsewhere — confirm.
        """
        features = extract_melody_features(melody_track)
        # Segment length is the note duration times 3, truncated to an int;
        # the original comment marks this ratio as an assumption.
        segments = [
            self.generate_segment(int(duration * 3))
            for duration in features['note_durations']
        ]
        return ' '.join(segments)