Python 版本
3.11.14
requirements.txt
powershell
audioread==3.1.0
certifi==2026.1.4
cffi==2.0.0
charset-normalizer==3.4.4
colorama==0.4.6
contourpy==1.3.3
cycler==0.12.1
decorator==5.2.1
fonttools==4.61.1
idna==3.11
ImageIO==2.37.2
imageio-ffmpeg==0.6.0
joblib==1.5.3
kiwisolver==1.4.9
lazy_loader==0.4
librosa==0.11.0
llvmlite==0.46.0
matplotlib==3.10.8
moviepy==2.2.1
msgpack==1.1.2
noisereduce==3.0.3
numba==0.63.1
numpy==2.3.5
packaging==25.0
pedalboard==0.9.19
pillow==11.3.0
platformdirs==4.5.1
pooch==1.8.2
proglog==0.1.12
pycparser==2.23
pyparsing==3.3.1
python-dateutil==2.9.0.post0
python-dotenv==1.2.1
requests==2.32.5
scikit-learn==1.8.0
scipy==1.16.3
six==1.17.0
soundfile==0.13.1
soxr==1.0.0
threadpoolctl==3.6.0
tqdm==4.67.1
typing_extensions==4.15.0
urllib3==2.6.3
代码
python
import librosa
import numpy as np
import matplotlib.pyplot as plt
from moviepy import VideoClip, AudioFileClip
import os
import warnings
warnings.filterwarnings('ignore')
# ---------------------------------------------------------------------------
# 1. Configuration (includes the slow-motion tuning knobs)
# ---------------------------------------------------------------------------

# Input audio file and output video file.
AUDIO_PATH = "D:/sd/ComfyUI/output/audio/ComfyUI_00040_.flac"
OUTPUT_VIDEO = "audio_spectrum_visual.mp4"

# Forwarded to librosa.load(duration=...).
DURATION = None

# Video geometry and frame rate.
WIDTH = 1280
HEIGHT = 720
FPS = 30

# Bar-chart appearance.
BG_COLOR = "#0a0a0a"
CMAP = "coolwarm"  # gentle colour scheme, kept on purpose
BAR_COUNT = 120
BAR_WIDTH = 0.85

# Core slow-motion parameters (the main values to tune).
# Weight given to the new spectrum when blending with the previous frame.
# Lower than the earlier 0.2 so bars move more slowly; suggested range
# 0.05-0.2, smaller means slower.
SMOOTH_FACTOR = 0.08
# STFT overlap ratio (80 %), which smooths the amplitudes further; suggested
# range 0.7-0.9, larger means smoother.
STFT_OVERLAP_FACTOR = 0.8
# Largest change a bar may make in a single frame, to rule out sudden jumps;
# suggested range 0.03-0.1, smaller means steadier.
MAX_JUMP = 0.05
# ---------------------------------------------------------------------------
# 2. Load the audio
# ---------------------------------------------------------------------------
y, sr = librosa.load(
    AUDIO_PATH,
    sr=None,
    duration=DURATION,
)
# The duration is measured on the loaded samples; it later becomes the
# length of the generated video clip.
audio_duration = librosa.get_duration(y=y, sr=sr)
# From here on `y` holds the pre-emphasised signal used for the spectrum.
y = librosa.effects.preemphasis(y)
load_report = f"✅ 音频加载完成,时长:{audio_duration:.2f}秒,采样率:{sr}Hz"
print(load_report)
# ---------------------------------------------------------------------------
# 3. Plot style
# ---------------------------------------------------------------------------
# Figure is WIDTH/100 x HEIGHT/100 at dpi 100 (nominally WIDTH x HEIGHT
# pixels), uses the background colour for both figure and axes, and has all
# subplot margins removed so the axes fill the whole canvas.
plt.rcParams.update({
    'figure.figsize': (WIDTH/100, HEIGHT/100),
    'figure.dpi': 100,
    'axes.facecolor': BG_COLOR,
    'figure.facecolor': BG_COLOR,
    'figure.subplot.left': 0,
    'figure.subplot.right': 1,
    'figure.subplot.bottom': 0,
    'figure.subplot.top': 1,
})
# ---------------------------------------------------------------------------
# 4. Smoothing state and STFT parameters used by make_frame
# ---------------------------------------------------------------------------
# FFT size and analysis-window length handed to librosa.stft.
n_fft = 2048
win_length = 1792
# Hop derived from the overlap ratio and truncated to an integer; the higher
# the overlap, the gentler the change between STFT frames.
hop_length = int((1 - STFT_OVERLAP_FACTOR) * n_fft)
# Bar heights carried over from the previously rendered frame (starts at 0).
prev_amplitude = np.zeros((BAR_COUNT,), dtype=np.float32)
def make_frame(t):
    """Render the bar-spectrum frame for time ``t`` and return it as RGB.

    The function is stateful: every call blends the new spectrum into the
    module-level ``prev_amplitude`` and stores the result back, so the
    smoothing only behaves as intended when frames are requested in
    playback order. It also clears and redraws the current pyplot figure.

    Args:
        t: Position in the audio, in seconds (multiplied by ``sr`` to get
            a sample index).

    Returns:
        A ``rows x cols x 3`` array: the current figure's RGBA canvas
        buffer with the alpha channel dropped.
    """
    global prev_amplitude

    plt.clf()
    ax = plt.gca()
    ax.set_facecolor(BG_COLOR)

    # Analysis window: the n_fft samples that start at time t. Near the end
    # of the signal the window is zero-padded so it always holds exactly
    # n_fft samples taken from the *current* position (not the last n_fft
    # samples of the file).
    frame_start = max(int(t * sr), 0)
    y_slice = y[frame_start:frame_start + n_fft]
    missing = n_fft - len(y_slice)
    if missing > 0:
        y_slice = np.pad(y_slice, (0, missing), mode='constant')

    # Spectrum -> BAR_COUNT values normalised to [0, 1]. This stays a
    # best-effort step: on failure the previous bar heights are reused so
    # one bad window cannot abort the render. `Exception` (rather than a
    # bare except) lets KeyboardInterrupt / SystemExit propagate.
    try:
        D = librosa.stft(
            y_slice,
            n_fft=n_fft,
            hop_length=hop_length,
            win_length=win_length,
            window='hann'
        )
        amplitude = np.abs(D)
        # Average over the STFT frames, keep the lowest BAR_COUNT bins.
        freq_bins = np.mean(amplitude, axis=1)[:BAR_COUNT]
        # Guard the dB reference against an all-zero window (avoids NaN).
        peak = np.max(freq_bins)
        ref_val = peak if peak > 0 else 1
        freq_bins = librosa.amplitude_to_db(freq_bins, ref=ref_val)
        # Min-max normalise; a flat spectrum maps to all zeros.
        freq_min, freq_max = freq_bins.min(), freq_bins.max()
        if freq_max - freq_min < 1e-6:
            freq_bins = np.zeros_like(freq_bins)
        else:
            freq_bins = (freq_bins - freq_min) / (freq_max - freq_min)
        freq_bins = np.clip(freq_bins, 0.0, 1.0)
    except Exception:
        freq_bins = prev_amplitude.copy()

    # Slow-down step 1: exponential smoothing that leans on the history.
    smoothed = (SMOOTH_FACTOR * freq_bins) + ((1 - SMOOTH_FACTOR) * prev_amplitude)
    # Slow-down step 2: cap the per-frame change of every bar at MAX_JUMP.
    delta = smoothed - prev_amplitude
    smoothed = np.where(
        np.abs(delta) > MAX_JUMP,
        prev_amplitude + np.sign(delta) * MAX_JUMP,
        smoothed,
    )
    # Always store the result so the next frame continues from it.
    prev_amplitude = smoothed.copy()

    # Draw the bars, coloured by height. pyplot.get_cmap is used because
    # matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
    x = np.arange(len(prev_amplitude))
    ax.bar(
        x, prev_amplitude,
        width=BAR_WIDTH,
        color=plt.get_cmap(CMAP)(prev_amplitude * 0.9)
    )
    ax.set_xlim(0, BAR_COUNT)
    ax.set_ylim(0, 1.1)
    ax.axis('off')

    # Rasterise and copy the canvas; the copy keeps the returned frame
    # independent of later redraws. Drop alpha to get RGB.
    fig = plt.gcf()
    fig.canvas.draw()
    frame = np.array(fig.canvas.buffer_rgba())[:, :, :3]
    return frame
# ---------------------------------------------------------------------------
# 5. Export the video
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Frames come from make_frame; the clip lasts as long as the loaded audio.
    video_clip = VideoClip(make_frame, duration=audio_duration)
    audio_clip = AudioFileClip(AUDIO_PATH)
    try:
        video_clip = video_clip.with_audio(audio_clip)
        video_clip.write_videofile(
            OUTPUT_VIDEO,
            fps=FPS,
            codec="libx264",
            audio_codec="aac",
            bitrate="6000k",
            threads=os.cpu_count(),
            logger=None
        )
    finally:
        # Release the audio reader (and anything the video clip holds) even
        # when encoding fails, so the input file is not left locked.
        audio_clip.close()
        video_clip.close()
    # Reached only when write_videofile finished without raising.
    print(f"\n🎉 视频生成成功!文件路径:{OUTPUT_VIDEO}")