# Battery SOH / RUL prediction from IC/DV curve features (Python)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter, find_peaks
from scipy.interpolate import interp1d
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')
class BatteryHealthPredictor:
    """Estimate battery state of health (SOH) and remaining useful life (RUL).

    Pipeline: clean per-cycle voltage/capacity samples, derive the
    incremental-capacity (IC = dQ/dV) and differential-voltage (DV = dV/dQ)
    curves, extract peak/valley features from them, and fit a regression
    model that maps those features to SOH.
    """

    # Keys emitted by the feature extractors; used to build all-NaN
    # placeholders when no peak/valley can be located.
    _IC_FEATURE_KEYS = (
        'ic_peak_height',
        'ic_peak_voltage',
        'ic_peak_area',
        'ic_peak_width',
    )
    _DV_FEATURE_KEYS = (
        'dv_valley_depth',
        'dv_valley_capacity',
        'dv_max_gradient',
    )

    def __init__(self, window_size=51, poly_order=3):
        """Create a predictor.

        Args:
            window_size: Window length passed to the Savitzky-Golay filter.
            poly_order: Polynomial order passed to the Savitzky-Golay filter.
        """
        self.window_size = window_size
        self.poly_order = poly_order
        self.scaler = StandardScaler()
        self.model = None
        self.feature_names = []
        # Populated by load_data() or assigned directly by the caller.
        self.data = None

    def load_data(self, file_path):
        """Load cycling data from a CSV file into ``self.data``.

        The rest of the class reads the columns ``cycle``, ``voltage`` and
        ``capacity`` from this table.

        Args:
            file_path: Path (or any object ``pandas.read_csv`` accepts).

        Returns:
            True on success, False if reading failed (the error is printed).
        """
        try:
            data = pd.read_csv(file_path)
        except Exception as e:
            # Best-effort loader: failure is reported through the return
            # value instead of propagating to the caller.
            print(f"数据加载失败: {e}")
            return False
        self.data = data
        print(f"数据加载成功,共 {len(self.data)} 行")
        return True

    def preprocess_voltage_capacity_data(self, cycle_numbers, min_points=10):
        """Build clean, strictly increasing voltage/capacity arrays per cycle.

        For every requested cycle the rows are stripped of NaN values, sorted
        by voltage (stable, so the first occurrence in file order wins) and
        de-duplicated on voltage.

        Args:
            cycle_numbers: Iterable of cycle identifiers to process.
            min_points: Cycles with fewer rows than this, before or after
                cleaning, are skipped.

        Returns:
            Dict mapping cycle -> {'voltage': ndarray, 'capacity': ndarray}.

        Raises:
            ValueError: If no data has been loaded or assigned yet.
        """
        data = getattr(self, 'data', None)
        if data is None:
            raise ValueError("数据未加载")

        processed_data = {}
        cycle_column = data['cycle']
        for cycle in cycle_numbers:
            cycle_data = data.loc[cycle_column == cycle, ['voltage', 'capacity']]
            if len(cycle_data) < min_points:  # too few samples to be useful
                continue
            # After a sort plus de-duplication on NaN-free data the voltage
            # axis is strictly increasing, so no further filtering is needed.
            cycle_data = (
                cycle_data.dropna()
                .sort_values('voltage', kind='stable')
                .drop_duplicates('voltage', keep='first')
            )
            if len(cycle_data) < min_points:
                continue
            processed_data[cycle] = {
                'voltage': cycle_data['voltage'].values,
                'capacity': cycle_data['capacity'].values,
            }
        return processed_data

    def _smooth(self, values):
        """Savitzky-Golay smooth ``values``; return them as-is if too short."""
        if len(values) > self.window_size:
            return savgol_filter(values, self.window_size, self.poly_order)
        return values

    def calculate_ic_curve(self, voltage, capacity):
        """Compute the incremental-capacity curve IC = dQ/dV.

        The derivative is a finite difference between consecutive samples,
        located at the midpoint voltage of each pair. Pairs with a zero
        voltage step are dropped so the curve never contains inf/NaN from a
        division by zero.

        Args:
            voltage: 1-D array-like of voltages.
            capacity: 1-D array-like of capacities, same length as voltage.

        Returns:
            Tuple ``(voltage_ic, ic_smooth, ic_raw)``, or
            ``(None, None, None)`` if the computation failed.
        """
        try:
            voltage = np.asarray(voltage, dtype=float)
            capacity = np.asarray(capacity, dtype=float)
            dQ = np.diff(capacity)
            dV = np.diff(voltage)
            usable = dV != 0
            ic_raw = dQ[usable] / dV[usable]
            voltage_ic = ((voltage[:-1] + voltage[1:]) / 2)[usable]
            ic_smooth = self._smooth(ic_raw)
            return voltage_ic, ic_smooth, ic_raw
        except (ValueError, TypeError, IndexError) as e:
            print(f"IC曲线计算错误: {e}")
            return None, None, None

    def calculate_dv_curve(self, voltage, capacity):
        """Compute the differential-voltage curve DV = dV/dQ.

        The derivative is a finite difference between consecutive samples,
        located at the midpoint capacity of each pair. Pairs with a zero
        capacity step are dropped so the curve never contains inf/NaN from a
        division by zero.

        Args:
            voltage: 1-D array-like of voltages.
            capacity: 1-D array-like of capacities, same length as voltage.

        Returns:
            Tuple ``(capacity_dv, dv_smooth, dv_raw)``, or
            ``(None, None, None)`` if the computation failed.
        """
        try:
            voltage = np.asarray(voltage, dtype=float)
            capacity = np.asarray(capacity, dtype=float)
            dV = np.diff(voltage)
            dQ = np.diff(capacity)
            usable = dQ != 0
            dv_raw = dV[usable] / dQ[usable]
            capacity_dv = ((capacity[:-1] + capacity[1:]) / 2)[usable]
            dv_smooth = self._smooth(dv_raw)
            return capacity_dv, dv_smooth, dv_raw
        except (ValueError, TypeError, IndexError) as e:
            print(f"DV曲线计算错误: {e}")
            return None, None, None

    def extract_ic_features(self, voltage_ic, ic_curve,
                            min_height=0.01, min_distance=10):
        """Extract features describing the dominant peak of an IC curve.

        Args:
            voltage_ic: Voltage axis of the IC curve.
            ic_curve: IC values (normally the smoothed curve).
            min_height: Minimum peak height passed to ``find_peaks``.
            min_distance: Minimum peak spacing in samples for ``find_peaks``.

        Returns:
            Dict with ``ic_peak_height``, ``ic_peak_voltage``,
            ``ic_peak_area`` (trapezoid integral of the whole curve with
            negative values clipped to zero) and ``ic_peak_width`` (width of
            the highest peak, in samples). All values are NaN when no peak is
            found or extraction fails.
        """
        features = dict.fromkeys(self._IC_FEATURE_KEYS, np.nan)
        try:
            voltage_ic = np.asarray(voltage_ic, dtype=float)
            ic_curve = np.asarray(ic_curve, dtype=float)
            # width=0 accepts every peak but makes find_peaks report
            # per-peak 'widths' in the returned properties.
            peaks, properties = find_peaks(
                ic_curve, height=min_height, distance=min_distance, width=0
            )
            if len(peaks) == 0:
                return features

            main = int(np.argmax(properties['peak_heights']))
            main_peak_idx = peaks[main]
            features['ic_peak_height'] = ic_curve[main_peak_idx]
            features['ic_peak_voltage'] = voltage_ic[main_peak_idx]
            # Explicit trapezoid rule keeps this independent of the
            # np.trapz -> np.trapezoid rename across NumPy versions.
            positive = np.clip(ic_curve, 0, None)
            features['ic_peak_area'] = float(
                np.sum(0.5 * (positive[1:] + positive[:-1]) * np.diff(voltage_ic))
            )
            # Width of the same peak whose height/position is reported.
            features['ic_peak_width'] = properties['widths'][main]
        except (ValueError, TypeError, IndexError, KeyError) as e:
            print(f"IC特征提取错误: {e}")
            features = dict.fromkeys(self._IC_FEATURE_KEYS, np.nan)
        return features

    def extract_dv_features(self, capacity_dv, dv_curve, min_distance=10):
        """Extract features describing the deepest valley of a DV curve.

        Valleys are local minima of ``dv_curve``. No height threshold is
        applied: a threshold on the negated curve would reject every valley
        of a curve whose values are positive.

        Args:
            capacity_dv: Capacity axis of the DV curve.
            dv_curve: DV values (normally the smoothed curve).
            min_distance: Minimum valley spacing in samples.

        Returns:
            Dict with ``dv_valley_depth`` (DV value at the deepest valley),
            ``dv_valley_capacity`` (its capacity) and ``dv_max_gradient``
            (largest absolute sample-to-sample gradient). All values are NaN
            when no valley is found or extraction fails.
        """
        features = dict.fromkeys(self._DV_FEATURE_KEYS, np.nan)
        try:
            capacity_dv = np.asarray(capacity_dv, dtype=float)
            dv_curve = np.asarray(dv_curve, dtype=float)
            valleys, _ = find_peaks(-dv_curve, distance=min_distance)
            if len(valleys) == 0:
                return features

            main_valley_idx = valleys[int(np.argmin(dv_curve[valleys]))]
            features['dv_valley_depth'] = dv_curve[main_valley_idx]
            features['dv_valley_capacity'] = capacity_dv[main_valley_idx]
            features['dv_max_gradient'] = np.max(np.abs(np.gradient(dv_curve)))
        except (ValueError, TypeError, IndexError) as e:
            print(f"DV特征提取错误: {e}")
            features = dict.fromkeys(self._DV_FEATURE_KEYS, np.nan)
        return features

    def extract_all_features(self, processed_data, cycle_numbers):
        """Extract IC, DV and basic statistical features for each cycle.

        Args:
            processed_data: Output of ``preprocess_voltage_capacity_data``.
            cycle_numbers: Cycles to extract; those missing from
                ``processed_data`` are skipped.

        Returns:
            Tuple ``(features_df, valid_cycles)``. ``features_df`` has one
            row per usable cycle, sorted by ``cycle``, with gaps filled
            forward then backward. ``self.feature_names`` is set to the
            feature columns that hold at least one real value, so columns
            that are NaN for every cycle are not used for modelling.
        """
        rows = []
        valid_cycles = []
        for cycle in cycle_numbers:
            entry = processed_data.get(cycle)
            if entry is None:
                continue
            voltage = entry['voltage']
            capacity = entry['capacity']
            if len(voltage) == 0 or len(capacity) == 0:
                continue

            voltage_ic, ic_smooth, _ = self.calculate_ic_curve(voltage, capacity)
            capacity_dv, dv_smooth, _ = self.calculate_dv_curve(voltage, capacity)
            if voltage_ic is None or capacity_dv is None:
                continue

            features = {'cycle': cycle}
            # The extractors already return 'ic_'/'dv_'-prefixed keys; adding
            # another prefix would hide them from plot_results.
            features.update(self.extract_ic_features(voltage_ic, ic_smooth))
            features.update(self.extract_dv_features(capacity_dv, dv_smooth))
            features['max_capacity'] = np.max(capacity)
            features['min_voltage'] = np.min(voltage)
            features['max_voltage'] = np.max(voltage)
            rows.append(features)
            valid_cycles.append(cycle)

        if not rows:
            self.feature_names = []
            return pd.DataFrame(columns=['cycle']), valid_cycles

        features_df = pd.DataFrame(rows)
        features_df = features_df.sort_values('cycle').reset_index(drop=True)
        features_df = features_df.ffill().bfill()
        # After ffill/bfill a column is either complete or entirely NaN.
        self.feature_names = [
            col for col in features_df.columns
            if col != 'cycle' and features_df[col].notna().any()
        ]
        return features_df, valid_cycles

    def calculate_soh(self, features_df, initial_capacity):
        """Compute SOH as ``max_capacity / initial_capacity * 100``.

        Args:
            features_df: DataFrame containing a ``max_capacity`` column.
            initial_capacity: Reference capacity; must be positive.

        Returns:
            Series of SOH percentages aligned with ``features_df``.

        Raises:
            ValueError: If ``initial_capacity`` is not positive.
        """
        if not initial_capacity > 0:
            raise ValueError("初始容量必须为正数")
        return (features_df['max_capacity'] / initial_capacity) * 100

    def prepare_training_data(self, features_df, soh, train_ratio=0.7):
        """Build scaled, chronologically split train/test matrices.

        Lag-1/2/3 copies of every name in ``self.feature_names`` are added,
        rows with missing model inputs are dropped, and the remaining rows
        are split in order. The scaler is fitted on the training part only.
        ``features_df`` itself is not modified.

        Args:
            features_df: Output of ``extract_all_features``.
            soh: Series of SOH values sharing ``features_df``'s index.
            train_ratio: Fraction of usable rows assigned to training.

        Returns:
            Tuple ``(X_train_scaled, X_test_scaled, y_train, y_test,
            test_index)`` where ``test_index`` holds the ``features_df`` row
            labels of the test samples.

        Raises:
            ValueError: If there are no features or too few usable rows to
                form non-empty train and test sets.
        """
        # NOTE(review): 'max_capacity' is a feature and SOH is computed from
        # it in calculate_soh, so the target is a linear function of one
        # input. Confirm this is intended before trusting the scores.
        lagged = {
            f'{feature}_lag{lag}': features_df[feature].shift(lag)
            for feature in self.feature_names
            for lag in (1, 2, 3)
        }
        model_columns = list(self.feature_names) + list(lagged)
        if not model_columns:
            raise ValueError("没有可用的特征")

        # assign() returns a new frame, so the caller's data stays untouched.
        augmented = features_df.assign(**lagged)
        # Only model inputs decide which rows are usable; unrelated all-NaN
        # columns must not wipe out the data set.
        augmented = augmented.dropna(subset=model_columns)

        split_idx = int(len(augmented) * train_ratio)
        if split_idx == 0 or split_idx == len(augmented):
            raise ValueError("有效样本不足,无法划分训练集和测试集")

        X = augmented[model_columns]
        y = soh.loc[augmented.index]  # align by label, not by position

        X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
        y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)
        return (
            X_train_scaled,
            X_test_scaled,
            y_train,
            y_test,
            augmented.index[split_idx:],
        )

    def train_model(self, X_train, y_train, model_type='random_forest'):
        """Fit the regression model and store it in ``self.model``.

        Args:
            X_train: Training feature matrix.
            y_train: Training targets.
            model_type: ``'random_forest'`` or ``'svr'``.

        Raises:
            ValueError: If ``model_type`` is not supported.
        """
        if model_type == 'random_forest':
            model = RandomForestRegressor(
                n_estimators=100,
                max_depth=10,
                random_state=42,
            )
        elif model_type == 'svr':
            model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
        else:
            raise ValueError("不支持的模型类型")

        model.fit(X_train, y_train)
        self.model = model
        print(f"{model_type} 模型训练完成")

    def predict_soh(self, X):
        """Predict SOH for the (already scaled) feature matrix ``X``.

        Raises:
            ValueError: If no model has been trained yet.
        """
        if self.model is None:
            raise ValueError("模型未训练")
        return self.model.predict(X)

    def predict_rul(self, features_df, soh, threshold=80, max_horizon=1000):
        """Estimate remaining useful life by linear extrapolation of SOH.

        Args:
            features_df: DataFrame with a ``cycle`` column.
            soh: SOH values aligned with ``features_df``.
            threshold: End-of-life SOH level.
            max_horizon: Number of future cycles searched; also the value
                returned when the fitted line never reaches the threshold
                within that horizon.

        Returns:
            0 if any observed SOH is already below ``threshold``; NaN if
            there are 5 or fewer usable observations; otherwise the number
            of cycles after the last observed one at which the fitted line
            first reaches ``threshold``, capped at ``max_horizon``.
        """
        cycles = np.asarray(features_df['cycle'], dtype=float)
        soh_values = np.asarray(soh, dtype=float)

        if np.any(soh_values < threshold):
            return 0

        # polyfit cannot handle NaN/inf, so fit only complete observations.
        finite = np.isfinite(cycles) & np.isfinite(soh_values)
        cycles, soh_values = cycles[finite], soh_values[finite]
        if len(cycles) <= 5:
            return np.nan

        slope, intercept = np.polyfit(cycles, soh_values, 1)
        future_cycles = cycles[-1] + np.arange(max_horizon)
        future_soh = slope * future_cycles + intercept
        crossed = np.flatnonzero(future_soh <= threshold)
        return int(crossed[0]) if crossed.size else max_horizon

    def evaluate_model(self, y_true, y_pred):
        """Print and return the MAE and RMSE of the predictions.

        Returns:
            Tuple ``(mae, rmse)``.
        """
        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        print(f"MAE: {mae:.4f}")
        print(f"RMSE: {rmse:.4f}")
        return mae, rmse

    def plot_results(self, features_df, soh, y_true, y_pred, test_start_idx):
        """Plot SOH prediction, feature evolution and the error histogram.

        Args:
            features_df: Feature table with a ``cycle`` column.
            soh: True SOH for every row of ``features_df``.
            y_true: True SOH of the test samples.
            y_pred: Predicted SOH of the test samples.
            test_start_idx: Either an integer position at which the test
                rows start in ``features_df``, or an array-like of
                ``features_df`` row labels identifying the test rows (the
                fifth value returned by ``prepare_training_data``).
        """
        _, axes = plt.subplots(2, 2, figsize=(15, 12))

        cycles = features_df['cycle'].values
        if np.ndim(test_start_idx) == 0:
            test_cycles = cycles[int(test_start_idx):]
        else:
            # Row labels: look the cycles up instead of slicing by position.
            test_cycles = features_df.loc[test_start_idx, 'cycle'].to_numpy()

        # SOH prediction
        ax = axes[0, 0]
        ax.plot(cycles, soh, 'b-', label='真实SOH', linewidth=2)
        ax.plot(test_cycles, y_pred, 'r--', label='预测SOH', linewidth=2)
        if len(test_cycles) > 0:
            ax.axvline(x=test_cycles[0], color='gray', linestyle=':', alpha=0.7)
        ax.set_xlabel('循环次数')
        ax.set_ylabel('SOH (%)')
        ax.set_title('电池健康状态(SOH)预测')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # IC feature evolution
        if 'ic_peak_height' in features_df.columns:
            ax = axes[0, 1]
            ax.plot(cycles, features_df['ic_peak_height'], 'g-', linewidth=2)
            ax.set_xlabel('循环次数')
            ax.set_ylabel('IC峰值高度')
            ax.set_title('IC峰值高度演变')
            ax.grid(True, alpha=0.3)

        # DV feature evolution
        if 'dv_valley_capacity' in features_df.columns:
            ax = axes[1, 0]
            ax.plot(cycles, features_df['dv_valley_capacity'], 'm-', linewidth=2)
            ax.set_xlabel('循环次数')
            ax.set_ylabel('DV谷值容量')
            ax.set_title('DV谷值容量演变')
            ax.grid(True, alpha=0.3)

        # Prediction error distribution
        error = np.asarray(y_true) - np.asarray(y_pred)
        ax = axes[1, 1]
        ax.hist(error, bins=20, alpha=0.7, edgecolor='black')
        ax.axvline(x=0, color='r', linestyle='--')
        ax.set_xlabel('预测误差')
        ax.set_ylabel('频次')
        ax.set_title('预测误差分布')
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()
# Example usage
def _generate_simulated_data(rng, n_cycles=100, n_points=100):
    """Return a synthetic cycling table with columns cycle/voltage/capacity.

    Each cycle sweeps the voltage from 3.0 to 4.2 and follows a saturating
    capacity curve whose maximum decays exponentially with the cycle number;
    Gaussian noise is added to both the maximum and the individual samples.
    """
    voltage = np.linspace(3.0, 4.2, n_points)
    frames = []
    for cycle in range(1, n_cycles + 1):
        # Simulated capacity fade
        capacity_max = 2.0 * np.exp(-0.005 * cycle) + 0.1 * rng.normal()
        capacity = capacity_max * (1 - np.exp(-5 * (voltage - 3.0)))
        frames.append(pd.DataFrame({
            'cycle': cycle,
            'voltage': voltage,
            'capacity': capacity + 0.01 * rng.normal(size=n_points),
        }))
    return pd.concat(frames, ignore_index=True)


def main(seed=42):
    """Run the full SOH/RUL demo on simulated data.

    Args:
        seed: Seed for the simulated-data random generator, so repeated runs
            produce the same data. Pass None for a fresh random data set.

    Returns:
        Tuple ``(predictor, features_df, soh)``.
    """
    predictor = BatteryHealthPredictor()

    # Simulated data (replace with real measurements in practice)
    print("生成模拟数据...")
    predictor.data = _generate_simulated_data(np.random.default_rng(seed))

    # Analyse every fifth cycle
    cycle_numbers = list(range(1, 101, 5))

    print("预处理数据...")
    processed_data = predictor.preprocess_voltage_capacity_data(cycle_numbers)

    print("提取IC/DV特征...")
    features_df, valid_cycles = predictor.extract_all_features(
        processed_data, cycle_numbers
    )

    # SOH relative to an assumed initial capacity of 2.0
    initial_capacity = 2.0
    soh = predictor.calculate_soh(features_df, initial_capacity)

    print("准备训练数据...")
    X_train, X_test, y_train, y_test, test_index = predictor.prepare_training_data(
        features_df, soh, train_ratio=0.7
    )

    print("训练模型...")
    predictor.train_model(X_train, y_train, model_type='random_forest')

    y_pred = predictor.predict_soh(X_test)

    print("模型评估:")
    mae, rmse = predictor.evaluate_model(y_test, y_pred)

    rul = predictor.predict_rul(features_df, soh, threshold=80)
    print(f"预测剩余使用寿命(RUL): {rul} 次循环")

    # prepare_training_data returns the row labels of the test samples, while
    # plot_results slices the cycle array by position. The test rows are the
    # chronological tail, so their start position is the total row count
    # minus the test size.
    test_start_pos = len(features_df) - len(y_test)
    predictor.plot_results(features_df, soh, y_test, y_pred, test_start_pos)
    return predictor, features_df, soh
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    predictor, features_df, soh = main()
这个实现方案包含以下主要功能:
核心特性
- 数据预处理
· 电压-容量数据清洗和排序
· 去除重复点和异常值
- IC/DV分析
· 增量容量(IC)曲线计算
· 差分电压(DV)曲线计算
· 曲线平滑处理
- 特征提取
· IC曲线峰值特征(高度、位置、面积)
· DV曲线谷值特征
· 统计特征
- 健康状态预测
· 基于机器学习的SOH预测
· 多种模型支持(随机森林、SVR等)
- 剩余寿命预测
· 基于趋势外推的RUL预测
· 可配置寿命终止阈值
使用方法
- 准备数据:确保数据包含电压、容量、循环编号等列
- 初始化预测器:配置滤波器参数
- 特征提取:自动计算IC/DV曲线并提取特征
- 模型训练:使用历史数据训练预测模型
- 预测评估:预测SOH和RUL,并评估准确性