下面是一个基于无干扰增量容量（IC）和差分电压（DV）分析、用于预测锂离子电池健康状态（SOH）与剩余寿命（RUL）的 Python 实现。

Python 代码如下：
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter, find_peaks
from scipy.interpolate import interp1d
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')

class BatteryHealthPredictor:
    def __init__(self, window_size=51, poly_order=3):
        """Create a predictor with its smoothing settings and empty model state.

        Args:
            window_size: Window length handed to the Savitzky-Golay filter.
            poly_order: Polynomial order handed to the Savitzky-Golay filter.
        """
        # Learning-side state: the model slot stays empty until train_model()
        # assigns an estimator; feature names are filled by feature extraction.
        self.model = None
        self.feature_names = []
        self.scaler = StandardScaler()

        # Smoothing configuration read by the IC/DV curve calculations.
        self.window_size, self.poly_order = window_size, poly_order
        
    def load_data(self, file_path):
        """
        加载电池循环数据
        假设数据格式: 循环编号, 电压, 电流, 容量, 时间等
        """
        try:
            self.data = pd.read_csv(file_path)
            print(f"数据加载成功，共 {len(self.data)} 行")
            return True
        except Exception as e:
            print(f"数据加载失败: {e}")
            return False
    
    def preprocess_voltage_capacity_data(self, cycle_numbers):
        """
        预处理电压-容量数据
        """
        processed_data = {}
        
        for cycle in cycle_numbers:
            cycle_data = self.data[self.data['cycle'] == cycle].copy()
            
            if len(cycle_data) < 10:  # 跳过数据点过少的循环
                continue
                
            # 按电压排序并去除重复电压值
            cycle_data = cycle_data.sort_values('voltage')
            cycle_data = cycle_data.drop_duplicates('voltage', keep='first')
            
            # 确保电压单调递增
            if not cycle_data['voltage'].is_monotonic_increasing:
                cycle_data = cycle_data[cycle_data['voltage'].diff() > 0]
            
            processed_data[cycle] = {
                'voltage': cycle_data['voltage'].values,
                'capacity': cycle_data['capacity'].values
            }
            
        return processed_data
    
    def calculate_ic_curve(self, voltage, capacity):
        """
        计算增量容量(IC)曲线
        IC = dQ/dV
        """
        try:
            # 使用中心差分法计算微分
            dQ = np.diff(capacity)
            dV = np.diff(voltage)
            ic_raw = dQ / dV
            
            # 对应的电压点（中心点）
            voltage_ic = (voltage[:-1] + voltage[1:]) / 2
            
            # 使用Savitzky-Golay滤波器平滑IC曲线
            if len(ic_raw) > self.window_size:
                ic_smooth = savgol_filter(ic_raw, self.window_size, self.poly_order)
            else:
                ic_smooth = ic_raw
                
            return voltage_ic, ic_smooth, ic_raw
            
        except Exception as e:
            print(f"IC曲线计算错误: {e}")
            return None, None, None
    
    def calculate_dv_curve(self, voltage, capacity):
        """
        计算差分电压(DV)曲线
        DV = dV/dQ
        """
        try:
            # 使用中心差分法计算微分
            dV = np.diff(voltage)
            dQ = np.diff(capacity)
            dv_raw = dV / dQ
            
            # 对应的容量点（中心点）
            capacity_dv = (capacity[:-1] + capacity[1:]) / 2
            
            # 平滑处理
            if len(dv_raw) > self.window_size:
                dv_smooth = savgol_filter(dv_raw, self.window_size, self.poly_order)
            else:
                dv_smooth = dv_raw
                
            return capacity_dv, dv_smooth, dv_raw
            
        except Exception as e:
            print(f"DV曲线计算错误: {e}")
            return None, None, None
    
    def extract_ic_features(self, voltage_ic, ic_curve):
        """
        提取IC曲线特征
        """
        features = {}
        
        try:
            # 寻找IC曲线峰值
            peaks, properties = find_peaks(ic_curve, height=0.01, distance=10)
            
            if len(peaks) > 0:
                # 主峰特征
                main_peak_idx = peaks[np.argmax(properties['peak_heights'])]
                features['ic_peak_height'] = ic_curve[main_peak_idx]
                features['ic_peak_voltage'] = voltage_ic[main_peak_idx]
                
                # 峰面积（近似积分）
                features['ic_peak_area'] = np.trapz(np.clip(ic_curve, 0, None), voltage_ic)
                
                # 峰宽度
                if 'widths' in properties and len(properties['widths']) > 0:
                    features['ic_peak_width'] = properties['widths'][0]
                else:
                    features['ic_peak_width'] = np.nan
            else:
                features.update({
                    'ic_peak_height': np.nan,
                    'ic_peak_voltage': np.nan,
                    'ic_peak_area': np.nan,
                    'ic_peak_width': np.nan
                })
                
        except Exception as e:
            print(f"IC特征提取错误: {e}")
            features.update({
                'ic_peak_height': np.nan,
                'ic_peak_voltage': np.nan,
                'ic_peak_area': np.nan,
                'ic_peak_width': np.nan
            })
            
        return features
    
    def extract_dv_features(self, capacity_dv, dv_curve):
        """
        提取DV曲线特征
        """
        features = {}
        
        try:
            # 寻找DV曲线谷值（负峰）
            valleys, properties = find_peaks(-dv_curve, height=0.01, distance=10)
            
            if len(valleys) > 0:
                # 主谷特征
                main_valley_idx = valleys[np.argmax(properties['peak_heights'])]
                features['dv_valley_depth'] = dv_curve[main_valley_idx]
                features['dv_valley_capacity'] = capacity_dv[main_valley_idx]
                
                # 曲率特征
                dv_gradient = np.gradient(dv_curve)
                features['dv_max_gradient'] = np.max(np.abs(dv_gradient))
                
            else:
                features.update({
                    'dv_valley_depth': np.nan,
                    'dv_valley_capacity': np.nan,
                    'dv_max_gradient': np.nan
                })
                
        except Exception as e:
            print(f"DV特征提取错误: {e}")
            features.update({
                'dv_valley_depth': np.nan,
                'dv_valley_capacity': np.nan,
                'dv_max_gradient': np.nan
            })
            
        return features
    
    def extract_all_features(self, processed_data, cycle_numbers):
        """
        提取所有循环的特征
        """
        features_list = []
        valid_cycles = []
        
        for cycle in cycle_numbers:
            if cycle not in processed_data:
                continue
                
            voltage = processed_data[cycle]['voltage']
            capacity = processed_data[cycle]['capacity']
            
            # 计算IC和DV曲线
            voltage_ic, ic_smooth, ic_raw = self.calculate_ic_curve(voltage, capacity)
            capacity_dv, dv_smooth, dv_raw = self.calculate_dv_curve(voltage, capacity)
            
            if voltage_ic is None or capacity_dv is None:
                continue
                
            # 提取特征
            features = {'cycle': cycle}
            
            # IC特征
            ic_features = self.extract_ic_features(voltage_ic, ic_smooth)
            features.update({f'ic_{k}': v for k, v in ic_features.items()})
            
            # DV特征
            dv_features = self.extract_dv_features(capacity_dv, dv_smooth)
            features.update({f'dv_{k}': v for k, v in dv_features.items()})
            
            # 基本统计特征
            features['max_capacity'] = np.max(capacity)
            features['min_voltage'] = np.min(voltage)
            features['max_voltage'] = np.max(voltage)
            
            features_list.append(features)
            valid_cycles.append(cycle)
            
        features_df = pd.DataFrame(features_list)
        features_df = features_df.sort_values('cycle').reset_index(drop=True)
        
        # 填充缺失值
        features_df = features_df.ffill().bfill()
        
        self.feature_names = [col for col in features_df.columns if col != 'cycle']
        
        return features_df, valid_cycles
    
    def calculate_soh(self, features_df, initial_capacity):
        """
        计算健康状态SOH
        SOH = 当前容量 / 初始容量 × 100%
        """
        soh = (features_df['max_capacity'] / initial_capacity) * 100
        return soh
    
    def prepare_training_data(self, features_df, soh, train_ratio=0.7):
        """
        准备训练数据
        """
        # 创建滞后特征用于时间序列预测
        lag_features = []
        for feature in self.feature_names:
            for lag in [1, 2, 3]:
                features_df[f'{feature}_lag{lag}'] = features_df[feature].shift(lag)
                lag_features.append(f'{feature}_lag{lag}')
        
        features_df = features_df.dropna()
        
        # 划分训练测试集
        split_idx = int(len(features_df) * train_ratio)
        
        X = features_df[self.feature_names + lag_features]
        y = soh.iloc[features_df.index]
        
        X_train = X.iloc[:split_idx]
        X_test = X.iloc[split_idx:]
        y_train = y.iloc[:split_idx]
        y_test = y.iloc[split_idx:]
        
        # 标准化特征
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)
        
        return X_train_scaled, X_test_scaled, y_train, y_test, features_df.index[split_idx:]
    
    def train_model(self, X_train, y_train, model_type='random_forest'):
        """
        训练预测模型
        """
        if model_type == 'random_forest':
            self.model = RandomForestRegressor(
                n_estimators=100,
                max_depth=10,
                random_state=42
            )
        elif model_type == 'svr':
            self.model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
        else:
            raise ValueError("不支持的模型类型")
            
        self.model.fit(X_train, y_train)
        print(f"{model_type} 模型训练完成")
        
    def predict_soh(self, X):
        """
        预测SOH
        """
        if self.model is None:
            raise ValueError("模型未训练")
        return self.model.predict(X)
    
    def predict_rul(self, features_df, soh, threshold=80):
        """
        预测剩余使用寿命(RUL)
        """
        # 使用线性外推法预测RUL
        cycles = features_df['cycle'].values
        soh_values = soh.values
        
        # 找到SOH首次低于阈值的时间点
        below_threshold = soh_values < threshold
        if np.any(below_threshold):
            first_below = np.where(below_threshold)[0][0]
            rul = 0
        else:
            # 线性回归预测
            if len(cycles) > 5:
                z = np.polyfit(cycles, soh_values, 1)
                p = np.poly1d(z)
                
                # 预测未来循环
                future_cycles = np.arange(cycles[-1], cycles[-1] + 1000)
                future_soh = p(future_cycles)
                
                end_of_life = np.where(future_soh <= threshold)[0]
                if len(end_of_life) > 0:
                    rul = end_of_life[0]
                else:
                    rul = 1000  # 保守估计
            else:
                rul = np.nan
                
        return rul
    
    def evaluate_model(self, y_true, y_pred):
        """Print and return the MAE and RMSE of ``y_pred`` against ``y_true``.

        Returns:
            Tuple ``(mae, rmse)``.
        """
        mae = mean_absolute_error(y_true, y_pred)
        squared_error_mean = mean_squared_error(y_true, y_pred)
        rmse = np.sqrt(squared_error_mean)

        # Report both metrics with four decimals.
        for report_line in (f"MAE: {mae:.4f}", f"RMSE: {rmse:.4f}"):
            print(report_line)

        return mae, rmse
    
    def plot_results(self, features_df, soh, y_true, y_pred, test_start_idx):
        """Draw a 2x2 summary figure and show it.

        Panels: measured SOH with the test-set predictions overlaid, the IC
        peak height per cycle, the DV valley capacity per cycle, and a
        histogram of the prediction errors.

        Args:
            features_df: Feature table with a ``'cycle'`` column.
            soh: SOH values aligned with ``features_df`` rows.
            y_true: True SOH of the test rows.
            y_pred: Predicted SOH of the test rows.
            test_start_idx: Either an integer position where the test rows
                start, or an array-like of the test rows' positions (such as
                the index returned by ``prepare_training_data``).
        """
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))

        cycles = features_df['cycle'].values

        # An array-like cannot be used as a slice bound, so handle both forms.
        if np.ndim(test_start_idx) == 0:
            test_cycles = cycles[int(test_start_idx):]
        else:
            # NOTE(review): treats the labels as row positions, which holds
            # for a default 0..n-1 index - confirm for other indexes.
            test_cycles = cycles[np.asarray(test_start_idx, dtype=int)]

        def first_present(*names):
            """Return the first of ``names`` that is a column, else None."""
            return next((n for n in names if n in features_df.columns), None)

        # SOH: measurements and test-set predictions.
        axes[0, 0].plot(cycles, soh, 'b-', label='真实SOH', linewidth=2)
        axes[0, 0].plot(test_cycles, y_pred, 'r--', label='预测SOH', linewidth=2)
        if len(test_cycles) > 0:
            # Mark where the test span begins.
            axes[0, 0].axvline(x=test_cycles[0], color='gray', linestyle=':', alpha=0.7)
        axes[0, 0].set_xlabel('循环次数')
        axes[0, 0].set_ylabel('SOH (%)')
        axes[0, 0].set_title('电池健康状态(SOH)预测')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

        # IC peak height per cycle. The doubly-prefixed spelling is accepted
        # too, in case the table was built with an extra 'ic_' prefix.
        ic_col = first_present('ic_peak_height', 'ic_ic_peak_height')
        if ic_col is not None:
            axes[0, 1].plot(cycles, features_df[ic_col], 'g-', linewidth=2)
            axes[0, 1].set_xlabel('循环次数')
            axes[0, 1].set_ylabel('IC峰值高度')
            axes[0, 1].set_title('IC峰值高度演变')
            axes[0, 1].grid(True, alpha=0.3)

        # DV valley capacity per cycle (same tolerance for an extra prefix).
        dv_col = first_present('dv_valley_capacity', 'dv_dv_valley_capacity')
        if dv_col is not None:
            axes[1, 0].plot(cycles, features_df[dv_col], 'm-', linewidth=2)
            axes[1, 0].set_xlabel('循环次数')
            axes[1, 0].set_ylabel('DV谷值容量')
            axes[1, 0].set_title('DV谷值容量演变')
            axes[1, 0].grid(True, alpha=0.3)

        # Prediction error distribution; plain arrays avoid index alignment.
        error = np.asarray(y_true) - np.asarray(y_pred)
        axes[1, 1].hist(error, bins=20, alpha=0.7, edgecolor='black')
        axes[1, 1].axvline(x=0, color='r', linestyle='--')
        axes[1, 1].set_xlabel('预测误差')
        axes[1, 1].set_ylabel('频次')
        axes[1, 1].set_title('预测误差分布')
        axes[1, 1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

# 示例使用
def main():
    """Run the whole pipeline on simulated data as a usage example.

    Steps: simulate cycling data, preprocess every fifth cycle, extract IC/DV
    features, compute SOH against a 2.0 Ah initial capacity, train a random
    forest on lagged features, evaluate it, estimate the RUL and plot.

    Returns:
        Tuple ``(predictor, features_df, soh)``.
    """
    predictor = BatteryHealthPredictor()

    def generate_simulated_data(n_cycles=100, n_points=100, seed=42):
        """Simulate noisy voltage-capacity traces with fading capacity.

        A fixed seed keeps the demo reproducible from run to run.
        """
        rng = np.random.default_rng(seed)
        voltage = np.linspace(3.0, 4.2, n_points)
        # Capacity-versus-voltage shape shared by all cycles.
        shape = 1 - np.exp(-5 * (voltage - 3.0))

        frames = []
        for cycle in range(1, n_cycles + 1):
            # Exponentially fading maximum capacity plus cycle-level noise.
            capacity_max = 2.0 * np.exp(-0.005 * cycle) + 0.1 * rng.normal()
            frames.append(pd.DataFrame({
                'cycle': cycle,
                'voltage': voltage,
                # Per-sample measurement noise.
                'capacity': capacity_max * shape + 0.01 * rng.normal(size=n_points),
            }))

        return pd.concat(frames, ignore_index=True)

    # Simulated data stands in for a real data file here.
    print("生成模拟数据...")
    simulated_data = generate_simulated_data()
    predictor.data = simulated_data

    # Analyse every fifth cycle.
    cycle_numbers = list(range(1, 101, 5))

    print("预处理数据...")
    processed_data = predictor.preprocess_voltage_capacity_data(cycle_numbers)

    print("提取IC/DV特征...")
    features_df, valid_cycles = predictor.extract_all_features(processed_data, cycle_numbers)

    # SOH relative to an assumed initial capacity of 2.0 Ah.
    initial_capacity = 2.0
    soh = predictor.calculate_soh(features_df, initial_capacity)

    print("准备训练数据...")
    X_train, X_test, y_train, y_test, test_index = predictor.prepare_training_data(
        features_df, soh, train_ratio=0.7
    )

    print("训练模型...")
    predictor.train_model(X_train, y_train, model_type='random_forest')

    y_pred = predictor.predict_soh(X_test)

    print("模型评估:")
    mae, rmse = predictor.evaluate_model(y_test, y_pred)

    rul = predictor.predict_rul(features_df, soh, threshold=80)
    print(f"预测剩余使用寿命(RUL): {rul} 次循环")

    # prepare_training_data returns the index labels of the test rows, while
    # plot_results slices by an integer position. features_df has a 0..n-1
    # index, so the first label is the position where the test rows start.
    test_start = int(test_index[0]) if len(test_index) else len(features_df)
    predictor.plot_results(features_df, soh, y_test, y_pred, test_start)

    return predictor, features_df, soh

# Script entry point: run the demo and keep its three results as module-level
# names so they remain available after the run (e.g. in an interactive session).
if __name__ == "__main__":
    predictor, features_df, soh = main()
这个实现方案包含以下主要功能：

核心特性
1. 数据预处理
   · 电压-容量数据的清洗和排序
   · 去除重复电压点，保证电压单调递增
2. IC/DV分析
   · 增量容量(IC)曲线计算（IC = dQ/dV）
   · 差分电压(DV)曲线计算（DV = dV/dQ）
   · 基于 Savitzky-Golay 滤波器的曲线平滑处理
3. 特征提取
   · IC曲线峰值特征（高度、位置、面积、宽度）
   · DV曲线谷值特征（深度、位置、最大梯度）
   · 统计特征（最大容量、最小/最大电压）
4. 健康状态预测
   · 基于机器学习的SOH预测
   · 支持随机森林和SVR两种模型
5. 剩余寿命预测
   · 基于线性趋势外推的RUL预测
   · 可配置寿命终止阈值

使用方法
1. 准备数据：确保数据包含 cycle（循环编号）、voltage（电压）、capacity（容量）三列
2. 初始化预测器：配置 Savitzky-Golay 滤波器参数（窗口大小和多项式阶数）
3. 特征提取：自动计算IC/DV曲线并提取特征
4. 模型训练：使用历史数据训练预测模型
5. 预测评估：预测SOH和RUL，并用MAE和RMSE评估准确性