基于深度学习的网络物理系统故障检测与诊断(第一部分,Python)

复制代码
pip install attention
pip install keras_tuner
import os
import re
import itertools
import numpy as np
import scipy.signal
import pandas as pd
import seaborn as sns
import scipy.io as scio
import tensorflow as tf
import keras_tuner as kt
import matplotlib.pyplot as plt


from sklearn.svm import SVC
from sklearn import preprocessing
from sklearn.utils import shuffle
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer, OneHotEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report, confusion_matrix, precision_score, recall_score, f1_score, accuracy_score, roc_curve, roc_auc_score, auc


from tensorflow import keras


from keras.layers import *
from keras import backend as k
from keras.optimizers import Adam
from keras.models import Sequential,Model,load_model
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint


from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Input,Dense, Dropout, Flatten, Conv1D, MaxPooling1D


from attention import Attention
from google.colab import drive
drive.mount('/content/drive')

Data Visualization

复制代码
# Supply voltage (V) whose recordings are plotted; data folders exist for 40, 80 and 120.
# NOTE(review): this used to be 40, but the path below had no '{}' placeholder, so its
# .format() call was a no-op and the 120 V folder was always loaded. The value is set to 120
# so the plotted data stays exactly what it was; the y-limits below were presumably tuned
# for that data — confirm before switching to another voltage.
working_cond = 120


# Folder containing one CSV file per fault scenario for the selected working condition
Path = r'/content/drive/MyDrive/ALL_DC_motor_Data/Ua_{}V_Noise_2_perct'.format(working_cond)
file_name = sorted(os.listdir(path=Path))  # sorted so the subplot order is reproducible

# One subplot row per file: residual 1 on the left, residual 2 on the right.
# squeeze=False keeps axs two-dimensional even when the folder holds a single file.
fig, axs = plt.subplots(len(file_name), 2, figsize=(10, 2 * len(file_name)), squeeze=False)


for i, file in enumerate(file_name):
    csv_path = os.path.join(Path, file)  # full path of this fault file
    df = pd.read_csv(csv_path)
    df = df.iloc[::50]  # keep every 50th row to lighten the plot
    ax1 = axs[i][0]
    ax2 = axs[i][1]
    # Lower bound (red), upper bound (green) and the ARR signal (blue) for each residual
    ax1.plot(df['time'], df['a1_lower'], '-r', label='')
    ax2.plot(df['time'], df['a2_lower'], '-r', label='')
    ax1.plot(df['time'], df['a1_upper'], '-g', label='a1')
    ax2.plot(df['time'], df['a2_upper'], '-g', label='a2')
    ax1.plot(df['time'], df['ARR1'], '-b', label='')
    ax2.plot(df['time'], df['ARR2'], '-b', label='')
    # Drop the trailing 13 characters ("_noise_02.csv") so only the fault type remains
    ax1.set_title(file[:-13])
    ax1.set_ylim(-30, 30)
    ax1.set_xlabel('Time')
    ax1.set_ylabel('r1')
    ax2.set_title(file[:-13])
    ax2.set_ylim(-1, 1)
    ax2.set_xlabel('Time')
    ax2.set_ylabel('r2')


plt.tight_layout()
plt.show()
复制代码
# Folder containing one CSV file per fault scenario.
# NOTE(review): the original called .format(40) on a path without a '{}' placeholder, so the
# 120 V folder was always the one loaded. The template below makes that explicit and keeps
# the plotted data (and the y-limits that were presumably tuned for it) unchanged.
Path = r'/content/drive/MyDrive/ALL_DC_motor_Data/Ua_{}V_Noise_2_perct'.format(120)
file_name = sorted(os.listdir(path=Path))  # sorted so the subplot order is reproducible


# One subplot row per file: Im on the left, Wm on the right.
# squeeze=False keeps axs two-dimensional even when the folder holds a single file.
fig, axs = plt.subplots(len(file_name), 2, figsize=(10, 2 * len(file_name)), squeeze=False)


for i, file in enumerate(file_name):
    csv_path = os.path.join(Path, file)
    df = pd.read_csv(csv_path)
    df = df.iloc[::50]  # keep every 50th row to lighten the plot
    ax1 = axs[i][0]
    ax2 = axs[i][1]
    ax1.plot(df['time'], df['Im'], '-r', label='')
    ax2.plot(df['time'], df['Wm'], '-b', label='')
    # Drop the trailing 13 characters of the file name so only the fault type remains
    ax1.set_title(file[:-13])
    ax1.set_ylim(20, 36)
    ax1.set_xlabel('Time')
    ax1.set_ylabel('$I_m$')
    ax2.set_title(file[:-13])
    ax2.set_ylim(250, 480)
    ax2.set_xlabel('Time')
    # Raw string: '\o' is not a valid Python escape sequence and triggers a warning otherwise
    ax2.set_ylabel(r'$\omega_m$')


plt.tight_layout()
plt.show()

Dataframe Creation

复制代码
def obtain_DataFrame_for_this_working_condition(working_cond, data_root='/content/drive/MyDrive/ALL_DC_motor_Data'):
    """Collect the post-fault samples of every CSV file of one working condition.

    Args:
        working_cond: Supply-voltage identifier inserted into the folder name
            ``Ua_{working_cond}V_Noise_2_perct`` (the script calls this with 40, 80 and 120).
        data_root: Directory that contains the per-condition folders. Defaults to the
            Google Drive location used by the notebook.

    Returns:
        A DataFrame holding, for each file in the folder, the rows whose ``time`` lies
        strictly inside (1050, 1500) followed by those strictly inside (2050, 2500), with
        the original row indices kept, plus a ``Working_cond`` column set to
        ``'U-{working_cond}V'``. An empty folder yields an empty frame that only has the
        ``Working_cond`` column.
    """
    folder = os.path.join(data_root, 'Ua_{}V_Noise_2_perct'.format(working_cond))

    # Gather the pieces first and concatenate once: growing a DataFrame with pd.concat
    # inside the loop re-copies all previously collected rows on every iteration.
    pieces = []
    for f in os.listdir(path=folder):  # each file corresponds to one fault
        df = pd.read_csv(os.path.join(folder, f))

        # Incipient faults: samples taken after the fault was introduced
        incipient = df[(df.time > 1050) & (df.time < 1500)]
        # Step faults: samples taken after the fault was introduced
        step = df[(df.time > 2050) & (df.time < 2500)]

        pieces.extend((incipient, step))

    # pd.concat rejects an empty list, so fall back to an empty frame for an empty folder
    DF = pd.concat(pieces) if pieces else pd.DataFrame()

    DF['Working_cond'] = np.repeat('U-{}V'.format(working_cond), len(DF))
    return DF
# Load the post-fault samples of each supply voltage (read in the order 120, 40, 80)
df_120, df_40, df_80 = (
    obtain_DataFrame_for_this_working_condition(working_cond=volts)
    for volts in (120, 40, 80)
)


# Stack the three working conditions into one frame, ordered 40 V, 80 V, 120 V
DF = pd.concat([df_40, df_80, df_120])

# Scatter of Wm against Im using every 400th row; colour and marker both follow the fault type
sns.scatterplot(
    x='Im',
    y='Wm',
    hue='Fault_type',
    style='Fault_type',
    edgecolor='black',
    data=DF.iloc[::400, :],
)
plt.legend()
plt.show()
复制代码
# Scatter of ARR2 against ARR1 using every 200th row; colour and marker both follow the fault type
sns.scatterplot(
    x='ARR1',
    y='ARR2',
    hue='Fault_type',
    style='Fault_type',
    palette='deep',
    edgecolor='black',
    data=DF.iloc[::200, :],
)
plt.legend()
plt.show()

Data Augmentation

复制代码
def Sliding_Window(df_temp, win_len, stride):
    """
    Cut a dataframe into fixed-length windows, each paired with a label and a timestamp.


    Args:
        df_temp (DataFrame): Source rows. The columns at positions 3 and 4 form the window
            features, position 0 supplies the timestamp and the last column the label.
        win_len (int): Number of consecutive rows in each window.
        stride (int): Row offset between the starts of successive windows.


    Returns:
        X (ndarray): The windows stacked along the first axis, each of shape (win_len, 2).
        Y (ndarray): Last-column value of the row immediately following each window.
        T (ndarray): First-column value of that same following row.
    """
    # Window start rows; the upper bound guarantees that row `start + win_len` exists.
    starts = np.arange(0, len(df_temp) - win_len, stride)
    feature_cols = [3, 4]

    windows = [df_temp.iloc[start:start + win_len, feature_cols].values for start in starts]
    labels = [df_temp.iloc[start + win_len, -1] for start in starts]
    stamps = [df_temp.iloc[start + win_len, 0] for start in starts]

    return np.array(windows), np.array(labels), np.array(stamps)

Data Preprocessing

复制代码
def PreprocessData(working_cond, win_len, stride, data_root='/content/drive/MyDrive/ALL_DC_motor_Data'):
    """
    Turn every CSV file of one working condition into sliding-window samples and labels.


    Args:
        working_cond: Supply-voltage identifier inserted into the folder name
            ``Ua_{working_cond}V_Noise_2_perct`` (the script calls this with 40, 80 and 120).
        win_len (int): Length of the sliding window.
        stride (int): Stride or step size for sliding the window.
        data_root (str): Directory that contains the per-condition folders. Defaults to the
            Google Drive location used by the notebook.


    Returns:
        X_full (ndarray): All windows, stacked along the first axis (file by file, and within
            each file the incipient-fault segment before the step-fault segment).
        Y_full (ndarray): The label of each window, in the same order.


    Raises:
        ValueError: If no window could be extracted from any file in the folder.
    """
    folder = os.path.join(data_root, 'Ua_{}V_Noise_2_perct'.format(working_cond))

    # (start, end) time bounds, both exclusive, of the samples recorded after a fault was
    # introduced: first the incipient fault, then the step fault.
    fault_segments = ((1050, 1500), (2050, 2500))

    x_parts, y_parts = [], []
    for f in os.listdir(path=folder):  # each file corresponds to one fault
        df = pd.read_csv(os.path.join(folder, f))

        for t_start, t_end in fault_segments:
            segment = df[(df.time > t_start) & (df.time < t_end)]
            x_seg, y_seg, _ = Sliding_Window(segment, win_len, stride)
            # A segment too short for a single window produces empty arrays whose rank does
            # not match the others, so it is left out.
            if len(x_seg):
                x_parts.append(x_seg)
                y_parts.append(y_seg)

    if not x_parts:
        raise ValueError('No windows could be extracted from {!r}'.format(folder))

    # Concatenating along the sample axis, instead of stacking into a 4-D array and
    # reshaping, also works when files or segments yield different numbers of windows.
    X_full = np.concatenate(x_parts, axis=0)
    Y_full = np.concatenate(y_parts, axis=0)

    return X_full, Y_full
# Windowing settings shared by all three working conditions. With the stride larger than the
# window length, consecutive windows do not overlap.
WL=20 # can be used for adjusting window length
S=40 # can be used for adjusting stride


# Preprocess data for working condition 120
X_120, Y_120 = PreprocessData(working_cond=120, win_len=WL, stride=S)


# Preprocess data for working condition 80
X_80, Y_80 = PreprocessData(working_cond=80, win_len=WL, stride=S)


# Preprocess data for working condition 40
X_40, Y_40 = PreprocessData(working_cond=40, win_len=WL, stride=S)


# Concatenate the preprocessed data from different working conditions (order: 40, 80, 120)
X_full = np.concatenate((X_40, X_80, X_120))
Y_full = np.concatenate((Y_40, Y_80, Y_120))
# Train Test split: train_size=256 is a sample count, all remaining windows form the test set;
# random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X_full, Y_full, train_size=256, random_state=42)


# Standardising the data
# Windows are flattened to (rows, features) so the scaler works per feature column, then
# reshaped back. The scaler is fitted on the training windows only and reused for the test set.
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train.reshape(-1,X_train.shape[-1])).reshape(X_train.shape)
X_test_sc = scaler.transform(X_test.reshape(-1,X_test.shape[-1])).reshape(X_test.shape)


# One Hot encoding
# The categories are learned from the training labels only.
encoder = OneHotEncoder(sparse_output=False) # in case of error, add the argument handle_unknown = 'ignore'


# Labels are reshaped to a single column because the encoder expects 2-D input
y_train_ohe = encoder.fit_transform(y_train.reshape(-1,1))
y_test_ohe = encoder.transform(y_test.reshape(-1,1))

Model Architecture

复制代码
def build_model(hp):
    """Build and compile the Conv1D / stacked-LSTM / Attention classifier for a tuner trial.

    Args:
        hp: Keras Tuner hyperparameter object; every tunable value is requested from it.

    Returns:
        A compiled Sequential model (Adam optimizer, categorical cross-entropy loss,
        accuracy metric).

    Reads the module-level ``encoder`` (number of classes) and ``X_train`` (input shape).
    """
    num_classes = len(encoder.categories_[0])
    window_shape = (X_train.shape[1], X_train.shape[2])

    # Hyperparameters are requested in the same order as the layers that use them.
    conv_filters = hp.Int('conv_1_filter', min_value=16, max_value=128, step=32)
    conv_kernel = hp.Choice('conv_1_kernel', values=[3, 5])
    lstm_1_units = hp.Int('lstm_1', min_value=16, max_value=128, step=32)
    lstm_2_units = hp.Int('lstm_2', min_value=16, max_value=128, step=32)
    lstm_3_units = hp.Int('lstm_3', min_value=16, max_value=128, step=32)
    dense_units = hp.Int('dense_1_units', min_value=32, max_value=128, step=16)
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3])

    # Assemble the network layer by layer
    model = Sequential()
    model.add(Conv1D(filters=conv_filters, kernel_size=conv_kernel, activation='relu',
                     input_shape=window_shape, padding='same'))
    model.add(MaxPooling1D(pool_size=2, padding='same'))
    model.add(LSTM(units=lstm_1_units, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=lstm_2_units, return_sequences=True))
    model.add(LSTM(units=lstm_3_units, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(Attention())
    model.add(Dense(units=dense_units, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model
# Random search over the hyperparameters requested in build_model: up to 10 trials,
# ranked by validation accuracy.
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials = 10) #creating randomsearch object
# NOTE(review): the test split is passed as validation data, so hyperparameters are selected
# on the same samples that are later used for evaluation — consider a separate validation split.
tuner.search(X_train_sc,y_train_ohe,epochs=20,validation_data=(X_test_sc,y_test_ohe)) # search best parameter values
# Keep the single best model returned by the tuner and print its layer summary
HyDeLA_model_tuned=tuner.get_best_models(num_models=1)[0]
HyDeLA_model_tuned.summary()
def HYDELA_model(encoder,X_train_transformed):
    """Build and compile the fixed-configuration Conv1D / stacked-LSTM / Attention classifier.

    Args:
        encoder: Fitted one-hot encoder; the length of ``encoder.categories_[0]`` sets the
            size of the softmax output layer.
        X_train_transformed: Training array whose axes 1 and 2 give the model input shape.

    Returns:
        A compiled Sequential model (Adam with learning rate 0.001, categorical
        cross-entropy loss, accuracy metric).

    NOTE(review): the layer sizes all lie on the grids searched in build_model, so they are
    presumably the result of a tuner run — confirm.
    """
    num_classes = len(encoder.categories_[0])
    n_steps, n_features = X_train_transformed.shape[1], X_train_transformed.shape[2]

    layer_stack = [
        Conv1D(16, kernel_size=5, activation='relu', input_shape=(n_steps, n_features), padding='same'),
        MaxPooling1D(2, padding='same'),
        LSTM(112, return_sequences=True),
        Dropout(0.2),
        LSTM(16, return_sequences=True),
        LSTM(48, return_sequences=True),
        Dropout(0.5),
        Attention(),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    HyDeLA_model = Sequential(layer_stack)

    HyDeLA_model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return HyDeLA_model

Model Training

复制代码
# Define an EarlyStopping callback to monitor validation accuracy and restore best weights
# (training stops after 10 epochs without improvement)
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)


# Create a model using the specified encoder and X_train_sc
hydela_model = HYDELA_model(encoder, X_train_sc)


# Train the model for at most 200 epochs in batches of 16, without reshuffling between epochs
# NOTE(review): the test split serves as validation data, so early stopping and the restored
# weights are chosen on the same samples that are evaluated afterwards.
history = hydela_model.fit(X_train_sc, y_train_ohe, epochs=200, batch_size=16, validation_data=(X_test_sc, y_test_ohe), callbacks=[callback], shuffle=False, verbose=1)
# Per-epoch training and validation loss recorded during fit()
training_loss, validation_loss = (history.history[key] for key in ('loss', 'val_loss'))

# Epoch numbers start at 1 for the x-axis
epochs = range(1, len(training_loss) + 1)

# Both loss curves on one set of axes
plt.plot(epochs, training_loss, marker='o', lw=1, label='Training loss')
plt.plot(epochs, validation_loss, marker='x', lw=1, label='Validation loss')
plt.title('Training and Validation Loss vs Number of Epochs')
plt.xlabel('Number of Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

Model Evaluation

复制代码
# Predict on the scaled test windows and map the model output back to class labels
# with the fitted encoder
y_pred = encoder.inverse_transform(hydela_model.predict(X_test_sc))


# Overall accuracy plus support-weighted precision, recall and F1
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    score(y_test, y_pred, average='weighted')
    for score in (precision_score, recall_score, f1_score)
)


# Report every metric together with the number of training samples
print(
    f"Training Sample Size = {len(X_train)}, F1 score is - {f1}",
    f"Training Sample Size = {len(X_train)}, Accuracy is - {accuracy}",
    f"Training Sample Size = {len(X_train)}, Precision is - {precision}",
    f"Training Sample Size = {len(X_train)}, Recall is - {recall}",
    sep="\n",
)
160/160 [==============================] - 3s 8ms/step
Training Sample Size = 256, F1 score is - 0.9947349330945912
Training Sample Size = 256, Accuracy is - 0.9947265625
Training Sample Size = 256, Precision is - 0.9947711101749215
Training Sample Size = 256, Recall is - 0.9947265625
# Class labels, taken from the fitted encoder so they are exactly the labels present in the data.
# The previous hand-written list ('Healthy', 'Re', 'Rm', 'I', 'W', 'K', 'Re & Rm', 'I & W') was
# not in the sorted order confusion_matrix uses by default, so tick labels could end up next
# to the wrong rows and columns.
class_labels = list(encoder.categories_[0])


# Create the confusion matrix with rows/columns in exactly the order of class_labels
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)


# Create a heatmap (rows: actual class, columns: predicted class)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, cmap='Reds', fmt='d', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel("Predicted Labels")
plt.ylabel("Actual Labels")
plt.title("Confusion Matrix")
plt.show()
复制代码
# Numeric code of each fault label; any label not listed here is mapped to 0.
_LABEL_TO_NUM = {'Re': 1, 'Rm': 2, 'I': 3, 'W': 4, 'K': 5, 'Re & Rm': 6, 'I & W': 7}


def lab_to_num(value):
  """
  Convert literal fault labels to their numeric codes.

  Mapping: 'Re' -> 1, 'Rm' -> 2, 'I' -> 3, 'W' -> 4, 'K' -> 5, 'Re & Rm' -> 6, 'I & W' -> 7,
  anything else -> 0.

  Args:
    value: numpy.ndarray of labels (any shape).

  Returns:
    A new array with the same shape and dtype as ``value`` in which every label is replaced
    by its code. Because the dtype is kept, a string array comes back holding the codes as
    strings ('0' ... '7'). The input array is left unchanged; the previous implementation
    overwrote it in place.
  """
  # order='C' guarantees that the reshape below is a view onto `result`, not another copy
  result = np.array(value, copy=True, order='C')
  flat = result.reshape(-1)
  for idx, label in enumerate(flat):
    flat[idx] = _LABEL_TO_NUM.get(label, 0)

  return result
# Codes that lab_to_num assigns to the seven fault classes (everything else becomes 0).
# They are written as strings because the labels are presumably stored in string arrays, into
# which lab_to_num writes the codes. Kept as a list rather than a set: y_pred is presumably a
# column array, whose rows are not hashable.
num_label = ['1', '2', '3', '4', '5', '6', '7']


# Binary targets for the ROC curve: 1 = any fault class, 0 = everything else.
# lab_to_num overwrites the array it is given, so it receives copies; y_test and y_pred keep
# their original class names for any later use.
test_roc = [1 if label in num_label else 0 for label in lab_to_num(y_test.copy())]
pred_roc = [1 if label in num_label else 0 for label in lab_to_num(y_pred.copy())]
# Compute ROC curve
# NOTE(review): pred_roc holds hard 0/1 decisions rather than scores, so the curve presumably
# has only one point between its end points; passing predicted probabilities would trace the
# full curve.
fpr, tpr, threshold = roc_curve(test_roc, pred_roc)


# Compute AUC (Area Under the Curve)
roc_auc = auc(fpr, tpr)


# Plot ROC curve together with the diagonal reference line
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='b', lw=2, label=f"AUC = {roc_auc:.2f}")
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
# Small margins so points at 0 and 1 are not drawn on the frame
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()
复制代码
擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。
知乎学术咨询:https://www.zhihu.com/consult/people/792359672131756032?isMe=1
擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。

擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。

相关推荐
Juicedata41 分钟前
JuiceFS v1.3-Beta2:集成 Apache Ranger,实现更精细化的权限控制
运维·人工智能·ai
じ☆ve 清风°43 分钟前
JavaScript 原型与原型链:深入理解 __proto__ 和 prototype 的由来与关系
开发语言·javascript·原型模式
这儿有一堆花43 分钟前
比特币:固若金汤的数字堡垒与它的四道防线
算法·区块链·哈希算法
客卿1231 小时前
力扣100-移动0
算法·leetcode·职场和发展
Work(沉淀版)3 小时前
DAY 40
人工智能·深度学习·机器学习
蓦然回首却已人去楼空4 小时前
Build a Large Language Model (From Scratch) 序章
人工智能·语言模型·自然语言处理
CM莫问4 小时前
<论文>(微软)WINA:用于加速大语言模型推理的权重感知神经元激活
人工智能·算法·语言模型·自然语言处理·大模型·推理加速
拾忆-eleven4 小时前
NLP学习路线图(二十六):自注意力机制
人工智能·深度学习
MYH5165 小时前
在NLP文本处理中,将字符映射到阿拉伯数字(构建词汇表vocab)的核心目的和意义
人工智能·深度学习·自然语言处理
程序员的世界你不懂5 小时前
Appium+python自动化(八)- 认识Appium- 下章
python·appium·自动化