[Signal Processing] Implementing a Typical Method for EEG-Based Autism Prediction

Theory

Autism is shaped by a combination of genetic and environmental factors. Because it is a spectrum disorder, every person with autism has a unique profile of strengths and challenges; the ways in which people with autism learn, think, and solve problems can range from highly skilled to severely challenged. Research shows that high-quality early intervention can improve learning, communication, and social skills, as well as underlying brain development. The diagnostic process, however, can take years. This project implements early detection of autism (normal vs. abnormal), providing a timely warning signal for early screening and intervention.

Tools

Autism EEG dataset

Implementation

Data loading
python
import numpy as np
import pandas as pd

from sklearn.model_selection import (train_test_split, cross_val_score, StratifiedKFold,
                                     GridSearchCV, RandomizedSearchCV)
from sklearn.preprocessing import StandardScaler, RobustScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, mutual_info_classif, f_classif, RFE, RFECV
from sklearn.pipeline import Pipeline
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (RandomForestClassifier, GradientBoostingClassifier,
                              StackingClassifier, VotingClassifier)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (accuracy_score, roc_auc_score, roc_curve,
                             confusion_matrix, ConfusionMatrixDisplay)
from xgboost import XGBClassifier
from category_encoders import MEstimateEncoder
from category_encoders.target_encoder import TargetEncoder
from imblearn.over_sampling import RandomOverSampler, SMOTE

train=pd.read_csv('/Autism_Prediction/train.csv')
test=pd.read_csv('/Autism_Prediction/test.csv')
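The cross-validation loop below uses a feature table `train_set` and a label array `y` that are not defined in the snippet above. A minimal sketch of how they could be derived from `train` is shown here; the target column `Class/ASD` and the identifier column `ID` are assumed names (as in the Kaggle Autism Prediction data) and may need adjusting for your copy of the dataset.
python
# Sketch (assumption): split the raw training table into features and labels.
# 'Class/ASD' and 'ID' are assumed column names -- adjust them if your CSV differs.
y = train['Class/ASD'].values
train_set = train.drop(columns=['ID', 'Class/ASD'])
print(train_set.shape, y.shape)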
K-fold cross-validation split
python
np.random.seed(1)  # some of the selectors are stochastic; fixing the seed makes runs reproducible
kf = StratifiedKFold(n_splits=2, random_state=None, shuffle=False)  # for cross-validation; random_state must be None when shuffle=False
score=[]

for train_index, val_index in kf.split(train_set,y):
    
    #indices for train and validation sets
    X_train, X_val =train_set.iloc[train_index,:], train_set.iloc[val_index,:]
    y_train, y_val = y[train_index], y[val_index]
    
    #******************************* CLEANING ***********************************

    #for train set
    X_train.ethnicity=X_train.ethnicity.str.replace('others','Others',regex=False)
    X_train.ethnicity=X_train.ethnicity.str.replace('?','Others',regex=False)
    X_train.relation=X_train.relation.str.replace('?','Others',regex=False)
    X_train.relation=X_train.relation.str.replace('Health care professional','Others',regex=False)
    
    
    #for validation set:
    X_val.ethnicity=X_val.ethnicity.str.replace('others','Others',regex=False)
    X_val.ethnicity=X_val.ethnicity.str.replace('?','Others',regex=False)
    X_val.relation=X_val.relation.str.replace('?','Others',regex=False)
    X_val.relation=X_val.relation.str.replace('Health care professional','Others',regex=False)

  
    #***************************************ENCODING****************************************** 
    
    #FOR ENCODING USE THE TRAINING VALUES, DO NOT CALCULATE THEM AGAIN FOR THE TEST SET!
    
    le=LabelEncoder()
    for col in ['jaundice','austim']:
        
        #for the training set:
        X_train[col]=le.fit_transform(X_train[col])
        
        #for the validation set:
        X_val[col]=le.transform(X_val[col])
         

    #*********************Encoding Relation Column***************************
    
    #create an encoding map, using the training set, then implementing it on val and test sets
    rel=X_train.relation.value_counts()
    rel=dict(zip(rel.index,range(len(rel))))
    
    #for the training set:
    X_train.relation=X_train.relation.map(rel)
    
    #for the validation set: categories not present in the map get a new code
    X_val.relation = X_val.relation.map(rel)
    X_val.loc[X_val.relation.isna(), 'relation'] = len(rel)
    
    
    
    #*********************Encoding Ethnicity Column***************************
    
    #create an encoding map, using the training set, then implementing it on val and test sets
    eth=X_train.ethnicity.value_counts()
    eth=dict(zip(eth.index,range(len(eth))))
    
    #for the training set:
    X_train.ethnicity=X_train.ethnicity.map(eth)
    
    #for the validation set: categories not present in the map get a new code
    X_val.ethnicity = X_val.ethnicity.map(eth)
    X_val.loc[X_val.ethnicity.isna(), 'ethnicity'] = len(eth)
    
    
    #*****************************Encoding Country Of Res******************************
    
    #create an encoding map, using the training set, then implementing it on val and test sets
    cont=X_train.contry_of_res.value_counts()
    cont=dict(zip(cont.index,range(len(cont))))
    
    #for the training set:
    X_train.contry_of_res=X_train.contry_of_res.map(cont)
    
    #for the validation set: categories not present in the map get a new code
    X_val.contry_of_res = X_val.contry_of_res.map(cont)
    X_val.loc[X_val.contry_of_res.isna(), 'contry_of_res'] = len(cont)

    #***************************Age Grouping***********************************
    
#     age_grouper(X_train)
#     age_grouper(X_val)
            
    #*******************************Standardization*************************
    rs = RobustScaler()  # RobustScaler is less sensitive to outliers than StandardScaler
    X_train[['result','age']] = rs.fit_transform(X_train[['result','age']])
    X_val[['result','age']] = rs.transform(X_val[['result','age']])
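The relation, ethnicity, and contry_of_res columns are all encoded with the same pattern: rank the categories by their training-fold frequency, map them to integers, and send categories unseen during training to a new code. A hypothetical helper such as `frequency_rank_encode` below could factor out that repetition; it is a sketch equivalent to the three blocks above, not part of the original pipeline.
python
# Hypothetical helper (not in the original code): frequency-rank encode one column,
# fitting the mapping on the training fold and applying it to the validation fold.
def frequency_rank_encode(X_train, X_val, col):
    counts = X_train[col].value_counts()
    mapping = dict(zip(counts.index, range(len(counts))))
    X_train[col] = X_train[col].map(mapping)
    X_val[col] = X_val[col].map(mapping)
    X_val.loc[X_val[col].isna(), col] = len(mapping)  # unseen categories get a dedicated code
    return X_train, X_val

# Usage inside the fold, equivalent to the three encoding blocks above:
# for col in ['relation', 'ethnicity', 'contry_of_res']:
#     X_train, X_val = frequency_rank_encode(X_train, X_val, col)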
Classification with different models
python
model_list = ['KNearestNeighbours', 'DecisionTree', 'LGBM', 'XGBRF', 'CatBoostClassifier', 'RandomForest', 'Logistic Regression', 'SVC']
K-nearest neighbours model
python
# K Neighbors Classifier

kn_clf = KNeighborsClassifier(n_neighbors=6)
kn_clf.fit(X_train, y_train)
y_pred = kn_clf.predict_proba(X_val)[:, 1]  # probability of the positive class
score.append(roc_auc_score(y_val, y_pred))

np.array(score)

cm = confusion_matrix(y_val, kn_clf.predict(X_val))
cmd = ConfusionMatrixDisplay(cm)
cmd.plot();
Decision tree model
python
# Decision Tree
dt_clf = DecisionTreeClassifier(max_leaf_nodes=10, random_state=0, criterion='entropy')
dt_clf.fit(X_train, y_train)
y_pred = dt_clf.predict_proba(X_val)[:, 1]
score.append(roc_auc_score(y_val, y_pred))

np.array(score)

cm = confusion_matrix(y_val, dt_clf.predict(X_val))
cmd = ConfusionMatrixDisplay(cm)
cmd.plot();
LightGBM model
python
# LightGBM
import lightgbm
lgb_clf = lightgbm.LGBMClassifier(max_depth=2, random_state=4)
lgb_clf.fit(X_train, y_train)
y_pred = lgb_clf.predict_proba(X_val)[:, 1]
score.append(roc_auc_score(y_val, y_pred))

np.array(score)

cm = confusion_matrix(y_val, lgb_clf.predict(X_val))
cmd = ConfusionMatrixDisplay(cm)
cmd.plot();
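model_list also names XGBRF, CatBoostClassifier, RandomForest, Logistic Regression, and SVC, whose code is not shown above. The sketch below shows how the remaining scikit-learn classifiers could be scored with the same ROC-AUC procedure; the hyperparameters are illustrative assumptions, not tuned values, and the XGBoost/CatBoost variants would follow the same pattern.
python
# Sketch (assumption): evaluate the remaining models from model_list the same way.
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

extra_models = {
    'RandomForest': RandomForestClassifier(n_estimators=200, random_state=0),
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'SVC': SVC(probability=True, random_state=0),  # probability=True enables predict_proba
}

for name, clf in extra_models.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict_proba(X_val)[:, 1]
    auc = roc_auc_score(y_val, y_pred)
    score.append(auc)
    print(f'{name}: ROC-AUC = {auc:.3f}')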

Code availability

For questions about this work or related project development, feel free to get in touch.
