import time

import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
加载数据
我们加载的是葡萄酒(wine)数据集
# Load the scikit-learn wine classification dataset (a dict-like Bunch).
wine = datasets.load_wine()

# Feature matrix and class labels. The train/test splits later in this file
# reference X and y, but the original never bound them.
X = wine['data']
y = wine['target']

# Bare expression: echoes the dataset when this is the last line of an
# interactive cell; it has no effect when run as a plain script.
wine
LR逻辑斯蒂回归模型应用
import warnings

# Silence every warning category for the rest of the session so that
# warning messages do not clutter the output of the experiments.
warnings.filterwarnings(action='ignore')
# Average LogisticRegression test accuracy over repeated random 80/20 splits.
# The notebook-only `%%time` cell magic is replaced by explicit wall-clock
# timing so the code is valid Python outside Jupyter.
# NOTE(review): X and y must already hold the features/labels - confirm they
# are defined before this point.
n_runs = 100  # single source of truth for both the loop and the averaging
start = time.perf_counter()
score = 0
for i in range(n_runs):
    # A fresh random split each run; no random_state, so results vary.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    lr = LogisticRegression()
    lr.fit(X_train, y_train)
    s = lr.score(X_test, y_test)
    # Accumulate the running mean directly.
    score += s / n_runs
print('LR逻辑斯蒂回归算法多次运算平均是', score)
print('Wall time: {:.2f} s'.format(time.perf_counter() - start))
SVC支持向量机模型应用
# Average SVC test accuracy over repeated random 80/20 splits.
# The notebook-only `%%time` cell magic is replaced by explicit wall-clock
# timing so the code is valid Python outside Jupyter.
# NOTE(review): X and y must already hold the features/labels - confirm they
# are defined before this point.
n_runs = 1000  # single source of truth for both the loop and the averaging
start = time.perf_counter()
score = 0
for i in range(n_runs):
    # A fresh random split each run; no random_state, so results vary.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    model = SVC()
    model.fit(X_train, y_train)
    s = model.score(X_test, y_test)
    # Accumulate the running mean directly.
    score += s / n_runs
print('SVC算法多次运算平均是', score)
print('Wall time: {:.2f} s'.format(time.perf_counter() - start))
决策树模型应用
# Average DecisionTreeClassifier test accuracy over repeated random 80/20
# splits. The notebook-only `%%time` cell magic is replaced by explicit
# wall-clock timing so the code is valid Python outside Jupyter.
# NOTE(review): X and y must already hold the features/labels - confirm they
# are defined before this point.
n_runs = 1000  # single source of truth for both the loop and the averaging
start = time.perf_counter()
score = 0
for i in range(n_runs):
    # A fresh random split each run; no random_state, so results vary.
    # X_train / y_train stay bound after the loop and are reused further
    # down the file when a tree is refit for feature importances.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)
    s = model.score(X_test, y_test)
    # Accumulate the running mean directly.
    score += s / n_runs
print('决策树算法多次运算平均是', score)
print('Wall time: {:.2f} s'.format(time.perf_counter() - start))
不同算法总结对比
决策树对数据是否归一化不敏感
逻辑回归,如果不进行归一化,准确率降低,运行时间会增加
SVC支持向量机,如果不进行归一化,准确率会大大降低
# Fit a fresh decision tree on whatever X_train / y_train are currently bound
# (the split left over from the last train_test_split call) and look at the
# importance score the tree assigns to each feature.
model = DecisionTreeClassifier().fit(X_train, y_train)

# Bare expression: shown only as the last line of an interactive cell.
model.feature_importances_
回归模型中,就线性回归而言,可以用系数(coef_)的大小来表示特征的重要性