题目
以下采用K-NN算法来解决水仙花的分类问题。每个样本有两个特征:第一个为水仙花的花萼长度,第二个为水仙花的花萼宽度,具体数据见表。
1)设置k=3,采用欧氏距离,分析分类精度为多少?
2)使用网格搜索方式找到最佳参数,并预测
3)可视化
我的数据集合就是这个
excel数据展示
代码
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
def model_selection(x_train, y_train):
    """Tune a k-nearest-neighbours classifier with a cross-validated grid search.

    Every combination of ``n_neighbors`` in (3, 5, 7, 8, 10) and ``p`` in
    (1, 2) is evaluated with 5-fold cross-validation. The winning parameter
    set and its score are printed before returning.

    Args:
        x_train: Feature matrix handed to ``GridSearchCV.fit``.
        y_train: Target labels handed to ``GridSearchCV.fit``.

    Returns:
        The ``best_estimator_`` of the finished search.
    """
    param_grid = {'n_neighbors': [3, 5, 7, 8, 10], 'p': [1, 2]}
    search = GridSearchCV(KNeighborsClassifier(), param_grid, verbose=2, cv=5)
    search.fit(x_train, y_train)
    print("Best Model:", search.best_params_, "Accuracy:", search.best_score_)
    return search.best_estimator_
def read(filename=r"data/shuixianhua.xlsx"):
    """Load samples and labels from two side-by-side tables in an Excel sheet.

    The sheet is read without a header and its first row is skipped
    (presumably column titles - confirm against the workbook). Columns 0-1
    and 3-4 hold the two features of each sample; columns 2 and 5 hold the
    matching labels. The right-hand table is appended below the left-hand
    one.

    Args:
        filename: Path of the workbook to read. Defaults to the path that
            used to be hard-coded in this function.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a float array of shape
        ``(n_samples, 2)`` and ``y`` is an int array of shape
        ``(n_samples,)``.
    """
    data = pd.read_excel(filename, header=None)
    body = data.iloc[1:]  # row 0 is not treated as a sample

    # When the two tables differ in length, the shorter one is padded with
    # empty cells; drop those fully empty rows so they do not reach astype(int).
    left = body.iloc[:, [0, 1, 2]].dropna(how="all")
    right = body.iloc[:, [3, 4, 5]].dropna(how="all")

    samples = np.vstack((left.values, right.values))  # stack tables vertically

    # header=None can leave the columns as generic objects; convert explicitly
    # so callers receive real numeric arrays.
    x = samples[:, :2].astype(float)
    y = samples[:, 2].astype(int)
    return x, y
def plot_decision_boundary(x, y, model):
    """Plot the class regions predicted by ``model`` with the samples on top.

    A mesh with spacing 0.02 is laid over the range of the two feature
    columns, extended by 1 on every side. ``model.predict`` is evaluated at
    each mesh point, the predictions are drawn as a coloured background, and
    the samples are scattered over it coloured by ``y``. The figure is shown
    with ``plt.show()``.

    Args:
        x: Array whose columns 0 and 1 are the two plotted features.
        y: Values used to colour the scattered samples.
        model: Object with a ``predict`` method accepting an ``(n, 2)`` array.
    """
    step = 0.02  # spacing between neighbouring mesh points

    # Two-colour maps: pale shades for the background, saturated for samples.
    region_colors = ListedColormap(['#FFAAAA', '#AAFFAA'])
    sample_colors = ListedColormap(['#FF0000', '#00FF00'])

    first, second = x[:, 0], x[:, 1]
    axis_one = np.arange(first.min() - 1, first.max() + 1, step)
    axis_two = np.arange(second.min() - 1, second.max() + 1, step)
    xx, yy = np.meshgrid(axis_one, axis_two)

    # Predict once for all mesh points, then restore the mesh layout.
    mesh_points = np.column_stack((xx.ravel(), yy.ravel()))
    Z = model.predict(mesh_points).reshape(xx.shape)

    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=region_colors)
    plt.scatter(first, second, c=y, cmap=sample_colors, edgecolor='k', s=20)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("KNN Decision Boundaries")
    plt.show()
if __name__ == '__main__':
    # Load the samples, pick the best KNN by grid search, then plot its regions.
    features, labels = read()
    plot_decision_boundary(features, labels, model_selection(features, labels))