Machine Learning Template Code (Final Exam Review, Personal Archive)

Machine learning review code.

Implementing KNN with sklearn

python
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

def model_selection(x_train, y_train):
    ## grid search over the hyperparameters
    ## p selects the distance metric: p=1 is Manhattan distance, p=2 is Euclidean distance
    params = {'n_neighbors': [3, 5, 7], 'p': [1, 2]}
    model = KNeighborsClassifier()
    gs = GridSearchCV(model, params, verbose=2, cv=5)
    gs.fit(x_train, y_train)
    print("Best Model:", gs.best_params_, "Accuracy:", gs.best_score_)
    print(gs.best_estimator_)
    return gs.best_estimator_

def read():
    filename = r"data/shuixianhua.xlsx"
    data = pd.read_excel(filename, header=None)
    ## iloc[rows, columns]
    x1 = data.iloc[1:, [0, 1]].values
    x2 = data.iloc[1:, [3, 4]].values
    # print(x2)
    y1 = data.iloc[1:, 2].values
    y2 = data.iloc[1:, 5].values
    x = np.vstack((x1, x2))  # stack the two feature blocks vertically
    print("x:")
    print(x)
    y = np.hstack((y1, y2))  # concatenate the two label blocks horizontally
    print("y:")
    print(y)

    ## needed because y was read from Excel as strings; if it loads as numeric this can be skipped
    ## convert y to int
    y = y.astype(int)
    
    return x, y


if __name__ == '__main__':
    x, y = read()
    best_model = model_selection(x, y)
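
If a held-out evaluation is also wanted, here is a minimal sketch of scoring the returned best estimator on a test split (the train/test split and the accuracy_score call are additions, not part of the original script):

python
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Hypothetical usage: hold out 30% of the data, tune on the training part,
# then score the tuned KNN model on the unseen test part.
x, y = read()
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, shuffle=True, random_state=20)
best_model = model_selection(x_train, y_train)
y_pred = best_model.predict(x_test)
print("Test accuracy:", accuracy_score(y_test, y_pred))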

Implementing linear regression with sklearn

Dataset preview

python
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
def MAE(y, y_pre):
    return np.mean(np.abs(y - y_pre))

def MSE(y, y_pred):
    return np.mean((y - y_pred) ** 2)

def RMSE(y, y_pred):
    return np.sqrt(MSE(y, y_pred))

def MAPE(y, y_pred):
    return np.mean(np.abs(y - y_pred) / np.abs(y))

def R2(y, y_pred):
    u = np.sum((y - y_pred) ** 2)      # residual sum of squares
    v = np.sum((y - np.mean(y)) ** 2)  # total sum of squares
    return 1 - (u / v)

def evaluate(name, y, y_pre):
    mae = MAE(y, y_pre)
    mse = MSE(y, y_pre)
    rmse = RMSE(y, y_pre)
    mape = MAPE(y, y_pre)
    r2 = R2(y, y_pre)
    print(f"{name} MAE:{mae}, MSE:{mse}, RMSE:{rmse}, MAPE:{mape}, R2:{r2}")

def read():
    filename = r"../data/ComposePlot.xlsx"
    data=pd.read_excel(filename,header=None)
    x1 = data.iloc[2:, [0,]].values
    y1 = data.iloc[2:,1].values

    x2 = data.iloc[2:,[2,]].values
    y2 = data.iloc[2:,3].values

    x3 = data.iloc[2:,[4,]].values
    y3 = data.iloc[2:,5].values

    x4 = data.iloc[2:,[6,]].values
    y4 = data.iloc[2:,7].values
    return x1,y1,x2,y2,x3,y3,x4,y4

def getModel(x,y):
    model = LinearRegression()
    model.fit(x,y)
    return model

def main(x1, y1, x2, y2, x3, y3, x4, y4):
    model1 = getModel(x1, y1)
    model2 = getModel(x2, y2)
    model3 = getModel(x3, y3)
    model4 = getModel(x4, y4)
    evaluate("model1", y1, model1.predict(x1))
    evaluate("model2", y2, model2.predict(x2))
    evaluate("model3", y3, model3.predict(x3))
    evaluate("model4", y4, model4.predict(x4))




if __name__ == '__main__':
    x1, y1, x2, y2, x3, y3, x4, y4 = read()
    main(x1, y1, x2, y2, x3, y3, x4, y4)
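
If the fitted line itself is needed (for example to write down y = kx + b), the slope and intercept can be read off each model; a small sketch, reusing read() and getModel() from above:

python
# Hypothetical: inspect the fitted parameters of the first model.
x1, y1, x2, y2, x3, y3, x4, y4 = read()
model1 = getModel(x1, y1)
print("slope k:", model1.coef_)          # one coefficient per feature
print("intercept b:", model1.intercept_)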

Implementing logistic regression with sklearn

Dataset preview

python
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression


def main(x, y):
    model = LogisticRegression()
    model.fit(x, y)
    print(model.predict(x))


def read():
    filename = "data/student.xlsx"
    data = pd.read_excel(filename, header=None)
    x = data.iloc[1:, [0, 1]].values
    y = data.iloc[1:, 2].values
    print(x)
    print(y)
    return x, y


if __name__ == '__main__':
    x, y = read()
    main(x, y)
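
The script only prints the in-sample predictions; a minimal sketch of also scoring them against the true labels (the accuracy_score and predict_proba calls are additions):

python
from sklearn.metrics import accuracy_score

# Hypothetical: fit on the full data and compare in-sample predictions
# with the true labels.
x, y = read()
model = LogisticRegression()
model.fit(x, y)
y_pred = model.predict(x)
print("Training accuracy:", accuracy_score(y, y_pred))
print("Predicted probabilities:\n", model.predict_proba(x))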

Implementing SVM (support vector machine) with sklearn

python
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, \
    f1_score


def load_data():  # load the iris dataset
    data = load_iris()
    x, y = data.data, data.target
    x_train, x_test, y_train, y_test = \
        train_test_split(x, y, test_size=0.3,
                         shuffle=True, random_state=20)
    return x, y, x_train, x_test, y_train, y_test

## boilerplate: this grid-search block can be written as-is from memory
def model_selection(x_train, y_train):
    model = SVC()
    paras = {'C': np.arange(1, 10, 5),
             # rbf: Gaussian (RBF) kernel, linear: linear kernel, poly: polynomial kernel
             'kernel': ['rbf', 'linear', 'poly'],
             'degree': np.arange(1, 10, 2),
             'gamma': ['scale', 'auto'],
             'coef0': np.arange(-10, 10, 5)
             }
    gs = GridSearchCV(model, paras, cv=3, verbose=2, n_jobs=3)
    gs.fit(x_train, y_train)
    print('best score:', gs.best_score_)
    print('best parameters:', gs.best_params_)
    return gs.best_params_


def train(x_train, x_test, y_train, y_test, C, gamma, kernel):
    model = SVC(C=C, kernel=kernel, gamma=gamma)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    # build the confusion matrix
    confusion = confusion_matrix(y_test, y_pred)

    print("Confusion matrix:\n", confusion)
    # classification metrics: accuracy, precision, recall, and F1 score
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 score:", f1)

if __name__ == '__main__':
    X, Y, x_train, x_test, y_train, y_test = load_data()
    best_params = model_selection(x_train, y_train)
    ## this step is also boilerplate and can be written as-is
    train(x_train, x_test, y_train, y_test, best_params["C"], best_params["gamma"], best_params["kernel"])

Additional notes

Splitting the data into training and test sets

python
x_train, x_test, y_train, y_test = \
    train_test_split(x, y, test_size=0.3,
                     shuffle=True, random_state=20)

Explanation from ChatGPT

Yes, this code splits the dataset into a training set and a test set. Specifically, it uses the train_test_split function to split the feature data x and the label data y into four parts: x_train (the training features), x_test (the test features), y_train (the training labels), and y_test (the test labels).

The main parameters of this call and what they do:

  • x is the feature data.
  • y is the label data.
  • test_size=0.3 sets the proportion of the test set; here 30% of the data is used for testing.
  • shuffle=True shuffles the data before splitting, to make the split random.
  • random_state=20 is a random seed that makes the split identical on every run, which helps reproduce experimental results.

In short, just memorize this call together with what each parameter means.
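
A quick sanity check of what the split produces, using made-up random data in place of the real dataset:

python
import numpy as np
from sklearn.model_selection import train_test_split

# Stand-in data: 100 samples with 4 features and binary labels.
x = np.random.randn(100, 4)
y = np.random.randint(0, 2, size=100)
x_train, x_test, y_train, y_test = \
    train_test_split(x, y, test_size=0.3,
                     shuffle=True, random_state=20)
print(x_train.shape, x_test.shape)  # (70, 4) (30, 4)
print(y_train.shape, y_test.shape)  # (70,) (30,)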

Normalization

python
def hypo(x, w, b):
    z = np.matmul(x, w) + b       # linear score z = wx + b
    h_x = 1 / (1 + np.exp(-z))    # sigmoid, maps z into (0, 1)
    h_x = (h_x >= 0.5) * 1        # threshold at 0.5 to get 0/1 labels
    return h_x

From the textbook, p. 49. I don't fully understand how "normalization" is meant to be used here; in the code z = wx + b, and the 1 / (1 + exp(-z)) step is the sigmoid (logistic) function, which maps z into (0, 1) before thresholding at 0.5.
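
A tiny made-up example of what hypo computes: the linear score z = wx + b is squashed into (0, 1) by the sigmoid and then thresholded at 0.5 to give hard 0/1 predictions (all numbers below are arbitrary):

python
import numpy as np

# Toy input: 3 samples with 2 features, plus made-up weights and bias.
x = np.array([[1.0, 2.0], [0.5, -1.0], [-2.0, 0.3]])
w = np.array([0.8, -0.4])
b = 0.1

z = np.matmul(x, w) + b         # linear scores, one per sample
probs = 1 / (1 + np.exp(-z))    # sigmoid values in (0, 1)
labels = (probs >= 0.5) * 1     # hard 0/1 predictions
print(z)
print(probs)
print(labels)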

Implementing linear regression from scratch

python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# normalization function (min-max scaling to [0, 1])
def normalize_data(data):
    min_val = np.min(data)
    max_val = np.max(data)
    normalized_data = (data - min_val) / (max_val - min_val)
    return normalized_data


def prediction(X, W, bias):
    return np.matmul(X, W) + bias


def cost_function(X, y, W, bias):
    m, n = X.shape
    y_hat = prediction(X, W, bias)
    return 0.5 * (1 / m) * np.sum((y - y_hat) ** 2)


def gradient_descent(X, y, W, bias, alpha):
    m, n = X.shape
    y_hat = prediction(X, W, bias)
    grad_w = -(1 / m) * np.matmul(X.T, (y - y_hat))
    grad_b = -(1 / m) * np.sum(y - y_hat)
    W = W - alpha * grad_w
    bias = bias - alpha * grad_b
    return W, bias


def train(X, y, ite=200):
    m, n = X.shape
    W, b, alpha, costs = np.random.randn(n, 1), 0.1, 0.2, []

    for i in range(ite):
        costs.append(cost_function(X, y, W, b))
        W, b = gradient_descent(X, y, W, b, alpha)

    return costs


def read():
    filename = r"../../data/easy_test.xlsx"
    data = pd.read_excel(filename, header=None)
    x = data.iloc[2:, [0, ]].values
    y = data.iloc[2:, 1].values.reshape(-1, 1)  # column vector, so it broadcasts correctly with the (m, 1) predictions

    # normalize the feature data x
    x_normalized = normalize_data(x)

    return x_normalized, y


if __name__ == '__main__':
    x, y = read()
    costs = train(x, y)
    # print(costs)
    # plot the training loss curve
    plt.figure()
    plt.plot(range(len(costs)), costs, marker='o', linestyle='-', color='b', label='Training Loss')
    plt.xlabel('Iteration')
    plt.ylabel('Cost')
    plt.title('Training Loss')
    plt.legend()
    plt.grid(True)
    plt.show()

Implementing logistic regression from scratch
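
A minimal sketch in the same style as the from-scratch linear regression above, combining the sigmoid hypothesis with gradient descent on the cross-entropy loss; the toy data, learning rate, and iteration count are made up for illustration:

python
import numpy as np


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def predict_proba(X, W, bias):
    # P(y = 1 | x) = sigmoid(Xw + b)
    return sigmoid(np.matmul(X, W) + bias)


def cost_function(X, y, W, bias):
    # binary cross-entropy loss
    m = X.shape[0]
    h = predict_proba(X, W, bias)
    eps = 1e-12  # avoid log(0)
    return -(1 / m) * np.sum(y * np.log(h + eps) + (1 - y) * np.log(1 - h + eps))


def gradient_descent(X, y, W, bias, alpha):
    m = X.shape[0]
    h = predict_proba(X, W, bias)
    grad_w = (1 / m) * np.matmul(X.T, (h - y))
    grad_b = (1 / m) * np.sum(h - y)
    return W - alpha * grad_w, bias - alpha * grad_b


def train(X, y, ite=500, alpha=0.5):
    m, n = X.shape
    W, b, costs = np.zeros((n, 1)), 0.0, []
    for _ in range(ite):
        costs.append(cost_function(X, y, W, b))
        W, b = gradient_descent(X, y, W, b, alpha)
    return W, b, costs


if __name__ == '__main__':
    # Toy separable data: 1 feature, labels 0/1 (made up for illustration).
    X = np.array([[-0.9], [-0.4], [0.4], [0.9]])
    y = np.array([[0], [0], [1], [1]])
    W, b, costs = train(X, y)
    print("final cost:", costs[-1])
    print("predictions:", (predict_proba(X, W, b) >= 0.5) * 1)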
