机器学习——5.案例: 乳腺癌预测

案例目的

通过已标注的数据，训练出模型来预测患者是否有患乳腺癌。

该问题属于二分类问题，所以可以使用Sigmoid激活函数，损失用BCE函数

代码逻辑步骤

读取数据
训练集与测试集拆分
数据标准化
数据转化为Pytorch张量
label维度转换
定义模型
定义损失计算函数
定义优化器
定义梯度下降函数
模型训练（正向传播、计算损失、反向传播、梯度清空）
模型测试
精度计算

代码实现

python 复制代码

import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


df = pd.read_csv('/Volumes/Sophia/机器学习/day03/code/breast_cancer.csv')
X = df[df.columns[0:-1]].values
Y = df[df.columns[-1]].values
# 数据集拆分
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=5)

# 数据标准化
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

# 转化为张量
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
Y_train = torch.from_numpy(Y_train.astype(np.float32))
Y_test = torch.from_numpy(Y_test.astype(np.float32))
# 标签转化为二维数据
# print(Y_train.shape)
Y_train = Y_train.view(Y_train.shape[0],-1)
Y_test = Y_test.view(Y_test.shape[0],-1)

# 定义模型
class Model(torch.nn.Module):
    def __init__(self,n_input_features):
        super(Model,self).__init__()
        self.linear = torch.nn.Linear(n_input_features,1)
    def forward(self,x):
        y = torch.sigmoid(self.linear(x))
        return y

n_features = X_train.shape[1]    
# 定义损失函数
model = Model(n_features)
loss = torch.nn.BCELoss()
# 定义优化器
# 学习率
learning_rate = 0.001
optimzier = torch.optim.SGD(model.parameters(),lr=learning_rate)
# 定义梯度下降函数
def gradient_descent():
    pre_y = model(X_train)
    l = loss(pre_y,Y_train)
    l.backward()
    optimzier.step()
    optimzier.zero_grad()
    return l,list(model.parameters())

# 模型训练
for i in range(500):
    l,pa = gradient_descent()
    if i % 50 == 0:
        print(l,pa)

# 模型测试
index = np.random.randint(0,X_test.shape[0])
pre = model(X_test[index])
print(pre,Y_test[index])

# 计算模型准确率
pres_y = model(X_test).round()
result = np.where(pres_y==Y_test,1,0)
ac = np.sum(result)/result.size
print(ac)