分类问题-机器学习

  1. 分类:感知机

    根据图片的宽(x1)和高(x2),简单判断图片是横向(y=1)还是纵向(y=-1)

    训练数据:images1.csv

    复制代码
    x1,x2,y
    153,432,-1
    220,262,-1
    118,214,-1
    474,384,1
    485,411,1
    233,430,-1
    396,361,1
    484,349,1
    429,259,1
    286,220,1
    399,433,-1
    403,340,1
    252,34,1
    497,472,1
    379,416,-1
    76,163,-1
    263,112,1
    26,193,-1
    61,473,-1
    420,253,1
    python 复制代码
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Load the training set, skipping the CSV header row.
    train = np.loadtxt('images1.csv',delimiter=',',skiprows=1)
    
    # Columns 0 and 1: the features x1 and x2
    train_x = train[:,0:2]
    # Column 2: the label y
    train_y = train[:,2]
    
    # Optional scatter plot of the raw data (disabled)
    #plt.plot(train_x[train_y == 1,0],train_x[train_y == 1,1],'o')
    #plt.plot(train_x[train_y==-1,0],train_x[train_y==-1,1],'x')
    
    #plt.axis('scaled')
    #plt.show()
    
    # Weight initialization with two random values.
    # The decision boundary is w·x = w1x1 + w2x2 = 0
    w = np.random.rand(2)
    
    # Discriminant function
    def f(x):
        """Classify x by the sign of w·x.

        Returns 1 when the dot product of the module-level weights ``w``
        with ``x`` is non-negative, and -1 otherwise.
        """
        return 1 if np.dot(w, x) >= 0 else -1
    
    # Number of passes over the training set
    epoch = 10
    # Number of weight updates performed so far
    count = 0

    # Learn the weights: only misclassified samples trigger an update.
    for _ in range(epoch):
        for x, y in zip(train_x, train_y):
            if f(x) == y:
                # Correctly classified: leave w unchanged.
                continue
            w = w + y * x
            # Log every update
            count += 1
            print('第{}次:w={}'.format(count, w))

    # Decision boundary: w·x = w1x1 + w2x2 = 0  =>  x2 = -w1/w2*x1
    x1 = np.arange(0, 500)
    positive = train_y == 1
    negative = train_y == -1
    plt.plot(train_x[positive, 0], train_x[positive, 1], 'o')
    plt.plot(train_x[negative, 0], train_x[negative, 1], 'x')
    slope = -w[0] / w[1]
    plt.plot(x1, slope * x1, linestyle='dashed')
    plt.show()

    # Prediction: a 200x100 (landscape) image first, then a 100x200 (portrait) one.
    for size in ([200, 100], [100, 200]):
        print(f(size))
  2. 分类:逻辑回归

    训练数据:images2.csv

    复制代码
    x1,x2,y
    153,432,0
    220,262,0
    118,214,0
    474,384,1
    485,411,1
    233,430,0
    396,361,1
    484,349,1
    429,259,1
    286,220,1
    399,433,0
    403,340,1
    252,34,1
    497,472,1
    379,416,0
    76,163,0
    263,112,1
    26,193,0
    61,473,0
    420,253,1
    python 复制代码
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Load the training set, skipping the CSV header row.
    train = np.loadtxt('images2.csv',delimiter=',',skiprows=1)
    # Columns 0-1 are the features, column 2 is the label.
    train_x = train[:,0:2]
    train_y = train[:,2]
    
    # Parameter initialization: three random values (theta0, theta1, theta2)
    theta = np.random.rand(3)
    
    # Standardization statistics.
    # axis=0 computes the mean and standard deviation of each column.
    mu = train_x.mean(axis=0)
    sigma = train_x.std(axis=0)
    def standardize(x):
        """Return x shifted by the module-level ``mu`` and scaled by ``sigma``."""
        centered = x - mu
        return centered / sigma
    
    # Standardized copy of the training features (per-column mean/std scaling).
    train_z = standardize(train_x)
    
    # Prepend the constant feature x0
    def to_matrix(x):
        """Return x with a leading column of ones (the x0 = 1 bias feature).

        ``x`` must be a 2-D array; the result has one more column than ``x``.
        """
        n_rows = x.shape[0]
        # One column of ones with as many rows as x
        bias = np.ones((n_rows, 1))
        # Join the bias column and the features side by side
        return np.concatenate((bias, x), axis=1)
    
    # Design matrix: the standardized features with a leading column of ones.
    X = to_matrix(train_z)
    
    # Visualization of the standardized data (disabled: wrapped in a string literal)
    '''
    plt.plot(train_z[train_y==1,0],train_z[train_y==1,1],'o')
    plt.plot(train_z[train_y==0,0],train_z[train_y==0,1],'x')
    plt.show()
    '''
    
    # Sigmoid function
    def f(x):
        """Return sigmoid(x · theta) using the module-level parameters ``theta``."""
        z = np.dot(x, theta)
        return 1 / (1 + np.exp(-z))
    
    # Learning rate
    ETA = 1e-3
    
    # Number of training iterations
    epoch = 5000
    
    # Repeated learning: each step moves theta against the gradient
    # sum_i (f(x_i) - y_i) * x_i, computed over the whole training set.
    for _ in range(epoch):
        theta = theta - ETA*np.dot(f(X)-train_y,X)
    
    # Decision boundary: theta^T x = theta0*x0 + theta1*x1 + theta2*x2 = 0 with x0 = 1
    # => x2 = -(theta0 + theta1*x1)/theta2
    
    # NOTE: despite its name, x0 holds the x1-axis values used to draw the boundary.
    x0 = np.linspace(-2,2,100)
    # Scatter the standardized samples as (x1, x2): column 0 on the horizontal
    # axis and column 1 on the vertical axis, so they share axes with the
    # boundary line drawn below.
    plt.plot(train_z[train_y==1,0],train_z[train_y==1,1],'o')
    plt.plot(train_z[train_y==0,0],train_z[train_y==0,1],'x')
    plt.plot(x0,-(theta[0]+theta[1]*x0)/theta[2],linestyle='dashed')
    plt.show()
    
    # Prediction
    def classify1(x):
        """Return 0/1 labels for x: 1 where f(x) is at least 0.5, otherwise 0."""
        probabilities = f(x)
        # The comparison yields booleans; astype(np.int_) maps True -> 1, False -> 0.
        return (probabilities >= 0.5).astype(np.int_)
    
    # Classify two new images given as [x1, x2]: 200x100 and 100x200.
    queries = [[200, 100], [100, 200]]
    array = classify1(to_matrix(standardize(queries)))

    print(array)
  3. 分类:线性不可分分类问题

    训练数据:data3.csv

    复制代码
    x1,x2,y
    0.54508775,2.34541183,0
    0.32769134,13.43066561,0
    4.42748117,14.74150395,0
    2.98189041,-1.81818172,1
    4.02286274,8.90695686,1
    2.26722613,-6.61287392,1
    -2.66447221,5.05453871,1
    -1.03482441,-1.95643469,1
    4.06331548,1.70892541,1
    2.89053966,6.07174283,0
    2.26929206,10.59789814,0
    4.68096051,13.01153161,1
    1.27884366,-9.83826738,1
    -0.1485496,12.99605136,0
    -0.65113893,10.59417745,0
    3.69145079,3.25209182,1
    -0.63429623,11.6135625,0
    0.17589959,5.84139826,0
    0.98204409,-9.41271559,1
    -0.11094911,6.27900499,0
    python 复制代码
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Load the training set, skipping the CSV header row.
    train = np.loadtxt('data3.csv',delimiter=',',skiprows=1)
    # Columns 0-1 are the features, column 2 is the label.
    train_x = train[:,0:2]
    train_y = train[:,2]
    
    # Scatter plot of the raw data (disabled: wrapped in a string literal)
    '''
    plt.plot(train_x[train_y==1,0],train_x[train_y==1,1],'o')
    plt.plot(train_x[train_y==0,0],train_x[train_y==0,1],'x')
    plt.show()
    '''
    
    # Parameter initialization: four random values (theta0..theta3)
    theta = np.random.rand(4)
    
    # Accuracy history, one entry appended per training iteration
    accuracies = []
    
    # Standardization statistics: per-column mean and standard deviation
    mu = train_x.mean(axis=0)
    sigma = train_x.std(axis=0)
    def standardize(x):
        """Subtract the module-level ``mu`` from x and divide by ``sigma``."""
        shifted = x - mu
        return shifted / sigma
    
    # Standardized training features; these are what gets trained on and plotted.
    train_z = standardize(train_x)
    
    # Add the features x0 and x3
    def to_matrix(x):
        """Return the design matrix [1, x1, x2, x1^2] for a 2-D array x.

        x0 is a column of ones and x3 is the square of the first column of x.
        """
        ones_column = np.ones((x.shape[0], 1))
        # Slicing with :1 keeps the first column two-dimensional.
        first_squared = x[:, :1] ** 2
        return np.concatenate((ones_column, x, first_squared), axis=1)
    
    # Design matrix built from the standardized features: columns x0, x1, x2, x3.
    X = to_matrix(train_z)
    
    # Sigmoid function
    def f(x):
        """Return sigmoid(x · theta), with ``theta`` read from module scope."""
        logit = np.dot(x, theta)
        return 1 / (1 + np.exp(-logit))
    
    # Learning rate
    ETA = 1e-3
    
    # Number of training iterations
    epoch = 5000
    
    def classify1(x):
        """Threshold f(x) at 0.5 and return the result as integer 0/1 labels."""
        is_positive = f(x) >= 0.5
        return is_positive.astype(np.int_)
    
    # Repeated learning: every iteration updates theta from the whole training set.
    for _ in range(epoch):
        theta = theta - ETA*np.dot(f(X)-train_y,X)
        # Accuracy on the training set after this update: the mean of the
        # boolean "prediction equals label" array is the fraction of hits.
        result = classify1(X) == train_y
        accuracy = result.mean()
        accuracies.append(accuracy)
    
    # Decision boundary:
    # theta^T x = theta0*x0 + theta1*x1 + theta2*x2 + theta3*x3
    #           = theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 = 0   (x0 = 1, x3 = x1^2)
    # => x2 = -(theta0 + theta1*x1 + theta3*x1^2)/theta2
    x1 = np.linspace(-2,2,100)
    x2 = -(theta[0]+theta[1]*x1+theta[3]*x1**2)/theta[2]
    plt.plot(train_z[train_y==1,0],train_z[train_y==1,1],'o')
    plt.plot(train_z[train_y==0,0],train_z[train_y==0,1],'x')
    plt.plot(x1,x2,linestyle='dashed')
    plt.show()
    
    # Plot the accuracy curve (disabled)
    # x = np.arange(len(accuracies))
    # plt.plot(x,accuracies)
    # plt.show()

    因为训练数据只有 20 个,精度值只能是 0.05 的整数倍,所以 acc 曲线会有棱有角。

  4. 分类:线性不可分分类问题 随机梯度下降法的实现

    训练数据:同上

    python 复制代码
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Load the training set, skipping the CSV header row.
    train = np.loadtxt('data3.csv',delimiter=',',skiprows=1)
    # Columns 0-1 are the features, column 2 is the label.
    train_x = train[:,0:2]
    train_y = train[:,2]
    
    # Scatter plot of the raw data (disabled: wrapped in a string literal)
    '''
    plt.plot(train_x[train_y==1,0],train_x[train_y==1,1],'o')
    plt.plot(train_x[train_y==0,0],train_x[train_y==0,1],'x')
    plt.show()
    '''
    
    # Parameter initialization: four random values (theta0..theta3)
    theta = np.random.rand(4)
    
    # Accuracy history.
    # NOTE(review): nothing in the visible part of this script appends to it.
    accuracies = []
    
    # Standardization statistics: per-column mean and standard deviation
    mu = train_x.mean(axis=0)
    sigma = train_x.std(axis=0)
    def standardize(x):
        """Return (x - mu) / sigma using the module-level column statistics."""
        offset = x - mu
        return offset / sigma
    
    # Standardized training features, used for both training and plotting.
    train_z = standardize(train_x)
    
    # Add the features x0 and x3
    def to_matrix(x):
        """Expand a 2-D array x = [x1, x2] into the columns [1, x1, x2, x1^2]."""
        row_count = x.shape[0]
        intercept = np.ones((row_count, 1))
        # x[:, :1] is the first column kept as an (n, 1) array.
        squared = x[:, :1] ** 2
        return np.concatenate((intercept, x, squared), axis=1)
    
    # Design matrix built from the standardized features: columns x0, x1, x2, x3.
    X = to_matrix(train_z)
    
    # Sigmoid function
    def f(x):
        """Apply the sigmoid to the dot product of x and the module-level ``theta``."""
        activation = np.dot(x, theta)
        return 1 / (1 + np.exp(-activation))
    
    # Learning rate
    ETA = 1e-3
    
    # Number of passes over the training set
    epoch = 5000
    
    def classify1(x):
        """Return integer labels: 1 where f(x) >= 0.5 and 0 elsewhere."""
        above_threshold = f(x) >= 0.5
        return above_threshold.astype(np.int_)
    
    # Repeated learning
    for _ in range(epoch):
        # Stochastic gradient descent: visit the samples in a fresh random
        # order each pass and update theta after every single sample.
        order = np.random.permutation(X.shape[0])
        shuffled_X = X[order, :]
        shuffled_y = train_y[order]
        for x, y in zip(shuffled_X, shuffled_y):
            theta = theta - ETA * (f(x) - y) * x

    # Decision boundary:
    # theta^T x = theta0*x0 + theta1*x1 + theta2*x2 + theta3*x3
    #           = theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 = 0   (x0 = 1, x3 = x1^2)
    # => x2 = -(theta0 + theta1*x1 + theta3*x1^2)/theta2
    x1 = np.linspace(-2, 2, 100)
    x2 = -(theta[0] + theta[1] * x1 + theta[3] * x1 ** 2) / theta[2]
    ones_mask = train_y == 1
    zeros_mask = train_y == 0
    plt.plot(train_z[ones_mask, 0], train_z[ones_mask, 1], 'o')
    plt.plot(train_z[zeros_mask, 0], train_z[zeros_mask, 1], 'x')
    plt.plot(x1, x2, linestyle='dashed')
    plt.show()