例子:回归(多项式回归)

训练数据:text.csv
x,y
235,591
216,539
148,413
35,310
85,308
204,519
49,325
25,332
173,498
191,498
134,392
99,334
117,385
112,387
162,425
272,659
159,400
159,427
59,319
198,522
python
import numpy as np
import matplotlib.pyplot as plt
# Load the training set; skiprows=1 drops the "x,y" header row of text.csv.
train = np.loadtxt('text.csv', delimiter=',', skiprows=1)
train_x, train_y = train[:, 0], train[:, 1]

# Uncomment to inspect the raw training data:
# plt.plot(train_x, train_y, 'o')
# plt.show()

# Mean and standard deviation of the inputs, used to standardize them.
mu, sigma = train_x.mean(), train_x.std()
def standardize(x):
    """Standardize ``x`` using the training-set statistics.

    Subtracts the module-level ``mu`` and divides by the module-level
    ``sigma``, both computed from ``train_x``.

    Args:
        x: Scalar or NumPy array of raw input values.

    Returns:
        ``(x - mu) / sigma``, with the same shape as ``x``.
    """
    return (x - mu) / sigma
# Standardized (zero-mean, unit-variance) copy of the training inputs.
train_z = standardize(train_x)
# Uncomment to plot the standardized training data:
#plt.plot(train_z,train_y,'o')
#plt.show()
def MSE(x, y):
    """Return the mean squared error of the current model on ``(x, y)``.

    Used by the training loop's stopping condition. Predictions come from
    the module-level ``f``, so the result reflects whatever ``theta`` is at
    call time.

    Args:
        x: Design matrix with one row per sample.
        y: Target values, one per row of ``x``.

    Returns:
        The sum of squared residuals divided by the number of rows in ``x``.
    """
    residual = y - f(x)
    return (1 / x.shape[0]) * np.sum(residual ** 2)
# Three random initial values, one per model parameter; theta is the
# parameter vector (theta0, theta1, theta2).
theta = np.random.rand(3)
# History of mean squared errors recorded during training.
errors = []
def to_matrix(x, degree=2):
    """Build the polynomial design matrix for the 1-D inputs ``x``.

    Stacking all samples into one matrix lets the model be evaluated with a
    single product against ``theta``:
    ``theta0 + theta1 * x + theta2 * x**2 + ...``.

    Args:
        x: 1-D NumPy array of input values.
        degree: Highest power of ``x`` to include. Defaults to 2, which
            gives the columns ``[1, x, x**2]``.

    Returns:
        Array of shape ``(len(x), degree + 1)`` whose column ``p`` holds
        ``x**p``.

    Raises:
        ValueError: If ``degree`` is negative.
    """
    if degree < 0:
        raise ValueError("degree must be non-negative")
    # The bias column is built with np.ones so the result is always float.
    columns = [np.ones(x.shape[0])]
    columns.extend(x ** power for power in range(1, degree + 1))
    return np.vstack(columns).T
# Design matrix of the standardized training inputs (columns: 1, z, z**2).
X = to_matrix(train_z)
def f(x):
    """Predict targets for the design matrix ``x`` with the current ``theta``.

    Computes ``theta0 + theta1 * x1 + theta2 * x2`` for every row at once as
    a matrix product. ``theta`` is read from module scope at call time, so
    predictions change as training updates it.

    Args:
        x: Design matrix whose column count matches ``len(theta)``.

    Returns:
        ``np.dot(x, theta)``: one prediction per row of ``x``.
    """
    return np.dot(x, theta)
def E(x, y):
    """Return the least-squares objective of the current model on ``(x, y)``.

    Args:
        x: Design matrix with one row per sample.
        y: Target values, one per row of ``x``.

    Returns:
        Half the sum of squared residuals, ``0.5 * sum((y - f(x)) ** 2)``.
    """
    residual = y - f(x)
    return 0.5 * np.sum(residual ** 2)
# Learning rate (step size) for gradient descent.
ETA = 1e-3
# Drop in MSE between two consecutive iterations; starts above the stopping
# threshold so the loop body runs at least once.
diff = 1

# Record the error of the initial random parameters before training.
errors.append(MSE(X, train_y))
error = E(X, train_y)

# Keep updating until a step lowers the MSE by no more than 1e-2.
while diff > 1e-2:
    # Gradient-descent step on E: its gradient w.r.t. theta is
    # (f(X) - y) . X, summed over all training samples.
    theta = theta - ETA * np.dot(f(X) - train_y, X)
    # Record the new error and how much it improved on the previous one.
    errors.append(MSE(X, train_y))
    current_error = E(X, train_y)
    diff = errors[-2] - errors[-1]
    # Alternative stopping measure based on E instead of the MSE:
    # diff = error - current_error
    error = current_error

# Uncomment to plot the fitted curve over the standardized training data:
# x = np.linspace(-3, 3, 100)
# plt.plot(train_z, train_y, 'o')
# plt.plot(x, f(to_matrix(x)))
# plt.show()

# Plot how the MSE changed over the iterations.
x = np.arange(len(errors))
plt.plot(x, errors)
plt.show()
