以Income、House age、Numbers Of Rooms、Population、Area为输入变量,建立多因子模型,预测合理房价price,评估模型表现。
代码如下:
python复制代码
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
from matplotlib import pyplot as plt
data=pd.read_csv('usa_housing_price.csv')
fig = plt.figure(figsize=[10,10])
fig1=plt.subplot(2,3,1) # 表示2行3列,第1个图
plt.scatter(data.loc[:,'Avg.Area Income'],data.loc[:,'Price'])
plt.title('Price vs Area Income')
fig2=plt.subplot(2,3,2) # 表示2行3列,第2个图
plt.scatter(data.loc[:,'Avg.Area House Age'],data.loc[:,'Price'])
plt.title('Price vs House Age')
fig3=plt.subplot(2,3,3) # 表示2行3列,第3个图
plt.scatter(data.loc[:,'Avg.Area Number of Rooms'],data.loc[:,'Price'])
plt.title('Price vs Avg.Area Number of Rooms')
# fig4=plt.subplot(2,3,4) # 表示2行3列,第4个图
# plt.scatter(data.loc[:,'Area Population'],data.loc[:,'Price'])
# plt.title('Price vs Area Population')
fig5=plt.subplot(2,3,5) # 表示2行3列,第5个图
plt.scatter(data.loc[:,'size'],data.loc[:,'Price'])
plt.title('Price vs size')
plt.show()
Y=data.loc[:,'Price']
# define X_multi
X_multi =data.drop(['Price'],axis=1) # 除了Price的变量都放入X_multi,不需要reshape
# set up 2nd linear model
LR_multi=LinearRegression()
LR_multi.fit(X_multi,Y)
# make prediction
y_predict_multi=LR_multi.predict(X_multi)
print(y_predict_multi)
# 模型评估
mean_squared_error_multi=mean_squared_error(Y,y_predict_multi)
r2_score_multi =r2_score(Y,y_predict_multi)
print(mean_squared_error_multi,r2_score_multi)
fig6=plt.figure(figsize=[8,5])
plt.scatter(Y,y_predict_multi) # 拟合真实Y和预测Y关系图像
plt.show()
# 使用模型进行预测Price
x_test=[65000,5,5,30000,200]
x_test=np.array(x_test).reshape(1,-1)
y_test_predict=LR_multi.predict(x_test)
print("预测房价",y_test_predict)