使用tensorflow的线性回归的例子(九)

from future import absolute_import, division, print_function, unicode_literals

import numpy as np

import pandas as pd

import seaborn as sb

import tensorflow as tf

from tensorflow import keras as ks

from tensorflow.estimator import LinearRegressor

from sklearn import datasets

from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_squared_error, r2_score

print(tf.version)

"""## Load and configure the Boston Housing Dataset"""

boston_load = datasets.load_boston()

feature_columns = boston_load.feature_names

target_column = boston_load.target

boston_data = pd.DataFrame(boston_load.data, columns=feature_columns).astype(np.float32)

boston_data'MEDV' = target_column.astype(np.float32)

boston_data.head()

"""## Checking the relation between the variables using Pairplot and Correlation Graph"""

sb.pairplot(boston_data, diag_kind="kde", height=3, aspect=0.6)

correlation_data = boston_data.corr()

correlation_data.style.background_gradient(cmap='coolwarm', axis=None)

"""## Descriptive Statistics - Central Tendency and Dispersion"""

stats = boston_data.describe()

boston_stats = stats.transpose()

boston_stats

"""## Select the required columns"""

X_data = boston_data\[i for i in boston_data.columns if i not in \['MEDV']]

Y_data = boston_data\['MEDV']

"""## Train Test Split"""

training_features , test_features ,training_labels, test_labels = train_test_split(X_data , Y_data , test_size=0.2)

print('No. of rows in Training Features: ', training_features.shape0)

print('No. of rows in Test Features: ', test_features.shape0)

print('No. of columns in Training Features: ', training_features.shape1)

print('No. of columns in Test Features: ', test_features.shape1)

print('No. of rows in Training Label: ', training_labels.shape0)

print('No. of rows in Test Label: ', test_labels.shape0)

print('No. of columns in Training Label: ', training_labels.shape1)

print('No. of columns in Test Label: ', test_labels.shape1)

stats = training_features.describe()

stats = stats.transpose()

stats

stats = test_features.describe()

stats = stats.transpose()

stats

"""## Normalize Data"""

def norm(x):

stats = x.describe()

stats = stats.transpose()

return (x - stats'mean') / stats'std'

normed_train_features = norm(training_features)

normed_test_features = norm(test_features)

"""## Build the Input Pipeline for TensorFlow model"""

def feed_input(features_dataframe, target_dataframe, num_of_epochs=10, shuffle=True, batch_size=32):

def input_feed_function():

dataset = tf.data.Dataset.from_tensor_slices((dict(features_dataframe), target_dataframe))

if shuffle:

dataset = dataset.shuffle(2000)

dataset = dataset.batch(batch_size).repeat(num_of_epochs)

return dataset

return input_feed_function

train_feed_input = feed_input(normed_train_features, training_labels)

train_feed_input_testing = feed_input(normed_train_features, training_labels, num_of_epochs=1, shuffle=False)

test_feed_input = feed_input(normed_test_features, test_labels, num_of_epochs=1, shuffle=False)

"""## Model Training"""

feature_columns_numeric = tf.feature_column.numeric_column(m) for m in training_features.columns

linear_model = LinearRegressor(feature_columns=feature_columns_numeric, optimizer='RMSProp')

linear_model.train(train_feed_input)

"""## Predictions"""

train_predictions = linear_model.predict(train_feed_input_testing)

test_predictions = linear_model.predict(test_feed_input)

train_predictions_series = pd.Series(p\['predictions'0 for p in train_predictions])

test_predictions_series = pd.Series(p\['predictions'0 for p in test_predictions])

train_predictions_df = pd.DataFrame(train_predictions_series, columns='predictions')

test_predictions_df = pd.DataFrame(test_predictions_series, columns='predictions')

training_labels.reset_index(drop=True, inplace=True)

train_predictions_df.reset_index(drop=True, inplace=True)

test_labels.reset_index(drop=True, inplace=True)

test_predictions_df.reset_index(drop=True, inplace=True)

train_labels_with_predictions_df = pd.concat(training_labels, train_predictions_df, axis=1)

test_labels_with_predictions_df = pd.concat(test_labels, test_predictions_df, axis=1)

"""## Validation"""

def calculate_errors_and_r2(y_true, y_pred):

mean_squared_err = (mean_squared_error(y_true, y_pred))

root_mean_squared_err = np.sqrt(mean_squared_err)

r2 = round(r2_score(y_true, y_pred)*100,0)

return mean_squared_err, root_mean_squared_err, r2

train_mean_squared_error, train_root_mean_squared_error, train_r2_score_percentage = calculate_errors_and_r2(training_labels, train_predictions_series)

test_mean_squared_error, test_root_mean_squared_error, test_r2_score_percentage = calculate_errors_and_r2(test_labels, test_predictions_series)

print('Training Data Mean Squared Error = ', train_mean_squared_error)

print('Training Data Root Mean Squared Error = ', train_root_mean_squared_error)

print('Training Data R2 = ', train_r2_score_percentage)

print('Test Data Mean Squared Error = ', test_mean_squared_error)

print('Test Data Root Mean Squared Error = ', test_root_mean_squared_error)

print('Test Data R2 = ', test_r2_score_percentage)

相关推荐
2601_961875243 分钟前
法考资料2026|全套|资料已整理
数据结构·算法·链表·贪心算法·eclipse·线性回归·动态规划
羊羊小栈1 天前
基于GraphRAG的地质矿产知识管理系统(Neo4j_大语言模型)
人工智能·语言模型·自然语言处理·毕业设计·neo4j·大作业
金融小师妹2 天前
AI因子共振模型显示:金银比突破区间上沿,白银定价逻辑进入再校准阶段
人工智能·算法·均值算法·线性回归
夜郎king2 天前
SpringBoot 整合 Neo4j 实战:从零搭建经典小说知识图谱完整方案
spring boot·知识图谱·neo4j
羊羊小栈3 天前
基于知识图谱(Neo4j)和大语言模型(LLM)的图检索增强(GraphRAG)的数控车床主轴系统故障诊断智能问答系统
人工智能·语言模型·毕业设计·知识图谱·创业创新·neo4j·大作业
chloe23334 天前
【动手学深度学习】笔记1:简单的线性回归
笔记·深度学习·线性回归
_Rookie._4 天前
neo4j图形数据库 -基础语法
neo4j
wayz114 天前
Overlap:SLOPE(线性回归斜率)技术指标详解
算法·金融·数据分析·回归·线性回归·量化交易·特征工程
星川皆无恙4 天前
基于BERT+LSTM+CRF与知识图谱的医疗智能问答系统实战:Neo4j图数据库+实体识别+意图分析完整项目
数据库·人工智能·深度学习·bert·lstm·知识图谱·neo4j
千寻girling5 天前
一周没跑步了 ,今日跑步 5KM , 哑铃+健身 20min , 俯卧撑 30 个 ;
数据结构·c++·python·算法·leetcode·职场和发展·线性回归