逻辑回归 Logistic regression

这个脚本展示如何用TensorFlow求解逻辑回归:$y = \mathrm{sigmoid}(A \times x + b)$

我们使用低出生体重(low birth weight)数据集,具体来说:

```

y = 0 or 1 = low birth weight

x = demographic and medical history data

import matplotlib.pyplot as plt

import numpy as np

import tensorflow as tf

import requests

from tensorflow.python.framework import ops

import os.path

import csv

# Reset TensorFlow's default graph before anything else in the script is built.
ops.reset_default_graph()

# Legacy TensorFlow seeding call, left disabled by the author.
#tf.set_random_seed(42)

# Seed NumPy's global random number generator.
np.random.seed(42)

# Name of the local CSV file that caches the birth-weight data.
birth_weight_file = 'birth_weight1.csv'

# Download the data and create the CSV file only if it does not already exist
# in the current directory.  The guard, the header/row parsing and the
# `with open(...)` are all required: the csv writer below needs `f`,
# `birth_header` and the parsed numeric rows.
if not os.path.exists(birth_weight_file):
    birthdata_url = 'https://github.com/nfmcclure/tensorflow_cookbook/raw/master/01_Introduction/07_Working_with_Data_Sources/birthweight_data/birthweight.dat'
    birth_file = requests.get(birthdata_url)
    # splitlines() copes with both '\r\n' and '\n' line endings.
    raw_lines = birth_file.text.splitlines()
    # First line is the tab-separated header; the rest are numeric rows.
    birth_header = raw_lines[0].split('\t')
    birth_data = [
        [float(x) for x in y.split('\t') if len(x) >= 1]
        for y in raw_lines[1:]
        if len(y) >= 1
    ]
    # newline='' lets the csv module control line endings itself.
    with open(birth_weight_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(birth_header)
        writer.writerows(birth_data)

# Read the birth-weight CSV into memory: the first record is kept as the
# header, every following record is converted to a list of floats.
with open(birth_weight_file, newline='') as csv_handle:
    records = csv.reader(csv_handle)
    birth_header = next(records)
    birth_data = [[float(cell) for cell in record] for record in records]

# Pull out target variable (column 0 of every row).
y_vals = np.array([x[0] for x in birth_data])

# Pull out predictor variables (not id, not target, and not birthweight):
# columns 1..7 of every row.
x_vals = np.array([x[1:8] for x in birth_data])

# Set seeds for reproducible results.  NumPy drives the train/test split and
# mini-batch sampling; TensorFlow drives the random variable initialisation,
# so both generators have to be seeded.
seed = 99
np.random.seed(seed)
tf.random.set_seed(seed)

# Split data into train/test = 80%/20%.
num_rows = len(x_vals)
train_indices = np.random.choice(num_rows, round(num_rows * 0.8), replace=False)
# Every row not drawn for training goes to the test set.  Sorting makes the
# order deterministic, and dtype=int keeps the array usable as an index even
# when it is empty.
test_indices = np.array(sorted(set(range(num_rows)) - set(train_indices)), dtype=int)

x_vals_train = x_vals[train_indices]
x_vals_test = x_vals[test_indices]
y_vals_train = y_vals[train_indices]
y_vals_test = y_vals[test_indices]

# Normalize by column (min-max norm)
def normalize_cols(m, col_min=np.array([None]), col_max=np.array([None])):
    """Min-max scale each column of ``m``.

    Args:
        m: 2-D NumPy array whose columns are scaled independently.
        col_min: Per-column minimums to scale with.  When omitted (the
            ``[None]`` sentinel default) or ``None``, they are computed
            from ``m``.
        col_max: Per-column maximums to scale with, handled like ``col_min``.

    Returns:
        A tuple ``(scaled, col_min, col_max)`` where
        ``scaled = (m - col_min) / (col_max - col_min)`` and the other two
        entries are the bounds actually used, so they can be reused to scale
        another matrix identically.

    Note:
        A column with ``col_max == col_min`` divides by zero and yields
        NaN/inf entries; callers are expected to clean those up.
    """
    # Test for the sentinel explicitly: a truthiness check would treat a
    # legitimate bound of 0 as "not supplied" and silently recompute it.
    if col_min is None or col_min[0] is None:
        col_min = m.min(axis=0)
    if col_max is None or col_max[0] is None:
        col_max = m.max(axis=0)
    return (m - col_min) / (col_max - col_min), col_min, col_max

# Scale the training features and remember the bounds that were used.
x_vals_train, train_min, train_max = normalize_cols(x_vals_train)
# Clean up only the feature matrix.  Passing the whole returned tuple to
# np.nan_to_num asks NumPy to build one array from differently shaped parts.
x_vals_train = np.nan_to_num(x_vals_train)

# Scale the test features with the training bounds, then clean up the same way.
x_vals_test, _, _ = normalize_cols(x_vals_test, train_min, train_max)
x_vals_test = np.nan_to_num(x_vals_test)

def model(x, w, b):
    """Return the model's raw linear output: ``matmul(x, w)`` plus ``b``.

    No sigmoid is applied here; callers apply it (or use a logits-based
    loss) themselves.
    """
    weighted_inputs = tf.matmul(x, w)
    return tf.add(weighted_inputs, b)

def loss1(x, y, w, b):
    """Return the mean sigmoid cross-entropy of ``model(x, w, b)`` against labels ``y``."""
    logits = model(x, w, b)
    per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
    return tf.reduce_mean(per_example_loss)

def grad1(x, y, w, b):
    """Return the gradients of ``loss1(x, y, w, b)`` with respect to ``[w, b]``."""
    # Record the forward pass so the tape can differentiate it afterwards.
    with tf.GradientTape() as tape:
        batch_loss = loss1(x, y, w, b)
    gradients = tape.gradient(batch_loss, [w, b])
    return gradients

# Declare batch size and optimizer step size.
batch_size = 25
learning_rate = 0.25 # Will not converge with learning rate at 0.4
# NOTE(review): `iterations` is not read by the training loop in this file,
# which hard-codes its own step count.
iterations = 50

# Create variables for logistic regression: one weight per feature column
# plus a single bias.  dtype must be passed by keyword, because the second
# positional parameter of tf.Variable is `trainable`, not the dtype.
num_features = x_vals_train.shape[1]
w1 = tf.Variable(tf.random.normal(shape=[num_features, 1]), dtype=tf.float32)
b1 = tf.Variable(tf.random.normal(shape=[1, 1]), dtype=tf.float32)

optimizer = tf.optimizers.Adam(learning_rate)

def _accuracy(features, labels, w, b):
    """Return the fraction of rows whose rounded sigmoid output equals the label."""
    predicted = tf.round(tf.sigmoid(model(features, w, b)))
    matches = tf.cast(tf.equal(predicted, labels), tf.float32)
    return tf.reduce_mean(matches).numpy()


# Training loop
loss_vec = []
train_acc = []
test_acc = []

# The full train/test sets do not change during training, so convert them to
# float32 tensors (labels as column vectors) once instead of on every step.
train_features = tf.cast(x_vals_train, tf.float32)
train_labels = tf.cast(np.transpose([y_vals_train]), tf.float32)
test_features = tf.cast(x_vals_test, tf.float32)
test_labels = tf.cast(np.transpose([y_vals_test]), tf.float32)

for i in range(5000):
    # Draw a random mini-batch of training rows.
    rand_index = np.random.choice(len(x_vals_train), size=batch_size)
    rand_x = x_vals_train[rand_index]
    rand_y = np.transpose([y_vals_train[rand_index]])
    x = tf.cast(rand_x, tf.float32)
    y = tf.cast(rand_y, tf.float32)

    # One optimizer step on this batch.
    grads1 = grad1(x, y, w1, b1)
    optimizer.apply_gradients(zip(grads1, [w1, b1]))

    # Loss on the same batch, evaluated after the update.
    temp_loss1 = loss1(x, y, w1, b1).numpy()
    loss_vec.append(temp_loss1)

    # Actual prediction: accuracy over the full train and test sets, stored
    # as plain scalars so the histories are ordinary numeric lists.
    temp_acc_train = _accuracy(train_features, train_labels, w1, b1)
    train_acc.append(temp_acc_train)
    temp_acc_test = _accuracy(test_features, test_labels, w1, b1)
    test_acc.append(temp_acc_test)

    # Report progress every 25 steps.
    if (i + 1) % 25 == 0:
        print('Step #' + str(i+1) + ' A = ' + str(w1.numpy()) + ' b = ' + str(b1.numpy()))
        print('Loss = ' + str(temp_loss1))

# `%matplotlib inline` is an IPython magic, not Python syntax: it is kept only
# as a comment so this file can also run as a plain script.  Re-enable it when
# running inside a notebook cell if inline figures are needed.
# %matplotlib inline

# Plot loss over time
plt.plot(loss_vec, 'k-')
plt.title('Cross Entropy Loss per Generation')
plt.xlabel('Generation')
plt.ylabel('Cross Entropy Loss')
plt.show()

# Plot train and test accuracy
plt.plot(train_acc, 'k-', label='Train Set Accuracy')
plt.plot(test_acc, 'r--', label='Test Set Accuracy')
plt.title('Train and Test Accuracy')
plt.xlabel('Generation')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()

用TensorFlow进行逻辑回归(三)

y = 0 or 1 = low birth weight

x = demographic and medical history data

name of data file

download data and create data file if file does not exist in current directory

birthdata_url = 'https://github.com/nfmcclure/tensorflow_cookbook/raw/master/01_Introduction/07_Working_with_Data_Sources/birthweight_data/birthweight.dat'

birth_file = requests.get(birthdata_url)

birth_data = birth_file.text.split('\r\n')

writer = csv.writer(f)

writer.writerow(birth_header)

writer.writerows(birth_data)

read birth weight data into memory

Pull out target variable

Pull out predictor variables (not id, not target, and not birthweight)

set for reproducible results

Split data into train/test = 80%/20%

Normalize by column (min-max norm)

Declare model operations

Declare Deming loss function

Declare batch size

Declare batch size

Create variables for linear regression

Training loop

Training loop

Actual Prediction

Plot loss over time

Plot train and test accuracy