代码来自《TensorFlow 机器学习实战指南》。
代码为手动录入。
与原书相比有 2 处不同(见代码中标注 A、B 的注释)。
#!/usr/bin/env python3
# encoding:utf-8
# -*-coding:utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import requests
from sklearn import datasets
from sklearn.preprocessing import normalize
from tensorflow.python.framework import ops
# The placeholders and Session.run() calls further down are graph-mode APIs.
# Under TensorFlow 2.x eager execution is on by default and has to be switched
# off before any graph or op is created; under 1.x this branch is skipped.
if tf.executing_eagerly():
    tf.compat.v1.disable_eager_execution()

# Start from an empty default graph and open the session used by the script.
ops.reset_default_graph()
sess = tf.compat.v1.Session()
# (A) The book's URL, https://www.umass.edu/statdata/statdata/data/lowbwt.dat,
# could not be opened from my network, so the data set below is used instead.
birthdata_url = 'https://github.com/nfmcclure/tensorflow_cookbook/raw/master/01_Introduction/07_Working_with_Data_Sources/birthweight_data/birthweight.dat'
# A timeout keeps a stalled connection from hanging the script, and
# raise_for_status() stops an HTTP error page from being parsed as data.
birth_file = requests.get(birthdata_url, timeout=30)
birth_file.raise_for_status()
# print(birth_file.text)

# (B) The parsing below also differs from the book: fields are split on tabs.
# splitlines() accepts both '\r\n' and '\n' line endings.
birth_data = birth_file.text.splitlines()
birth_header = birth_data[0].split('\t')
# Book version:
# birth_header = [x for x in birth_data[0].split(' ') if len(x)>=1]
birth_data = [
    [float(x) for x in y.split('\t') if len(x) >= 1]
    for y in birth_data[1:]
    if len(y) >= 1
]
# Book version:
# birth_data = [[float(x) for x in y.split(' ') if len(x)>=1] for y in birth_data[1:] if len(y)>=1]
# print(len(birth_data))
# print(len(birth_data[0]))

# Column 0 is the target; columns 1..8 are the eight model inputs.
# NOTE(review): if the last of these columns is the raw birth weight the
# label is derived from, it leaks the target -- confirm against birth_header.
y_vals = np.array([x[0] for x in birth_data])
x_vals = np.array([x[1:9] for x in birth_data])

# Random 80/20 split: draw the training rows without replacement, the
# remaining rows form the test set.
train_indices = np.random.choice(len(x_vals), round(len(x_vals) * 0.8), replace=False)
# setdiff1d returns a sorted integer array, so it stays a valid index array
# even when it is empty.
test_indices = np.setdiff1d(np.arange(len(x_vals)), train_indices)
x_vals_train = x_vals[train_indices]
x_vals_test = x_vals[test_indices]
y_vals_train = y_vals[train_indices]
y_vals_test = y_vals[test_indices]
# print(x_vals_test)
# Scale every feature column into the range 0..1.
def normalize_cols(m):
    """Min-max scale each column of a 2-D array.

    Args:
        m: array whose columns are scaled independently (reductions run
            along axis 0).

    Returns:
        Array of the same shape as ``m`` where each column's minimum maps
        to 0 and its maximum maps to 1. A column whose values are all equal
        maps to 0 everywhere.
    """
    col_max = m.max(axis=0)
    col_min = m.min(axis=0)
    col_range = col_max - col_min
    # A constant column has zero range; dividing by it would give 0/0 = NaN
    # (plus a RuntimeWarning). Divide by 1 instead so the column becomes 0.
    safe_range = np.where(col_range == 0, 1, col_range)
    return (m - col_min) / safe_range
# Take the scaling statistics from the training split only and reuse them for
# the test split. Scaling the test split with its own min/max would put the
# two splits on different scales, so the learned weights would not transfer
# and test accuracy would swing with every random split.
train_min = x_vals_train.min(axis=0)
train_max = x_vals_train.max(axis=0)
train_range = train_max - train_min
# Avoid dividing by zero for a feature that is constant in the training split.
train_range = np.where(train_range == 0, 1, train_range)
# Test values outside the training range land outside 0..1; that is expected.
x_vals_test = np.nan_to_num((x_vals_test - train_min) / train_range)
x_vals_train = np.nan_to_num(normalize_cols(x_vals_train))
print(x_vals_test.shape)
#print(x_vals)
# iris=datasets.load_iris()
# #print(iris.data.shape)
# x_vals=np.array([x[3] for x in iris.data])
# y_vals=np.array([y[0] for y in iris.data])
# Hyperparameters.
leaning_rate = 0.05  # gradient-descent step size (existing name kept as is)
batch_size = 25
# interations=50

# Graph inputs: a batch of 8-feature rows and the matching 0/1 targets.
# The tf.compat.v1 names are used throughout, matching the Session,
# initializer and optimizer calls; the bare tf.placeholder /
# tf.random_normal names are not available under TensorFlow 2.x.
x_data = tf.compat.v1.placeholder(shape=[None, 8], dtype=tf.float32)
y_target = tf.compat.v1.placeholder(shape=[None, 1], dtype=tf.float32)

# Model parameters: one weight per feature plus a bias, randomly initialised.
A = tf.Variable(tf.compat.v1.random_normal(shape=[8, 1]))
b = tf.Variable(tf.compat.v1.random_normal(shape=[1, 1]))

# Linear part of the model (logits): x * A + b.
model_output = tf.add(tf.matmul(x_data, A), b)
# #demming operation
# demming_numberator=tf.abs(tf.subtract(y_target,tf.add(tf.matmul(x_data,A),b)))
# demming_denominator=tf.sqrt(tf.add(tf.square(A),1))

# Mean sigmoid cross-entropy between the logits and the targets.
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=y_target, logits=model_output)
)

my_ops = tf.compat.v1.train.GradientDescentOptimizer(leaning_rate)
train_step = my_ops.minimize(loss)

# Accuracy: round the sigmoid output to 0/1 and compare with the targets.
prediction = tf.round(tf.sigmoid(model_output))
predictions_correct = tf.cast(tf.equal(prediction, y_target), tf.float32)
accuracy = tf.reduce_mean(predictions_correct)

# Initialise variables only after the whole graph is built, so anything an
# optimizer might add is covered as well.
init = tf.compat.v1.global_variables_initializer()
sess.run(init)
# Per-iteration history: mini-batch loss and accuracy on the full splits.
lose_vec = []
train_acc = []
test_acc = []

# The full-split feeds are the same on every iteration; build them once
# instead of reshaping the targets inside the loop. np.transpose([v]) turns
# a 1-D vector into the column shape the y_target placeholder expects.
train_feed = {x_data: x_vals_train, y_target: np.transpose([y_vals_train])}
test_feed = {x_data: x_vals_test, y_target: np.transpose([y_vals_test])}

for i in range(1500):
    # Draw a random mini-batch of training rows (with replacement).
    rand_index = np.random.choice(len(x_vals_train), size=batch_size)
    rand_x = x_vals_train[rand_index]
    rand_y = np.transpose([y_vals_train[rand_index]])
    batch_feed = {x_data: rand_x, y_target: rand_y}

    # One gradient step, then record the loss on the same batch.
    sess.run(train_step, feed_dict=batch_feed)
    temp_lose = sess.run(loss, feed_dict=batch_feed)
    lose_vec.append(temp_lose)

    # Track accuracy on the complete training and test splits.
    temp_acc_train = sess.run(accuracy, feed_dict=train_feed)
    train_acc.append(temp_acc_train)
    temp_acc_test = sess.run(accuracy, feed_dict=test_feed)
    test_acc.append(temp_acc_test)
# Figure 1: mini-batch loss recorded at every training iteration.
plt.plot(lose_vec, 'k-')
plt.title('Cross Entropy Loss per Generation')
plt.xlabel('Generation')
plt.ylabel('Cross Entropy Loss')
plt.show()

# Figure 2: accuracy on the training and test splits per iteration.
plt.plot(train_acc, 'k-', label='Train Set Accuracy')
plt.plot(test_acc, 'r--', label='Test Set Accuracy')
plt.title('Accuracy: Train vs Test')
plt.xlabel('Generation')
plt.ylabel('Accuracy')
# Without a legend the labels passed to plot() are never displayed.
plt.legend(loc='lower right')
plt.show()
问题:
测试集的准确率不稳定:有时很低,有时又与训练集的准确率差不多。