import numpy as np
import tensorflow as tf
from tensorflow.data import Dataset
import pandas as pd
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Build a tf.data input pipeline and return tensors for the next batch.

    Args:
      features: pandas DataFrame of features; each column becomes one entry
        of the returned feature dict, keyed by column name.
      targets: pandas Series or DataFrame of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated.
        None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch, where features is a
      dict mapping column name to a tensor.
    """
    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset whose elements are individual (features, target)
    # examples.
    ds = Dataset.from_tensor_slices((features, targets))  # warning: 2GB limit

    # Shuffle the data, if specified. This must happen BEFORE batching:
    # shuffling after batch() only reorders whole batches (each batch keeps
    # the same examples) and makes the shuffle buffer hold up to 10000
    # batches instead of 10000 examples. Shuffling before repeat() also keeps
    # examples from different epochs from being mixed together.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Configure batching/repeating.
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
def add_layer(inputs, input_size, output_size, activation_function=None):
    """Create one fully connected layer and return its output tensor.

    Args:
      inputs: Tensor that is matrix-multiplied with the layer's weights.
      input_size: Number of rows of the weight matrix.
      output_size: Number of columns of the weight matrix; also the length
        of the bias vector.
      activation_function: Optional callable applied to the affine output.
        None leaves the layer linear.
    Returns:
      The tensor activation_function(inputs @ weights + biases), or the
      plain affine result when no activation function is given.
    """
    # Weights start from a random normal draw, biases start at zero.
    kernel = tf.Variable(tf.random_normal([input_size, output_size]))
    offset = tf.Variable(tf.zeros(output_size))
    affine = tf.matmul(inputs, kernel) + offset

    if activation_function is not None:
        return activation_function(affine)
    return affine
# Load the training data, scale the target down by 1000, and shuffle the
# rows once up front.
df = pd.read_csv('california_housing_train.csv')
df['median_house_value'] /= 1000
df = df.reindex(np.random.permutation(df.index))

# Synthetic feature: rooms per person.
df['rooms_per_person'] = df['total_rooms'] / df['population']

# Single-feature input (kept as a DataFrame so my_input_fn can key it by
# column name) and the regression target, both as float32.
x1 = df[['rooms_per_person']].astype('float32')
y1 = df['median_house_value'].astype('float32')

# xs is a dict of tensors keyed by feature name; ys is the label tensor.
xs, ys = my_input_fn(x1, y1, batch_size=2000)

# Reshape the feature from (batch_size,) to (batch_size, 1) so it can be fed
# to tf.matmul inside add_layer.
xs = tf.expand_dims(xs['rooms_per_person'], -1)
# Give the labels the same (batch_size, 1) shape as the predictions.
# Without this, `pred - ys` broadcasts (batch_size, 1) against (batch_size,)
# into a (batch_size, batch_size) matrix and the loss averages over every
# prediction/label pair instead of matching pairs.
ys = tf.expand_dims(ys, -1)

# 1 -> 10 (tanh) -> 1 network.
l1 = add_layer(xs, 1, 10, activation_function=tf.nn.tanh)
pred = add_layer(l1, 10, 1)

# Root-mean-squared error between predictions and labels.
loss = tf.sqrt(tf.reduce_mean(tf.square(pred - ys)))
train_step = tf.train.AdamOptimizer(0.1).minimize(loss)

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

for i in range(1000):
    sess.run(train_step)
    if i % 50 == 0:
        # NOTE: this is a separate run, so the reported loss is evaluated on
        # the next batch drawn from the input pipeline, not the one just
        # trained on.
        print(sess.run(loss))
经过一番努力,终于把原文 Dataset 的输出结果接入了我们自己的框架。这里 my_input_fn 返回的 xs 和 ys 都由 tensor 构成,其中 xs 为字典格式(键是特征名,值是对应的 tensor)。我们从 xs 中取出 'rooms_per_person' 对应的值,并将其变形为 (batch_size, 1) 的形状,再放入我们的矩阵中进行运算。这里的 my_input_fn 函数完全是从原文照搬过来的。