[Study Notes] Synthetic Features and Outliers: Postscript

import numpy as np
import tensorflow as tf
from tensorflow.data import Dataset
import pandas as pd


def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Trains a linear regression model of one feature.

    Args:
      features: pandas DataFrame of features
      targets: pandas DataFrame of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch
    """

    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset, and configure batching/repeating.
    ds = Dataset.from_tensor_slices((features, targets))  # warning: 2GB limit
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data, if specified.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

def add_layer(inputs, input_size, output_size, activation_function=None):
    """Adds a fully connected layer computing activation(inputs * W + b)."""
    weights = tf.Variable(tf.random_normal([input_size, output_size]))
    biases = tf.Variable(tf.zeros(output_size))
    wx_b = tf.matmul(inputs, weights) + biases
    if activation_function is None:
        output = wx_b  # purely linear layer
    else:
        output = activation_function(wx_b)
    return output

df = pd.read_csv('california_housing_train.csv')
df['median_house_value'] /= 1000  # scale the target to thousands of dollars
df = df.reindex(np.random.permutation(df.index))  # shuffle the rows
df['rooms_per_person'] = df['total_rooms'] / df['population']  # synthetic feature

x1 = df[['rooms_per_person']].astype('float32')
y1 = df['median_house_value'].astype('float32')

xs, ys = my_input_fn(x1, y1, batch_size=2000)
xs = tf.expand_dims(xs['rooms_per_person'], -1)  # dict value (batch_size,) -> (batch_size, 1)
l1 = add_layer(xs, 1, 10, activation_function=tf.nn.tanh)
pred = add_layer(l1, 10, 1)

# Align shapes: pred is (batch_size, 1) while ys is (batch_size,); without the
# expand_dims, pred - ys would broadcast to a (batch_size, batch_size) matrix.
loss = tf.sqrt(tf.reduce_mean(tf.square(pred - tf.expand_dims(ys, -1))))

train_step = tf.train.AdamOptimizer(0.1).minimize(loss)

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

for i in range(1000):
    sess.run(train_step)
    if i % 50 == 0:
        print(sess.run(loss))


After some effort, the output of the original tutorial's Dataset pipeline is finally wired into our own framework. The xs and ys returned here are tensors; xs is a dict, so we pull out its value for the feature and reshape it to (batch_size, 1) before feeding it into our matrix computations. The my_input_fn function is copied straight from the original tutorial.
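
As a quick sanity check of that reshaping step, below is a minimal TF 1.x sketch (using hypothetical toy values, not the housing CSV) that mirrors what the script does to xs: take the (batch_size,) tensor stored under the dict key and expand it into a (batch_size, 1) column so it can go through tf.matmul.

import numpy as np
import tensorflow as tf

# Toy stand-in for the dict that my_input_fn returns (hypothetical values).
toy_features = {'rooms_per_person': np.array([1.5, 2.0, 0.8], dtype=np.float32)}

ds = tf.data.Dataset.from_tensor_slices(toy_features).batch(3)
batch = ds.make_one_shot_iterator().get_next()       # dict of tensors
col = tf.expand_dims(batch['rooms_per_person'], -1)  # (3,) -> (3, 1)

with tf.Session() as sess:
    print(sess.run(tf.shape(col)))  # expected: [3 1]

tf.reshape(batch['rooms_per_person'], [-1, 1]) would do the same job; tf.expand_dims is used only to match the main script.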
