【学习笔记】合成特征与离群值后记

最新推荐文章于 2023-01-01 16:30:13 发布

Canon__

最新推荐文章于 2023-01-01 16:30:13 发布

阅读量263

点赞数

本文链接：https://blog.csdn.net/Canon__/article/details/82751419

版权

import numpy as np
import tensorflow as tf
from tensorflow.data import Dataset
import pandas as pd


def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Build a tf.data input pipeline and return tensors for the next batch.

    Args:
      features: pandas DataFrame of features
      targets: pandas DataFrame or Series of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch. `features` is a dict
      keyed by feature column name.
    """

    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset from the in-memory arrays.
    ds = Dataset.from_tensor_slices((features, targets))  # warning: 2GB limit

    # Shuffle the data, if specified. This must happen BEFORE batching:
    # shuffling after batch() only reorders whole batches (each batch keeps
    # the same rows) and makes the buffer hold up to 10000 batches instead
    # of 10000 individual examples.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Configure batching/repeating.
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

def add_layer(inputs, input_size, output_size, activation_function=None):
    """Create one fully connected layer and return its output tensor.

    Args:
      inputs: Tensor that is matrix-multiplied with the layer's weights
      input_size: Number of rows of the weight matrix
      output_size: Number of columns of the weight matrix, and size of the bias
      activation_function: Optional callable applied to the affine output.
        None = return the affine output unchanged
    Returns:
      The layer output: matmul(inputs, weights) + biases, passed through
      activation_function when one is given
    """
    # Weights start from random-normal values, biases from zeros.
    kernel = tf.Variable(tf.random_normal([input_size, output_size]))
    bias = tf.Variable(tf.zeros(output_size))
    affine = tf.matmul(inputs, kernel) + bias

    if activation_function is not None:
        return activation_function(affine)
    return affine

# Load the training data and rescale the target by 1/1000.
df = pd.read_csv('california_housing_train.csv')
df['median_house_value'] /= 1000
# Randomize the row order.
df = df.reindex(np.random.permutation(df.index))
# Synthetic feature: ratio of total_rooms to population.
df['rooms_per_person'] = df['total_rooms'] / df['population']

x1 = df[['rooms_per_person']].astype('float32')
y1 = df['median_house_value'].astype('float32')

xs, ys = my_input_fn(x1, y1, batch_size=2000)
# xs is a dict keyed by feature name; take the single feature and give it a
# trailing axis so it can be fed to tf.matmul as (batch_size, 1).
xs = tf.expand_dims(xs['rooms_per_person'], -1)
# Give the labels the same (batch_size, 1) shape as `pred`. Without this,
# `pred - ys` broadcasts (batch, 1) against (batch,) into a (batch, batch)
# matrix and the loss is computed over every prediction/label pair.
ys = tf.expand_dims(ys, -1)

# Network: 1 input -> 10 tanh units -> 1 linear output.
l1 = add_layer(xs, 1, 10, activation_function=tf.nn.tanh)
pred = add_layer(l1, 10, 1)

# Root-mean-squared error between predictions and labels.
loss = tf.sqrt(tf.reduce_mean(tf.square(pred - ys)))

train_step = tf.train.AdamOptimizer(0.1).minimize(loss)

init = tf.global_variables_initializer()
# The context manager closes the session (and frees its resources) on exit.
with tf.Session() as sess:
    sess.run(init)

    for i in range(1000):
        # Fetch the loss in the same run() call as the training op: a separate
        # sess.run(loss) would pull one more batch from the input iterator
        # just for logging.
        _, batch_loss = sess.run([train_step, loss])
        if i % 50 == 0:
            print(batch_loss)

经过一番努力，终于把原文中 Dataset 的输出接入了我们自己的框架。这里 my_input_fn 返回的 xs 和 ys 都是 tensor，其中 xs 是字典格式（键为特征名，值为对应的 tensor）；我们取出 xs['rooms_per_person']，用 tf.expand_dims 将其变形为 (batch_size, 1) 的形状，再送入网络层做矩阵运算。这里的 my_input_fn 函数完全是从原文照搬过来的。

Canon__

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
【学习笔记】合成特征与离群值后记

import numpy as npimport tensorflow as tffrom tensorflow.data import Datasetimport pandas as pddef my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None): """Trains a l...
复制链接

扫一扫