[Study Notes] Training Neural Nets: programming exercise (possible issues)

Following the original exercise, we first linearly map each input feature onto the interval [-1, 1].

import pandas as pd

pd.set_option('display.max_columns', 10)

def linear_scale(array):
    # map [min, max] linearly onto [-1, 1]
    min_val = array.min()
    max_val = array.max()
    scale = (max_val - min_val) / 2
    return ((array - min_val) / scale) - 1


def normalizer_linear_scale(df):
    # scale every column of the frame in place
    for col in df.columns:
        df[col] = linear_scale(df[col])
    return df


df = pd.read_csv('california_housing_train.csv')
# synthetic feature from the original exercise (total_rooms / population);
# the training scripts below expect this column to exist
df['rooms_per_person'] = df['total_rooms'] / df['population']
df_targets = df['median_house_value'].copy()

get_linear = normalizer_linear_scale(df)
# restore the unscaled target column
get_linear['median_house_value'] = df_targets

print(get_linear.describe())

get_linear.to_csv('normalizer_linear_scale.csv')

Let's visualize the result first.

import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

df = pd.read_csv('normalizer_linear_scale.csv', index_col=0)


plt.figure(1, figsize=(20, 6))


for i in range(len(df.columns)):
    plt.subplot(3, 4, i + 1)
    # normalized histogram of each scaled column
    counts, bins, patches = plt.hist(df[df.columns[i]], 30, density=1, facecolor='blue', alpha=0.5)
    # overlay a normal pdf fitted to the column's mean and std
    y = stats.norm.pdf(bins, df[df.columns[i]].mean(), df[df.columns[i]].std())
    plt.plot(bins, y, 'r--')
    # trace the (halved) bin heights at the bin centers
    plt.plot((bins[:-1] + bins[1:]) / 2, counts / 2, 'g--')
    plt.title('%s' % df.columns[i])
    plt.subplots_adjust(left=0.15, wspace=0.5, hspace=0.5)

plt.show()

The visualization matches the original exercise. Next we build the neural network.

import tensorflow as tf
from tensorflow.data import Dataset


class _dnn():
    def my_fn_input(self, features, targets, batch_size=1, num_epochs=1, shuffle=True):
        # supply enough data: num_epochs * dataset_size should cover batch_size * train_steps
        features = {key: value for key, value in features.items()}
        ds = Dataset.from_tensor_slices((features, targets))
        ds = ds.batch(batch_size).repeat(num_epochs)
        if shuffle:
            # shuffle() returns a new dataset; without the reassignment it was a no-op
            ds = ds.shuffle(12000)
        features, labels = ds.make_one_shot_iterator().get_next()
        return features, labels

    def get_features(self, xs_dict):
        # stack the 1-D feature tensors into a single [batch, n_features] matrix
        features = list(xs_dict.values())
        outputs = [tf.expand_dims(i, -1) for i in features]
        return tf.concat(outputs, -1)

    def add_layer(self, inputs, input_size, output_size, activation_function=None, regularization=0.001):
        weights = tf.Variable(tf.random_normal([input_size, output_size], stddev=.1))
        # collect this layer's L2 penalty (scaled by `regularization`, default 0.001) for optional use in the loss
        tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularization)(weights))
        biases = tf.Variable(tf.zeros([output_size]) + 0.1)
        wx_b = tf.matmul(inputs, weights) + biases
        if activation_function is None:
            outputs = wx_b

        else:
            outputs = activation_function(wx_b)
        return weights, biases, outputs

    def _loss(self, pred, ys, regularizer=False):
        # rmse + lambda*l2regularizer
        rmse = tf.sqrt(tf.reduce_mean(tf.square(pred - ys)))
        if regularizer:
            loss = rmse + tf.add_n(tf.get_collection('losses'))
        else:
            loss = rmse
        return loss

    def train_step(self, learning_rate, loss):
        train = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        # train = tf.train.GradientDescentOptimizer(learning_rate)
        # train = tf.contrib.estimator.clip_gradients_by_norm(train, 5.0).minimize(loss)
        return train


Here I call Adam directly; the GradientDescent approach mentioned in the exercise is included above as commented-out code.
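For reference, here is a minimal sketch of how that commented-out clipped-SGD variant would be wired up (the helper name clipped_sgd_train_step is my own, not from the exercise), using the TF 1.x tf.contrib.estimator.clip_gradients_by_norm wrapper; only the optimizer construction changes, the rest of the graph stays the same.

import tensorflow as tf


def clipped_sgd_train_step(learning_rate, loss, clip_norm=5.0):
    # plain SGD optimizer ...
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    # ... wrapped so each update's gradients are clipped to a maximum global norm
    optimizer = tf.contrib.estimator.clip_gradients_by_norm(optimizer, clip_norm)
    return optimizer.minimize(loss)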

import numpy as np
from improving import _dnn
import pandas as pd
import tensorflow as tf
dnn = _dnn()
df = pd.read_csv('normalizer_linear_scale.csv', index_col=0)
df = df.reindex(np.random.permutation(df.index))
# note: sorting the index back would undo the permutation above and leave the
# head/tail split geographically biased, so the shuffled order is kept
# df = df.sort_index()
df_features = df[['longitude', 'latitude',
                  'housing_median_age', 'total_rooms', 'total_bedrooms', 'population',
                  'households', 'median_income', 'rooms_per_person']]
df_targets = df['median_house_value']
train_features = df_features.head(12000).astype('float32')
train_targets = df_targets.head(12000).astype('float32')
validation_features = df_features.tail(5000).astype('float32')
validation_targets = df_targets.tail(5000).astype('float32')

# 12000 rows * 30 epochs / batch 100 = 3600 batches, enough for 3000 train steps
xs, ys = dnn.my_fn_input(train_features, train_targets, batch_size=100, num_epochs=30, shuffle=True)
# the validation iterator yields the full 5000-row set once per evaluation below
vx, vy = dnn.my_fn_input(validation_features, validation_targets, batch_size=5000, num_epochs=10, shuffle=False)
xs = dnn.get_features(xs)
vx = dnn.get_features(vx)

# note: no activation functions here, so these three layers collapse into one linear map
w1, b1, l1 = dnn.add_layer(xs, 9, 10)
w2, b2, l2 = dnn.add_layer(l1, 10, 10)
w3, b3, pred = dnn.add_layer(l2, 10, 1)
loss = dnn._loss(pred, ys, regularizer=False)
train_step = dnn.train_step(0.1, loss)

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# validation forward pass reusing the trained weights (linear, matching the layers above)
_vl1 = tf.matmul(vx, w1) + b1
_vl2 = tf.matmul(_vl1, w2) + b2
_vpred = tf.matmul(_vl2, w3) + b3
_vloss = dnn._loss(_vpred, vy, regularizer=False)

for i in range(3000):
    sess.run(train_step)
    if i % 300 == 0:
        print('validation loss:', sess.run(_vloss))

The training result does not match the original exercise: the original hovers around an RMSE of 70, while this network sits around 115. I changed the learning rate, the weight initialization, and the regularization rate, and it still hovered around 115. A very large batch_size makes the Dataset pipeline run extremely slowly, but since the whole dataset is quite small, we can simply feed everything through a placeholder instead.

import tensorflow as tf
import numpy as np
import pandas as pd
from improving import _dnn

df = pd.read_csv('normalizer_linear_scale.csv', index_col=0)
df = df.reindex(np.random.permutation(df.index))
# as above, sorting the index back would undo the permutation, so it is skipped
# df = df.sort_index()
df_features = df[['longitude', 'latitude',
                  'housing_median_age', 'total_rooms', 'total_bedrooms', 'population',
                  'households', 'median_income', 'rooms_per_person']]
df_targets = df['median_house_value']
train_features = df_features.head(12000).astype('float32')
train_targets = df_targets.head(12000).astype('float32')
validation_features = df_features.tail(5000).astype('float32')
validation_targets = df_targets.tail(5000).astype('float32')

# def get_num_epochs(matrix, epochs):
#     i = matrix.copy()
#     for k in range(epochs):
#         matrix = np.concatenate([matrix, i])
#     return matrix
#
# batch = 0
# def get_batch(matrix, batch_size):
#     global batch
#     outputs = matrix[batch:batch+batch_size, :]
#     batch += batch_size
#     return outputs

dnn = _dnn()
xs = np.array(train_features)
ys = np.array(train_targets).reshape(-1, 1)
vx = np.array(validation_features)
vy = np.array(validation_targets).reshape(-1, 1)
# xk_epoch = get_num_epochs(xk, 2000)
# xs = get_batch(xk_epoch, 8000)
# yk_epoch = get_num_epochs(yk, 2000)
# ys = get_batch(yk_epoch, 8000)
x_input = tf.placeholder(tf.float32, [None, 9])
y_input = tf.placeholder(tf.float32, [None, 1])

# tanh on the first layer this time; the deeper layers remain linear
w1, b1, l1 = dnn.add_layer(x_input, 9, 10, activation_function=tf.nn.tanh)
w2, b2, l2 = dnn.add_layer(l1, 10, 10)
w3, b3, pred = dnn.add_layer(l2, 10, 1)

loss = dnn._loss(pred, y_input)
train = dnn.train_step(0.01, loss)

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

for i in range(2000):
    sess.run(train, feed_dict={x_input: xs, y_input: ys})
    if i % 50 == 0:
        print(sess.run(loss, feed_dict={x_input: vx, y_input: vy}))

The commented-out code is my numpy imitation of the Dataset batching; with a small batch size it still converges to around 115, while feeding the entire dataset usually converges to around 60~70. With the learning rate set to 0.1 it sometimes still settles at 115. Possibly the dataset is too small for each batch to represent the whole distribution; I changed the parameters many times, including dropping the L2 regularization (when the batch is the whole dataset, I did not add L2 regularization). A more representative way to draw batches is sketched below.
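As a sketch of that idea (the next_batch helper below is hypothetical, not from the original post), drawing each mini-batch uniformly at random from the full training arrays tends to be more representative than the sequential slicing in the commented-out helpers:

import numpy as np


def next_batch(features, targets, batch_size):
    # sample batch_size distinct row indices uniformly from the training set
    idx = np.random.choice(features.shape[0], batch_size, replace=False)
    return features[idx], targets[idx]

# usage with the arrays defined above:
# batch_x, batch_y = next_batch(xs, ys, 100)
# sess.run(train, feed_dict={x_input: batch_x, y_input: batch_y})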

As for one typical loss run: the loss plateaued around an RMSE of 115 on the validation set and stopped converging (I assumed a local optimum was nearby), but then the gradient suddenly started moving again and the loss dropped to about 61. This still looks odd to me; if anyone can explain it and suggest an optimization, I would be very grateful. The last two tasks in the original exercise try different training methods, which I won't walk through here: just swap the tf.train optimizer inside the DNN setup, as in the sketch below. The same applies to the task that uses only longitude and latitude as features, since the framework is already in place.
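For completeness, a minimal sketch of those two variations, assuming the same _dnn class as above; Adagrad is one of the optimizers the exercise suggests trying, and the 0.05 learning rate is my own placeholder, not a tuned value:

import tensorflow as tf
import pandas as pd
from improving import _dnn

dnn = _dnn()
df = pd.read_csv('normalizer_linear_scale.csv', index_col=0)

# variant: use only longitude and latitude as inputs
features = df[['longitude', 'latitude']].astype('float32')
targets = df[['median_house_value']].astype('float32')

x_input = tf.placeholder(tf.float32, [None, 2])
y_input = tf.placeholder(tf.float32, [None, 1])
w1, b1, l1 = dnn.add_layer(x_input, 2, 10, activation_function=tf.nn.tanh)
w2, b2, pred = dnn.add_layer(l1, 10, 1)
loss = dnn._loss(pred, y_input)

# optimizer swap: Adagrad in place of the Adam call inside _dnn.train_step
train = tf.train.AdagradOptimizer(0.05).minimize(loss)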

 
