这里的前提要求是:weights 与 biases 的总数不能超过 600 个,并且对数损失(log loss)不能超过 0.35。现在让我们来处理数据集。
这里我们以 median_house_value 作为 targets:高于其第 75 百分位数的样本标为 1,其余标为 0。我们不再沿用之前的写法,而是将所有特征进行分箱(bucketize)。
import numpy as np
import pandas as pd
import tensorflow as tf
def gets_onehot(inputs, features, buckets):
    """Bucketize a numeric Series into equal-width bins and return a one-hot tensor.

    Args:
        inputs: pandas Series holding the raw numeric values of one feature.
        features: feature/column name (str), used as the feature-column key.
        buckets: number of equal-width buckets to split [min, max] into.

    Returns:
        A dense tf tensor of shape (len(inputs), buckets) where each row is the
        one-hot encoding of the bucket its value falls into.
    """
    _inputs = {features: inputs.values}
    df_fc = tf.feature_column.numeric_column(features)
    # buckets+1 evenly spaced edges over [min, max]; slice off both endpoints so
    # the buckets-1 interior boundaries yield exactly `buckets` buckets.
    # (Original used two np.delete calls; [1:-1] is the idiomatic equivalent.)
    boundaries = np.linspace(inputs.min(), inputs.max(), buckets + 1)[1:-1]
    # .tolist() converts to plain Python floats (list(ndarray) keeps np scalars).
    _column = tf.feature_column.bucketized_column(df_fc, boundaries.tolist())
    _tensor = tf.feature_column.input_layer(_inputs, [_column])
    return _tensor
# Load the dataset and derive a per-person room count feature.
df = pd.read_csv('california_housing_train.csv')
df['rooms_per_person'] = df['total_rooms'] / df['population']
# Shuffle the rows so any later train/validation split is unbiased.
# BUG FIX: the original called df.sort_index() right after this shuffle,
# which restored the original row order and silently undid the permutation.
df = df.reindex(np.random.permutation(df.index))
# TF1-style session used below to materialize the feature tensors.
sess = tf.Session()
# Bucketize every feature into one-hot tensors: fine-grained (50 buckets) for
# the geographic coordinates, coarser (10 buckets) for everything else.
df_longitude = gets_onehot(df['longitude'], 'longitude', 50)
df_latitude = gets_onehot(df['latitude'], 'latitude', 50)
df_housing_median_age = gets_onehot(df['housing_median_age'], 'housing_median_age', 10)
df_households = gets_onehot(df['households'], 'households', 10)
df_total_rooms = gets_onehot(df['total_rooms'], 'total_rooms', 10)
df_total_bedrooms = gets_onehot(df['total_bedrooms'], 'total_bedrooms', 10)
df_population = gets_onehot(df['population'], 'population', 10)
df_median_income = gets_onehot(df['median_income'], 'median_income', 10)
df_rooms_per_person = gets_onehot(df['rooms_per_person'], 'rooms_per_person', 10)
# Label: 1.0 when median_house_value is above its 75th percentile, else 0.0.
# BUG FIX: the original hard-coded 265000, which only approximates the 75th
# percentile of this particular CSV; compute the threshold from the data so it
# matches the stated requirement exactly. Shape: (n_samples, 1) float32 column.
threshold = df['median_house_value'].quantile(0.75)
np_targets = np.array((df['median_house_value'] > threshold).astype('float32'))[:, np.newaxis]
np_concat = np.concatenate(sess.run([df_longitude, df_latitude, df_housing_median_age,
df