import numpy as np
import pandas as pd
import tensorflow as tf
# Load the training CSV into the module-level DataFrame that the rest of the
# script mutates in place. The path is relative, so it is resolved against the
# current working directory.
df = pd.read_csv('california_housing_train.csv')
def z_score(features):
    """Standardize ``features`` to zero mean and unit standard deviation.

    Args:
        features: Any object that exposes ``.mean()`` and ``.std()`` and
            supports element-wise arithmetic, e.g. a pandas Series or a
            NumPy array.

    Returns:
        ``(features - features.mean()) / features.std()``, in the same
        container type the arithmetic on ``features`` produces.

    Note:
        A zero standard deviation (constant input) is not guarded against;
        the division is performed as-is.
    """
    # Use a dedicated local name rather than ``df`` so the module-level
    # DataFrame is not shadowed inside the helper.
    centered = features - features.mean()
    return centered / features.std()
def log_score(features):
    """Return the element-wise natural log of ``features + 1``.

    Args:
        features: Numeric scalar, NumPy array or pandas Series.

    Returns:
        ``log(1 + features)`` computed element-wise; a pandas Series input
        yields a pandas Series.

    Note:
        ``np.log1p`` is used instead of ``np.log(features + 1)`` because it
        stays accurate when values are very close to zero, where forming
        ``features + 1`` first would round the input away. Inputs at or
        below -1 are outside the logarithm's domain and are not guarded.
    """
    return np.log1p(features)
def gets_onehot(inputs, features, buckets):
    """Bucketize a numeric column into equal-width bins and one-hot encode it.

    Args:
        inputs: Column values. Must provide ``.values``, ``.min()`` and
            ``.max()`` (the script passes pandas Series).
        features: Feature name; used both as the key of the input dict and
            as the name of the numeric feature column.
        buckets: Number of interior bucket boundaries to place between the
            column's minimum and maximum.

    Returns:
        The tensor built by ``tf.feature_column.input_layer`` for the
        bucketized column. It is a graph tensor and has to be evaluated in a
        session to obtain values. The script calls this with ``buckets=6``
        and labels seven output columns, i.e. ``buckets + 1`` one-hot slots.

    Note:
        No check is made that ``inputs.min()`` differs from ``inputs.max()``;
        presumably a constant column would yield duplicate boundaries --
        TODO confirm how ``bucketized_column`` reacts to that.
    """
    feature_values = {features: inputs.values}
    numeric_col = tf.feature_column.numeric_column(features)

    # linspace yields buckets + 2 evenly spaced points including both
    # extremes; only the interior points are usable as boundaries, so the
    # first and last are sliced off.
    boundaries = np.linspace(inputs.min(), inputs.max(), buckets + 2)[1:-1]

    bucketized_col = tf.feature_column.bucketized_column(
        numeric_col, list(boundaries))
    return tf.feature_column.input_layer(feature_values, [bucketized_col])
# Per-capita ratios: each source column is divided by the block population and
# stored under a new column name.
for source_col, ratio_col in (('total_rooms', 'per_rooms'),
                              ('total_bedrooms', 'per_bedrooms')):
    df[ratio_col] = df[source_col] / df['population']

# Overwrite each listed column with its log_score transform. The columns are
# independent of one another; the order matches the original assignments.
for log_col in ('households', 'housing_median_age', 'median_house_value',
                'per_rooms', 'per_bedrooms', 'median_income'):
    df[log_col] = log_score(df[log_col])
# A single TF1-style session evaluates the bucketized one-hot tensors.
# Nothing in this section closes it.
sess = tf.Session()

# Latitude: evaluate the one-hot tensor (6 boundaries -> 7 slots) and wrap the
# resulting array in a DataFrame labelled latitude1 .. latitude7.
df1 = pd.DataFrame(
    sess.run(gets_onehot(df['latitude'], 'latitude', 6)).copy(),
    columns=['latitude{}'.format(slot) for slot in range(1, 8)],
)

# Longitude: kept as the raw evaluated array here; the statement that follows
# this section turns it into a DataFrame.
df2 = sess.run(gets_onehot(df['longitude'], 'longitude', 6)).copy()
df2 = pd.DataFrame(df2, columns=['longitude1', 'longitude2
【学习笔记】特征组合编程练习
最新推荐文章于 2023-06-28 16:11:39 发布
这篇博客介绍了在Python中进行特征组合的实践，使用Z分数处理数据，并针对经度和纬度采用分箱方法。作者搭建了一个全连接神经网络，采用Adam优化器并讨论了L2正则化。在训练过程中遇到OutOfRange错误，提示读者注意repeat中的epochs数值。最后，网络在训练数据上的损失接近0.63，并建议读者自行尝试不同的特征组合以降低损失。文章还提及特征相关性的分析以及特征处理方法的选择，强调了对数值范围的限制对加速学习的重要性。
摘要由CSDN通过智能技术生成