# coding: utf-8
'''
Google Wide & Deep model implemented with Keras 2.x.
'''
import pandas as pd
from keras import Input, Model
from keras.layers import Dense, Concatenate
from sklearn.preprocessing import MinMaxScaler
# All columns of the raw data, in order.
COLUMNS = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education_num",
    "marital_status",
    "occupation",
    "relationship",
    "race",
    "gender",
    "capital_gain",
    "capital_loss",
    "hours_per_week",
    "native_country",
    "income_bracket",
]

# Name of the label column.
LABEL_COLUMN = "label"

# Categorical feature columns.
CATEGORICAL_COLUMNS = [
    "workclass",
    "education",
    "marital_status",
    "occupation",
    "relationship",
    "race",
    "gender",
    "native_country",
]

# Continuous-valued feature columns.
CONTINUOUS_COLUMNS = [
    "age",
    "education_num",
    "capital_gain",
    "capital_loss",
    "hours_per_week",
]
def load(filename):
    """Read one data file into a DataFrame and drop rows with missing values.

    Args:
        filename: Path of the CSV file to read. When the file's base name
            contains ``'test'`` the first line of the file is skipped.

    Returns:
        A ``pandas.DataFrame`` whose columns are named after ``COLUMNS`` and
        from which every row holding at least one NaN has been removed.
    """
    import os

    # Look at the base name only: a directory or user name containing "test"
    # somewhere in the path must not make a training file lose its first row.
    skiprows = 1 if 'test' in os.path.basename(filename) else 0
    with open(filename, 'r') as f:
        df = pd.read_csv(
            f, names=COLUMNS, skipinitialspace=True, skiprows=skiprows, engine='python'
        )
    # Missing-value handling: drop every row that has any NaN cell.
    # NOTE(review): only real NaN cells are dropped; if the data marks unknown
    # values with a placeholder string (e.g. "?"), those rows are kept --
    # confirm whether that is intended.
    return df.dropna(how='any', axis=0)
def preprocess(df):
    """Convert the raw frame into a scaled feature matrix and binary labels.

    The input frame is left untouched; all work happens on a copy.

    Args:
        df: DataFrame containing an ``income_bracket`` column of strings plus
            the feature columns (including every name in
            ``CATEGORICAL_COLUMNS``).

    Returns:
        A tuple ``(X, y)``. ``X`` is the 2-D array produced by
        ``MinMaxScaler`` (default settings) from the one-hot encoded
        features. ``y`` is an integer array holding 1 where
        ``income_bracket`` contains ``">50K"`` and 0 otherwise.
    """
    # Substring test, so variants of the positive label that merely contain
    # ">50K" (e.g. with trailing punctuation) are also counted as positive.
    y = df['income_bracket'].apply(lambda x: ">50K" in x).astype(int).values

    # Build the feature frame without mutating the caller's DataFrame. A
    # pre-existing label column, if any, is excluded from the features too.
    features = df.drop(columns=['income_bracket', LABEL_COLUMN], errors='ignore')
    features = pd.get_dummies(features, columns=list(CATEGORICAL_COLUMNS))

    # TODO: select features to make the network more efficient
    # TODO: feature engineering, e.g. add crossed / combined features

    # NOTE(review): the scaler is fitted on every row passed in; when train and
    # test rows are preprocessed together, test statistics influence scaling.
    X = MinMaxScaler().fit_transform(features)
    return X, y
def _build_model(n_features):
    """Assemble and compile the wide & deep binary classifier.

    Args:
        n_features: Number of input features per sample.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output.
    """
    # ``shape`` must be a tuple; ``(n)`` without the trailing comma is a bare int.
    input_layer = Input(shape=(n_features,))

    # Wide part: one sigmoid unit fed directly by the inputs.
    wide_layer = Dense(1, activation='sigmoid')(input_layer)

    # Deep part: stack of ReLU layers with 64, 32 and 16 units.
    deep_layer = input_layer
    for units in (64, 32, 16):
        deep_layer = Dense(units, activation='relu')(deep_layer)

    # Join both parts along the feature axis and map them to one probability.
    wide_deep_layer = Concatenate(axis=1)([wide_layer, deep_layer])
    output_layer = Dense(1, activation='sigmoid')(wide_deep_layer)

    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model


def main():
    """Load the data files, train the wide & deep model and print test metrics.

    Reads ``adult.data`` and ``adult.test`` from the current working
    directory, trains for 10 epochs and prints the loss and accuracy measured
    on the test rows.
    """
    df_train = load('adult.data')
    df_test = load('adult.test')
    train_len = len(df_train)

    # Preprocess both files together so that train and test rows end up with
    # the same one-hot columns, then split them apart again by position.
    X, y = preprocess(pd.concat([df_train, df_test]))
    X_train, X_test = X[:train_len], X[train_len:]
    y_train, y_test = y[:train_len], y[train_len:]

    model = _build_model(X_train.shape[1])

    # Model training.
    # NOTE(review): the test rows double as validation data here.
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

    # Loss and accuracy evaluation.
    loss, accuracy = model.evaluate(X_test, y_test)
    print('\n', 'test loss:', loss)
    print('\n', 'test accuracy:', accuracy)
# Script entry point: run the pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
# Google Wide & Deep model implemented with Keras 2.x
# (blog footer) Latest recommended article published on 2024-04-30 19:24:04