tensorflow eager 模式下打印dataset中以及经过feature column后的数据
由于调试的需要，需要对比 dataset 中的原始数据和经过 feature_column 处理后的数据，因此要把这两份数据都打印出来，代码如下：
import tensorflow as tf
import tensorflow.contrib.eager as tfe
# Switch TensorFlow to eager execution so the tensors produced below can be
# iterated over and printed directly. Per the TensorFlow docs this must be
# called once at program startup, before any other TensorFlow op is created.
tfe.enable_eager_execution()
def parse_line(line):
    """Parse one tab-separated text record into a ``(features, label)`` pair.

    ``CSV_COLUMNS``, ``CSV_COLUMN_DEFAULTS``, ``LABEL_COLUMN`` and ``process``
    are not defined in this snippet; the user has to provide them.

    Args:
        line: The text record handed to ``tf.decode_csv`` (fields separated
            by tabs).

    Returns:
        A tuple ``(features, label)``: ``features`` is the result of
        ``process`` applied to the dict of the remaining columns, and
        ``label`` is the column stored under ``LABEL_COLUMN``.
    """
    # CSV_COLUMN_DEFAULTS and friends must be defined by the user.
    columns = tf.decode_csv(
        line, record_defaults=CSV_COLUMN_DEFAULTS, field_delim='\t')
    features = dict(zip(CSV_COLUMNS, columns))
    # Bind the popped column to the same name that is returned below; the
    # original stored it in `labels` and then returned the undefined `label`.
    label = features.pop(LABEL_COLUMN)
    # Extra per-feature processing; `process` must be defined by the user.
    features = process(features)
    return features, label
def _print_before_and_after(features, columns):
    """Print feature A as found in the dataset and after the feature columns.

    Args:
        features: Dict of feature tensors as returned by ``parse_line``; it is
            indexed with ``'feature_A_name'`` here and is also passed to
            ``tf.feature_column.input_layer``.
        columns: List of feature columns applied by
            ``tf.feature_column.input_layer``.
    """
    print("before feature column: {}".format(features['feature_A_name']))
    inputs = tf.feature_column.input_layer(features, columns)
    print("after feature column: {}".format(inputs))


if __name__ == "__main__":
    dataset = tf.data.TextLineDataset("data_name")
    # print(dataset.output_shapes)

    # The column definitions do not depend on the data, so they are built once
    # up front instead of being re-created on every loop iteration.
    # NOTE: the non-ASCII name inside the list literal below is the article's
    # placeholder for a user-defined vocabulary list; replace it with your own
    # list of vocabulary values before running.
    feature_A = tf.feature_column.categorical_column_with_vocabulary_list(
        'feature_A_name', [自定义的列表], dtype=tf.string, default_value=-1)
    feature_A_weight = tf.feature_column.weighted_categorical_column(
        feature_A, weight_feature_key='feature_A_weight_name')
    feature_A_column = tf.feature_column.indicator_column(feature_A_weight)
    columns = [feature_A_column]

    # Option 1: iterate over the raw text lines and parse each one eagerly.
    # The dataset has not been mapped yet, so every element is an unparsed
    # line that must go through parse_line before it can be unpacked.
    for line in tfe.Iterator(dataset):
        features, label = parse_line(line)
        _print_before_and_after(features, columns)

    # Option 2 (alternative): let the dataset parse and batch the records, so
    # each element is already a (features, label) pair.
    dataset = dataset.map(parse_line)
    dataset = dataset.batch(16)  # choose the batch size as needed
    for ele in tfe.Iterator(dataset):
        features, label = ele
        # Same follow-up steps as in option 1.
        _print_before_and_after(features, columns)