一、各种类型的数据形式及其对应使用的库
csv、pkl(cPickle、Pickle)、json、xls、html、xml —— pandas
npy、npz(多个npy的压缩)——numpy
hdf —— 跨平台高效读写
二、TFRecord数据处理
1. 转化为tfrecords格式
# csv->tfrecords
def transform_to_tfrecord():
data = pd.read_csv('data/train.csv')
tfrecord_file = 'train.tfrecords'
def int_feature(value):
return tf.train_Feature(int64_list = tf.train.Int64List(value = [value]))
def float_feature(value):
return tf.train_Feature(float_list = tf.train.FloatList(value = [value]))
writer = tf.python_io.TFRecordWriter(tfrecord_file)
for i in range(len(data)):
feature = tf.trian.Features(feature = {
'Age':float_feature(data['Age'][i])
...
})
example = tf.train.Example(feature = features)
writer.write(example.Seria