数据文件
链接:https://pan.baidu.com/s/1igjhKnIBTx1Lqujyf1iB1Q
提取码:yuwg
TensorFlow 1.x 读取 CSV 文件(队列方式,tf.compat.v1)
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
from tensorflow import keras
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# Report the TensorFlow version, the interpreter version, and then the
# name/version pair of every library imported above.
print(tf.__version__)
print(sys.version_info)
for module in (mpl, np, pd, sklearn, tf, keras):
    print(module.__name__, module.__version__)
def csvread(file_list):
    """Build a queue-based (TF1-style) pipeline that reads two-column CSV files.

    :param file_list: list of CSV file paths (directory + file name)
    :return: ``(example_batch, label_batch)`` tensors, batched 20 rows at a time
    """
    # 1. Queue of file names that feeds the reader.
    filename_queue = tf.train.string_input_producer(file_list)

    # 2. Line reader: every read pulls one line of text from the queue.
    line_reader = tf.TextLineReader()
    _, line = line_reader.read(filename_queue)
    print(line)

    # 3. Decode the line. record_defaults fixes each column's type and
    #    default value; here both columns are strings defaulting to "None".
    column_defaults = [["None"], ["None"]]
    example, label = tf.decode_csv(line, record_defaults=column_defaults)
    print(example, label)

    # 4. Group the decoded rows into batches.
    batched = tf.train.batch(
        [example, label],
        batch_size=20,
        num_threads=1,
        capacity=20,
    )
    example_batch, label_batch = batched
    print(example_batch, label_batch)
    return example_batch, label_batch
if __name__ == "__main__":
    # Collect every entry of ./csvdata/ as a "directory + name" path.
    file_names = os.listdir("./csvdata/")
    file_list = [os.path.join("./csvdata/", file) for file in file_names]
    print(file_list)

    example_batch, label_batch = csvread(file_list)

    # Open a session to evaluate the batch tensors.
    with tf.Session() as sess:
        # The coordinator manages the lifetime of the reader threads.
        coord = tf.train.Coordinator()
        # Start the threads that fill the input queues.
        threads = tf.train.start_queue_runners(sess, coord)
        try:
            print(sess.run([example_batch, label_batch]))
        finally:
            # Always stop and join the reader threads, even when sess.run
            # raises; otherwise they are left running against the session.
            coord.request_stop()
            coord.join(threads)
TensorFlow 2.x 读取 CSV 文件(tf.data 方式)
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras
# Report the TensorFlow version, the interpreter version, and then the
# name/version pair of every library imported above.
print(tf.__version__)
print(sys.version_info)
for module in (mpl, np, pd, sklearn, tf, keras):
    print(module.__name__, module.__version__)
def parse_csv_line(line, n_fields=2):
    """Decode one line of CSV text into a feature/target pair.

    Only the first two decoded columns are returned. For a file whose last
    column is the target and whose other columns are features, slice
    ``fields[0:-1]`` and ``fields[-1:]`` instead.

    :param line: one line of CSV text
    :param n_fields: number of columns in the file
    :return: x, the first column, and y, the second column
    """
    # 1. Every column is decoded as a string with "None" as its default.
    none_default = tf.constant("None")
    column_defaults = [none_default] * n_fields

    # 2. Split the line into one tensor per column.
    fields = tf.io.decode_csv(line, column_defaults)

    return tf.stack(fields[0]), tf.stack(fields[1])
def csv_reader_dataset(filenames, n_readers=5, batch_size=32,
                       n_parse_threads=5, shuffle_buffer_size=10000):
    """Build a ``tf.data`` pipeline that reads and batches a list of CSV files.

    :param filenames: list of CSV file paths
    :param n_readers: number of files read concurrently (interleave cycle length)
    :param batch_size: number of rows per batch
    :param n_parse_threads: number of parallel calls used to decode lines
    :param shuffle_buffer_size: size of the shuffle buffer
    :return: a dataset yielding ``(x, y)`` batches produced by ``parse_csv_line``
    """
    # Dataset of the file names.
    dataset = tf.data.Dataset.list_files(filenames)
    # Repeat the file list indefinitely.
    dataset = dataset.repeat()
    # Read lines from n_readers files at a time. Append .skip(1) to the
    # TextLineDataset to drop a header row if the files have one.
    dataset = dataset.interleave(
        lambda filename: tf.data.TextLineDataset(filename),
        cycle_length=n_readers,
    )
    # Shuffle the lines. Dataset transformations return a new dataset, so the
    # result must be assigned; the bare call previously here had no effect.
    dataset = dataset.shuffle(shuffle_buffer_size)
    # Decode every line.
    dataset = dataset.map(parse_csv_line, num_parallel_calls=n_parse_threads)
    # Group rows into batches.
    dataset = dataset.batch(batch_size)
    return dataset
if __name__ == "__main__":
    # 1. Build the file list: every entry of ./csvdata/ as "directory + name".
    file_names = os.listdir("./csvdata/")
    file_list = [os.path.join("./csvdata/", file) for file in file_names]
    print(file_list)

    # 2. Build the CSV dataset.
    train_set = csv_reader_dataset(file_list, batch_size=3)

    # 3. Show the first batch.
    for x_batch, y_batch in train_set.take(1):
        print("x:", x_batch, "y:", y_batch, sep="\n")
2、读取csv文件
链接:https://pan.baidu.com/s/1t4IoQc0D36dAJ2b8-o2vug
提取码:wbcy
import tensorflow as tf
import numpy as np
def read_csv_line(line, n_fields=2):
    """Decode one six-column CSV line into a feature tensor and a target.

    :param line: one line of CSV text
    :param n_fields: not used by this function; kept for interface compatibility
    :return: x, the stacked columns between the first and the last, and y,
        the last column
    """
    # Column types and defaults: an integer column, four float columns,
    # and a final integer column.
    int_default = tf.constant(0)
    float_default = tf.constant(0.0)
    column_defaults = [int_default] + [float_default] * 4 + [int_default]

    # Split the line into one tensor per column.
    fields = tf.io.decode_csv(line, column_defaults)

    features = tf.stack(fields[1:-1])
    target = tf.stack(fields[-1])
    return features, target
def csv_reader_dataset(filenames, n_readers=5, batch_size=32,
                       n_parse_threads=5, shuffle_buffer_size=10000):
    """Build a ``tf.data`` pipeline that reads, shuffles and batches CSV files.

    :param filenames: list of CSV file paths
    :param n_readers: number of files read concurrently (interleave cycle length)
    :param batch_size: number of rows per batch
    :param n_parse_threads: number of parallel calls used to decode lines
    :param shuffle_buffer_size: size of the shuffle buffer
    :return: a dataset yielding ``(x, y)`` batches produced by ``read_csv_line``
    """
    def lines_without_header(filename):
        # Lines of one file, with the first (header) line dropped.
        return tf.data.TextLineDataset(filename).skip(1)

    return (
        tf.data.Dataset.list_files(filenames)   # dataset of file names
        .repeat()                               # repeat the list indefinitely
        .interleave(lines_without_header, cycle_length=n_readers)
        .map(read_csv_line, num_parallel_calls=n_parse_threads)
        .shuffle(shuffle_buffer_size)           # shuffle the decoded rows
        .batch(batch_size)
    )
# Training data: build the dataset and show its first batch.
train_set = csv_reader_dataset(["iris_training.csv"], batch_size=8)
for x_batch, y_batch in train_set.take(1):
    print("x:", x_batch, "y:", y_batch, sep="\n")

# Test data: build the dataset and show its first batch.
test_ds = csv_reader_dataset(["iris_test.csv"], batch_size=8)
for x_batch, y_batch in test_ds.take(1):
    print("x:", x_batch, "y:", y_batch, sep="\n")