分享朋友的机器学习应用案例:使用机器学习实现财富自由(www.abuquant.com)
并发读取数据能够大大提高数据的读取速度。
import tensorflow as tf
import numpy as np
from IPython.display import display, HTML
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (20, 10)
# Collect every CSV file under data/. Printing the result here shows the
# Variable object itself, not the matched names; the names only become
# available once the variable has been initialized inside a session.
filenames = tf.train.match_filenames_once('data/*.csv')
print(filenames)
# An ordinary variable, evaluated later in the session alongside `filenames`.
keep_prob = tf.Variable(1.0, name='keep_prob')
# Build the queue of file names. num_epochs=3 cycles through the file list
# three times, so with 3 files under data/ the queue yields 9 file names in
# total. num_epochs can be set to match the number of training epochs.
# shuffle=False keeps the files in their matched order.
filename_queue = tf.train.string_input_producer(
    filenames, shuffle=False, num_epochs=3)
<tf.Variable 'matching_filenames:0' shape=<unknown> dtype=string_ref>
# Create a reader that emits one text line per read.
reader = tf.TextLineReader()
# Pull the next record from the file-name queue built earlier. `key`
# identifies where the record came from (the original note describes it as
# the file name) and `value` is the text that was read.
key, value = reader.read(filename_queue)
# Decode the CSV text into its two columns. Each column is a string whose
# default value is 'null'.
example, label = tf.decode_csv(value, record_defaults=[['null']] * 2)
# Run the graph built above.
with tf.Session() as session:
    # Note: the local initializer is required as well as the global one.
    session.run(tf.local_variables_initializer())
    session.run(tf.global_variables_initializer())
    print(session.run([keep_prob]))
    print(session.run([filenames]))
    # The coordinator manages the queue-runner threads (similar in spirit to
    # a thread pool).
    coord = tf.train.Coordinator()
    # Start the threads that fill the queues.
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        for i in range(20):
            e, l = session.run([example, label])
            # Formatted explicitly so the line reads the same whether print
            # is a statement or a function.
            print('%s %s' % (e, l))
    finally:
        # Always stop and join the queue threads, even if a read fails,
        # so they are not left running.
        coord.request_stop()
        coord.join(threads)
[1.0]
[array(['data/A.csv', 'data/B.csv', 'data/C.csv'], dtype=object)]
A1 a1
A2 a2
A3 a3
B1 b1
B2 b2
B3 b3
C1 c1
C2 c2
c3 c3
A1 a1
A2 a2
A3 a3
B1 b1
B2 b2
B3 b3
C1 c1
C2 c2
c3 c3
A1 a1
A2 a2