直接上代码:下面是REPL代码,不是完整的脚本工程!(其中的函数定义需要先于调用它们的语句执行。)
using JLD2;
using DelimitedFiles;
# Input CSV and the output file used by the timing runs in this snippet.
csv_file = "C:\\Users\\rustr\\Desktop\\000001.XSHE.csv";
jld_file = "C:\\Users\\rustr\\Desktop\\000001.XSHE2.h5";
# With header=true, readdlm returns a (data, header) tuple; the header is discarded.
# `_` is Julia's discard target; the original `~` would rebind the `~` operator in Main.
csv_data, _ = DelimitedFiles.readdlm(csv_file, ',', header=true);
# Author's timing for the read above: about 6.5 seconds.
s = size(csv_data); # author's note: ~590k rows ("59w"), 14 columns
# Skip the first column, then convert the rest to Float64 (empty cells become 0.0).
csv = csv_data[:, 2:end];
new_csv = drop_array_null(csv);
# Stack the table with 20 extra copies of itself to enlarge the benchmark payload.
data = turn_larger(new_csv, 20);
group_dataset_name = "000001.XSHE/000002.XSHE";
# Write step (author's timing: ~4 s). Left disabled as in the original; the read
# loop below needs the file to have been written at least once.
#@time write_jld_h5(jld_file,data,group_dataset_name)
# Read the dataset back ten times (author's timing: 0.5-1 s per read).
for i = 1:10
    @time data = read_jld_h5(jld_file, group_dataset_name)
end
#test
"""
    turn_larger(data, n)

Return `data` stacked vertically with `n` additional copies of itself, i.e.
`n + 1` copies in total, concatenated with `vcat`.

When `1:n` is empty (`n < 1`), `data` itself is returned without copying.
"""
function turn_larger(data, n)
    # Same repeat count as iterating `for i = 1:n`.
    extra = length(1:n)
    extra == 0 && return data
    # One concatenation instead of re-copying the growing result on every pass.
    return reduce(vcat, fill(data, extra + 1))
end
"""
    drop_array_null(csv_data)

Return a `Float64` array with the same shape as `csv_data` in which every
element equal to the empty string `""` is replaced by `0.0` and every other
element is converted to `Float64`.

Elements that cannot be converted to `Float64` (for example non-empty strings)
raise the conversion error.
"""
function drop_array_null(csv_data)
    # A typed comprehension keeps the input's shape, so vectors and N-d arrays
    # work as well as matrices (the original hard-coded a 2-d result).
    return Float64[(cell == "" ? 0.0 : cell) for cell in csv_data]
end
"""
    write_jld_h5(file, data, group_dataset_name)

Open `file` with JLD2 in write mode (`"w"`), store `data` under the name
`group_dataset_name`, and close the file again even if the write throws.
"""
function write_jld_h5(file, data, group_dataset_name)
    # The do-block form closes the file when the block exits, normally or by error.
    return JLD2.jldopen(file, "w") do jld
        JLD2.write(jld, group_dataset_name, data)
    end
end
"""
    read_jld_h5(file, group_dataset_name)

Open `file` with JLD2 in read mode (`"r"`), read the entry stored under
`group_dataset_name`, close the file, and return what was read.

The file is closed even if reading throws; the exception then propagates to the
caller.
"""
function read_jld_h5(file, group_dataset_name)
    # No untyped `[]` placeholder: it could never be returned (a failed read
    # propagates its exception) and only made the local's type unstable.
    return JLD2.jldopen(file, "r") do jld
        JLD2.read(jld, group_dataset_name)
    end
end
# Time writing `data` to `jld_file` under `group_dataset_name` via the HDF5-based writer.
# NOTE(review): write_h5 is defined after this call, so at the REPL the definition must be
# evaluated first. No `using HDF5` / `import HDF5` is visible in this snippet — confirm it
# is loaded before running.
@time write_h5(jld_file,data,group_dataset_name)
"""
    write_h5(file, data, group_dataset_name)

Write `data` to `file` under the name `group_dataset_name` by delegating to
`HDF5.h5write`. Note the argument order: `data` comes before the dataset name
here, while `h5write` takes the name first.
"""
write_h5(file, data, group_dataset_name) = HDF5.h5write(file, group_dataset_name, data)
# Time reading the dataset back via the HDF5-based reader; the result rebinds `data`.
# NOTE(review): read_h5 is defined after this call, so at the REPL the definition must be
# evaluated first.
@time data = read_h5(jld_file,group_dataset_name)
"""
    read_h5(file, group_dataset_name)

Open `file` with HDF5 in read mode (`"r"`), read the entry stored under
`group_dataset_name`, close the file, and return what was read.

The file is closed even if reading throws; the exception then propagates to the
caller.
"""
function read_h5(file, group_dataset_name)
    # No untyped `[]` placeholder: it could never be returned (a failed read
    # propagates its exception) and only made the local's type unstable.
    return HDF5.h5open(file, "r") do fid
        HDF5.read(fid, group_dataset_name)
    end
end
结论:
1、两个库的读写效率差不多,至少没感觉到JLD2和HDF5有什么明显的区别。
由于JLD2库更加有活力,一直在更新,是Julia的主力库;HDF5库则不同,已经不更新了。
建议转JLD2吧。