一.基本I/O
1.将对象写入磁盘
from pylab import plt, mpl
plt.style.use('seaborn')
mpl.rcParams['font.family'] = 'serif'
%matplotlib inline
使用随机数据,保存到一个列表对象中
import pickle
import numpy as np
from random import gauss #生成正态分布随机数用的
# One million draws from a normal distribution (mean 1.5, std 2), kept as a
# plain Python list.
a = [gauss(1.5, 2) for _ in range(10 ** 6)]
# Placeholder prefix; file names are appended to it by string concatenation.
path = '....'
# Binary write handle for the pickle file (closed after the first dump).
pkl_file = open(path + 'data.pkl', 'wb')
序列化与反序列化:dump()将对象写入磁盘,load()将对象读回内存
%time pickle.dump(a, pkl_file) #序列化对象a
pkl_file.close()
pkl_file = open(path + 'data.pkl', 'rb') #二进制读模式打开文件
%time b = pickle.load(pkl_file) #读取对象并反序列化
np.allclose(np.array(a), np.array(b)) #转化为ndarray对象,验证是否包含相同的数值
pkl_file = open(path + 'data.pkl', 'wb')
%time pickle.dump(np.array(a), pkl_file) #序列化
%time pickle.dump(np.array(a) ** 2, pkl_file)
pkl_file.close()
将两个ndarray对象读回内存
# Objects come back in the order they were dumped: each pickle.load() call
# returns the next pickled object in the file. The with-block guarantees the
# handle is closed even if a load fails.
with open(path + 'data.pkl', 'rb') as pkl_file:
    x = pickle.load(pkl_file)
    y = pickle.load(pkl_file)
x[:4]  # first four values of the first stored array
y[:4]  # first four values of the second stored array
将字典对象写入磁盘并读回
# Bundle both arrays in one dict so they are stored as a single pickled
# object and can be looked up by name after loading.
with open(path + 'data.pkl', 'wb') as pkl_file:
    pickle.dump({'x': x, 'y': y}, pkl_file)
# Read the dict back; the with-blocks close the file even if pickling fails.
with open(path + 'data.pkl', 'rb') as pkl_file:
    data = pickle.load(pkl_file)
# Show the first four values stored under each key. The loop body must be
# indented; it was flush-left in the original, which is a syntax error.
for key, values in data.items():
    print(key, values[:4])
2.读取和写入文本文件
import pandas as pd
rows = 5000
# rows x 5 matrix of standard-normal draws, rounded to four decimals.
a = np.random.standard_normal((rows, 5)).round(4)
# One hourly timestamp per data row, starting at 2019-01-01.
t = pd.date_range(start='2019/1/1', periods=rows, freq='H')
header = 'date,no1,no2,no3,no4,no5\n'
with open(path + 'data.csv', 'w') as csv_file:
    # The original built the header but never wrote it, leaving the CSV
    # without a column row.
    csv_file.write(header)
    # Pair each timestamp with its row of five values and write one line each.
    for t_, (no1, no2, no3, no4, no5) in zip(t, a):
        s = '{},{},{},{},{},{}\n'.format(t_, no1, no2, no3, no4, no5)
        csv_file.write(s)
用readlines()方法逐行读入
csv_file = open(path + 'data.csv', 'r')
for i in range(