数据加载、存储与文件格式
读写文本格式的数据
# Load a comma-delimited file; read_table is given the separator explicitly.
df = pd.read_csv('ch06/ex1.csv')
df = pd.read_table('ch06/ex1.csv', sep=',')

# header=None tells pandas not to use the first row as column labels;
# names= supplies the labels instead.
pd.read_csv('ch06/ex2.csv', header=None)
pd.read_csv('ch06/ex2.csv', names=['a', 'b', 'c', 'd', 'message'])
names = ['a', 'b', 'c', 'd', 'message']
pd.read_csv('ch06/ex2.csv', names=names, index_col='message')

# A list passed to index_col builds a hierarchical (multi-level) index.
parsed = pd.read_csv('ch06/csv_mindex.csv', index_col=['key1', 'key2'])

# The separator is a regular expression, so it is written as a raw string:
# '\s' is not a valid string escape and triggers a warning on recent Pythons.
result = pd.read_table('ch06/ex3.txt', sep=r'\s+')

# skiprows takes 0-based row positions to drop while reading.
pd.read_csv('ch06/ex4.csv', skiprows=[0, 2, 3])

# A dict for na_values gives each column its own set of missing-value markers.
sentinels = {'message': ['foo', 'NA'], 'something': ['two']}
pd.read_csv('ch06/ex5.csv', na_values=sentinels)
逐块读取文本文件
# Read only the first five rows of the file.
pd.read_csv('ch06/ex6.csv', nrows=5)

# With chunksize, read_csv returns an iterator that yields DataFrames of up
# to 1000 rows each instead of loading the whole file at once.
chunker = pd.read_csv('ch06/ex6.csv', chunksize=1000)

# Running total of how often each value occurs in the 'key' column.
# The dtype is given explicitly because the default dtype of an empty Series
# differs between pandas versions.
tot = pd.Series(dtype='float64')
for piece in chunker:
    # fill_value=0 treats keys missing on either side as a count of zero.
    tot = tot.add(piece['key'].value_counts(), fill_value=0)

# Most frequent keys first.
tot = tot.sort_values(ascending=False)
将数据写出到文本格式
data = pd.read_csv('ch06/ex5.csv')

# Write to a file on disk using the default comma separator.
data.to_csv('ch06/out.csv')

# Writing to sys.stdout prints the text instead of creating a file.
data.to_csv(sys.stdout, sep='|')                    # custom delimiter
data.to_csv(sys.stdout, na_rep='NULL')              # marker for missing values
data.to_csv(sys.stdout, index=False, header=False)  # no row or column labels
data.to_csv(sys.stdout, index=False, columns=['a', 'b', 'c'])  # column subset

# Series.from_csv was removed in pandas 1.0. Its defaults were header=None and
# index_col=0, so read the file the same way and squeeze the single data
# column into a Series.
pd.read_csv('ch06/tseries.csv', header=None, index_col=0,
            parse_dates=True).squeeze('columns')
手工处理分隔符格式
# Read every row up front inside a with-block so the file handle is closed
# afterwards; newline='' lets the csv module handle line endings itself.
with open('ch06/ex7.csv', newline='') as f:
    lines = list(csv.reader(f))

# First row holds the column names, the remaining rows hold the data.
header, values = lines[0], lines[1:]

# zip(*values) transposes the rows into columns; pair each with its name.
data_dict = {h: v for h, v in zip(header, zip(*values))}
data_dict
class my_dialect(csv.Dialect):
    """CSV dialect using ';' between fields and '\\n' at the end of each row."""

    lineterminator = '\n'
    delimiter = ';'
    quotechar = '"'
    # Quote a field only when it contains a special character.
    quoting = csv.QUOTE_MINIMAL
# newline='' stops the text layer from translating the dialect's line
# terminator, so the rows are written exactly as the csv writer emits them.
with open('mydata.csv', 'w', newline='') as f:
    writer = csv.writer(f, dialect=my_dialect)
    # Header row followed by three data rows.
    writer.writerows((
        ('one', 'two', 'three'),
        ('1', '2', '3'),
        ('4', '5', '6'),
        ('7', '8', '9'),
    ))
二进制数据格式
# Round-trip a DataFrame through pandas' pickle format.
pickle_path = 'ch06/frame_pickle'
frame = pd.read_csv('ch06/ex1.csv')
frame.to_pickle(pickle_path)
# NOTE: unpickling can execute arbitrary code; only load files you trust.
pd.read_pickle(pickle_path)