# encoding=utf-8
import numpy as np
import pandas as pd
import sys
### Reading data
# Read delimited text files.
# NOTE: every print in this section uses the single-argument ``print(...)``
# form, which is valid syntax on both Python 2 and Python 3 (the bare
# ``print x, y`` statement is a SyntaxError on Python 3).
df = pd.read_csv('d:../data/ex1.csv')
print('df:\n{}'.format(df))
# read_table is the generic reader, so the delimiter is passed explicitly.
pt = pd.read_table('d:../data/ex1.csv', sep=',')
print('pt:\n{}'.format(pt))
# header=None: do not take column labels from the file; pandas numbers them.
pc = pd.read_csv('d:../data/ex2.csv', header=None)
print('pc:\n{}'.format(pc))
# Supply the column labels ourselves; the list is defined once and reused.
names = ['a', 'b', 'c', 'd', 'message']
pc1 = pd.read_csv('d:../data/ex2.csv', names=names)
print('pc1:=\n{}'.format(pc1))
print('names:= {}'.format(names))
# Specify both the column labels and the column used as the row index.
prc2 = pd.read_csv('d:../data/ex2.csv', names=names, index_col='message')
print('prc2:=\n{}'.format(prc2))
# A list of columns for index_col builds a hierarchical (multi-level) index.
parsed = pd.read_csv('d:../data/csv_mindex.csv', index_col=['key1', 'key2'])
print('parsed:\n{}'.format(parsed))
# Peek at the raw lines of ex3.txt. The context manager closes the file
# handle, which the bare ``list(open(...))`` form left open.
with open('d:../data/ex3.txt') as raw_file:
    listResult = list(raw_file)
print('listResult:\n{}'.format(listResult))
# Fields are split on runs of whitespace via a regex separator. A raw string
# is used so that ``\s`` is not treated as an (invalid) string escape.
# The path carries the same ``d:`` prefix as the open() call above and every
# other read in this script; it previously lacked it.
result = pd.read_table('d:../data/ex3.txt', sep=r'\s+')
print('result:\n{}'.format(result))
# Skip lines 0, 2 and 3 of ex4.csv while parsing.
skiprowsResult = pd.read_csv('d:../data/ex4.csv', skiprows=[0, 2, 3])
print('skiprowsResult:\n{}'.format(skiprowsResult))
# The same parse used to be executed a second time; reuse the frame instead
# (copied so that ``result`` stays an independent object).
result = skiprowsResult.copy()
print('result:\n{}'.format(result))
# Missing-value handling: read ex5.csv with pandas' default NA detection.
result = pd.read_csv('d:../data/ex5.csv')
print('result:\n{}'.format(result))
print('result.columns:=\n{}'.format(result.columns))
print('pd.isnull(result):=\n{}'.format(pd.isnull(result)))
print('result.dtypes:=\n{}'.format(result.dtypes))
# Re-read, additionally treating the literal string 'NULL' as missing.
result = pd.read_csv('d:../data/ex5.csv', na_values=['NULL'])
print('result_na_values:=\n{}'.format(result))
print('pd.isnull(result)\n{}'.format(pd.isnull(result)))
# Per-column missing-value markers, keyed by column label. This is the dict
# shape that ``pd.read_csv(..., na_values=...)`` accepts; the script only
# prints it and does not pass it to a reader.
sentinels = {'message': ['foo', 'NA'], 'something': ['two']}
print('sentinels:=\n{}'.format(sentinels))
# list(...) keeps the printed form a plain list on Python 3, where keys()
# and items() return view objects rather than lists.
print('sentinels.keys()=\n{}'.format(list(sentinels.keys())))
print('sentinels.items():=\n{}'.format(list(sentinels.items())))
# Reading text files in pieces
# Read only the first 5 rows.
nrows = pd.read_csv('d:../data/ex6.csv', nrows=5)
print('nrows:=\n{}'.format(nrows))
# Read in chunks: chunksize makes read_csv return an iterator of DataFrames.
# Printing the reader does not consume it, so the same reader is iterated
# below instead of opening the file a second time.
chunker = pd.read_csv('d:../data/ex6.csv', chunksize=1000)
print('chunker:=\n{}'.format(chunker))
# Accumulate the per-chunk counts of the 'key' column. The explicit dtype
# avoids relying on the default dtype of an empty Series.
tot = pd.Series([], dtype='float64')
for piece in chunker:
    # fill_value=0 treats keys missing from either side as a zero count.
    tot = tot.add(piece['key'].value_counts(), fill_value=0)
print('tot:=\n{}'.format(tot))
print('tot[:10]:=\n{}'.format(tot[:10]))
# To rank keys by frequency, use tot.sort_values(ascending=False);
# Series.order no longer exists in pandas.
# Writing data out
data = pd.read_csv('d:../data/ex5.csv')
# Single-argument print(...) is valid on both Python 2 and Python 3.
print('data:=\n{}'.format(data))
data.to_csv('out.csv')
# Write with an explicit delimiter.
# NOTE(review): this targets the same 'out.csv' as the call above, so the
# comma-separated output is overwritten -- confirm whether that is intended.
data.to_csv('out.csv', sep='|')
# Represent missing values as the string NULL.
data.to_csv('out1.csv', na_rep='NULL')
# Write neither the row index nor the column header.
data.to_csv('out2.csv', index=False, header=False)
# Write only a subset of the columns, in the given order.
data.to_csv('out3.csv', index=False, columns=['a', 'b', 'c'])
# Round-trip a date-indexed Series through a CSV file.
dates = pd.date_range('1/1/2000', periods=7)
ts = pd.Series(np.arange(7), index=dates)
print('test1')
print('ts:=\n{}'.format(ts))
# header=False pins the header-less file layout; the default for
# Series.to_csv differs between pandas versions.
ts.to_csv('tseries.csv', header=False)
# pd.Series.from_csv has been removed from pandas. Equivalent: parse with
# read_csv (first column as a date-parsed index, no header row), take the
# single data column, and clear the auto-generated names.
roundtrip_frame = pd.read_csv('tseries.csv', header=None, index_col=0,
                              parse_dates=True)
from_csv = roundtrip_frame.iloc[:, 0]
from_csv.name = None
from_csv.index.name = None
print('from_csv:=\n{}'.format(from_csv))
print('test')
# NumPy study notes 7
# (Blog-page residue: "Latest recommended article, published 2019-08-16 21:32:44")