# Reading CSV files
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
import sys
# Read ex1.csv with read_csv's default settings.
df = pd.read_csv('E:/python/wangyiPython/the fifth week/data/ex1.csv')
df

# read_table parses the same file once the separator is given as a comma.
df1 = pd.read_table(
    'E:/python/wangyiPython/the fifth week/data/ex1.csv',
    sep=',',
)
df1

# header=None: do not use the first row as column names.
a = pd.read_csv(
    'E:/python/wangyiPython/the fifth week/data/ex2.csv',
    header=None,
)
a

# Supply the column names explicitly.
a1 = pd.read_csv(
    'E:/python/wangyiPython/the fifth week/data/ex2.csv',
    names=['a', 'b', 'c', 'd', 'message'],
)
a1

# Same column names, but the 'message' column becomes the row index.
names = ['a', 'b', 'c', 'd', 'message']
a2 = pd.read_csv(
    'E:/python/wangyiPython/the fifth week/data/ex2.csv',
    names=names,
    index_col='message',
)
a2

# Passing two columns to index_col builds the row index from key1 and key2.
parsed = pd.read_csv(
    'E:/python/wangyiPython/the fifth week/data/csv_mindex.csv',
    index_col=['key1', 'key2'],
)
parsed

# The same file read without index_col, for comparison.
parsed1 = pd.read_csv(
    'E:/python/wangyiPython/the fifth week/data/csv_mindex.csv'
)
parsed1
# Read the raw lines of ex3.txt. The context manager closes the file handle,
# which a bare open() passed straight to list() would leave open.
with open('E:/python/wangyiPython/the fifth week/data/ex3.txt') as ex3_file:
    b = list(ex3_file)
b

# Split fields on runs of whitespace. The separator is a regular expression,
# so it is written as a raw string: '\s' in a normal string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
result = pd.read_table(
    'E:/python/wangyiPython/the fifth week/data/ex3.txt',
    sep=r'\s+',
)
result
# Skip the first, third and fourth lines of the file (0-based positions 0, 2, 3).
c = pd.read_csv(
    'E:/python/wangyiPython/the fifth week/data/ex4.csv',
    skiprows=[0, 2, 3],
)
c

d = pd.read_csv('E:/python/wangyiPython/the fifth week/data/ex5.csv')
d

# Boolean mask: True wherever a value is missing.
d.isnull()

# Per-column sentinel strings that should be treated as missing values.
sentinels = {
    'message': ['foo', 'NA'],
    'something': ['two'],
}
d1 = pd.read_csv(
    'E:/python/wangyiPython/the fifth week/data/ex5.csv',
    na_values=sentinels,
)
d1
e = pd.read_csv('E:/python/wangyiPython/the fifth week/data/ex6.csv')
e

# Read only the first 5 data rows.
e1 = pd.read_csv(
    'E:/python/wangyiPython/the fifth week/data/ex6.csv',
    nrows=5,
)
e1

# chunksize makes read_csv return an iterator that yields the file in pieces
# of up to 1000 rows instead of one DataFrame.
e2 = pd.read_csv(
    'E:/python/wangyiPython/the fifth week/data/ex6.csv',
    chunksize=1000,
)
e2

# Accumulate the per-chunk value counts of the 'key' column.
# The explicit dtype keeps the accumulator numeric: an empty Series without a
# dtype warns on some pandas versions and defaults to object on newer ones.
tot = Series([], dtype='float64')
for piece in e2:
    # fill_value=0 treats a key that is absent on one side as a count of 0.
    tot = tot.add(piece['key'].value_counts(), fill_value=0)
tot
tot.shape
f = pd.read_csv('E:/python/wangyiPython/the fifth week/data/ex5.csv')
f

# --- to_csv ---
# (This heading used to be a bare `to_csv` name, which raises NameError when
# the file is run as a script.)

# Write the DataFrame to a new file, out.csv.
f.to_csv('E:/python/wangyiPython/the fifth week/data/out.csv')

# Writing to sys.stdout prints the CSV text instead of saving it; here with
# '|' as the field separator.
f.to_csv(sys.stdout, sep='|')
f

# Represent missing values as the string 'Null' in the output.
f.to_csv(sys.stdout, na_rep='Null')
f

# Omit both the row index and the header row.
f.to_csv(sys.stdout, index=False, header=False)

# Write only columns a, b and c, without the row index.
f.to_csv(sys.stdout, index=False, columns=['a', 'b', 'c'])
f
# pd.date_range produces a DatetimeIndex, the index used for time-series data.
# pd.date_range(start=None, end=None, periods=None, freq='D', tz=None,
#               normalize=False, name=None, closed=None, **kwargs)
dates = pd.date_range('1/1/2000', periods=7)
ts = Series(np.arange(7), index=dates)

# Write without a header row. Older pandas omitted it by default for a Series,
# newer pandas writes one, so being explicit keeps the file layout the same
# on every version.
ts.to_csv(
    'E:/python/wangyiPython/the fifth week/data/tseries.csv',
    header=False,
)
ts

# Series.from_csv was removed from pandas; read_csv is the replacement.
# Column 0 holds the dates (parsed into the index) and the single remaining
# column holds the values, so taking that column gives back a Series.
ts_roundtrip = pd.read_csv(
    'E:/python/wangyiPython/the fifth week/data/tseries.csv',
    header=None,
    index_col=0,
    parse_dates=True,
).iloc[:, 0]
# Drop the positional labels read_csv assigns when there is no header row.
ts_roundtrip.index.name = None
ts_roundtrip.name = None
ts_roundtrip
# Using the csv module
import csv
# Print every row of ex7.csv. The with-block closes the file when the loop is
# done; newline='' is how the csv module documentation says to open files.
with open('E:/python/wangyiPython/the fifth week/data/ex7.csv', newline='') as file:
    file
    reader = csv.reader(file)
    for line in reader:
        print(line)

# Read the same file again, this time keeping all rows in a list.
with open('E:/python/wangyiPython/the fifth week/data/ex7.csv', newline='') as ex7_file:
    lines = list(csv.reader(ex7_file))

# The first row is the header; the remaining rows are the data.
header, values = lines[0], lines[1:]

# zip(*values) transposes the rows into columns, so each header name maps to
# a tuple holding that column's values.
data_dict = dict(zip(header, zip(*values)))
data_dict
class my_dialect(csv.Dialect):
    """CSV dialect using ';' as the delimiter and minimal quoting.

    Fields are quoted with '"' only when they need it (QUOTE_MINIMAL), and
    each row ends with a newline character.
    """

    # Was '/n' (forward slash), which ended every row with the two literal
    # characters "/n" instead of a line break.
    lineterminator = '\n'
    delimiter = ';'
    quotechar = '"'
    quoting = csv.QUOTE_MINIMAL
# Write a small table to mydata.csv using the custom dialect.
# newline='' stops the text layer from translating the line endings that the
# csv writer emits, as the csv module documentation recommends.
with open('mydata.csv', 'w', newline='') as f:
    writer = csv.writer(f, dialect=my_dialect)
    writer.writerow(('one', 'two', 'three'))
    writer.writerow(('1', '2', '3'))
    writer.writerow(('4', '5', '6'))
    writer.writerow(('7', '8', '9'))