.csv 文件是我们日常生活中经常遇到的文件类型。
一般来说,处理csv文件,直接利用Excel就可以了。但是,如果csv文件特别大,直接用Excel把文件打开的话很占内存。
这时候,Python 就可以派上用场了。
首先,我们看看如何利用 csv.reader() 处理 csv 文件:
读入数据:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Read a CSV file into a nested list, skipping two junk lines at the top.
import csv

datafile = 'example.csv'
data = []
# The csv module expects a text-mode file opened with newline='' so that it
# can handle line endings (including newlines embedded in quoted fields)
# itself. errors='replace' keeps the garbled leading lines from raising a
# decode error.
# NOTE(review): assumes the file is UTF-8 -- adjust `encoding` if it is not.
with open(datafile, 'r', newline='', encoding='utf-8', errors='replace') as f:
    # The file object keeps a read position (much like a C FILE*), so these
    # two calls consume the first two garbled lines before the parser runs.
    f.readline()
    f.readline()
    cv = csv.reader(f)
    # Each parsed row is a list of strings, so `data` becomes a list of lists.
    data.extend(cv)
将嵌套列表data写入文件:
# Write the nested list `data` (one inner list per row) to output.csv.
# newline='' stops the csv module's own '\r\n' row terminator from being
# translated again, which would produce blank rows on Windows.
with open('output.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    # writerows() writes every row in one call; it is equivalent to calling
    # writer.writerow(row) for each row in `data`.
    writer.writerows(data)
其次,还可以用csv.DictReader()进行处理
读入数据:
# Read a CSV file into a list of dicts keyed by the header row, skipping the
# first three data rows.
data = []
with open(input_file, "r", newline="") as f:
    reader = csv.DictReader(f)
    # Accessing fieldnames reads the first line of the file as the header.
    header = reader.fieldnames
    # Discard the three rows that follow the header. The built-in next()
    # replaces the Python 2-only reader.next(); the None default avoids a
    # StopIteration if the file has fewer than three data rows.
    for _ in range(3):
        next(reader, None)
    # Each remaining row is a dict; values are accessed as row[column_name].
    data.extend(reader)
写入数据:
# Write the list of row dicts `data` to `outputfile`, using `header` as the
# column order.
# newline='' stops the csv module's own row terminator from being translated
# again, which would produce blank rows on Windows.
with open(outputfile, "w", newline="") as g:
    writer = csv.DictWriter(g, delimiter=",", fieldnames=header)
    writer.writeheader()  # emit the header row first
    # Every element of `data` is a dict mapping column name -> value.
    writer.writerows(data)
下面看看用pandas库如何处理csv文件
import pandas as pd
# Build `predictions`, mapping each PassengerId to a 0/1 prediction derived
# from the Sex, Age and Pclass columns.
predictions = {}
df = pd.read_csv(file_path)  # df is a pandas DataFrame
# iterrows() yields (index, row) pairs; each row is a pandas Series whose
# values are looked up by column name, much like a dict.
for passenger_index, passenger in df.iterrows():
    passenger_id = passenger['PassengerId']
    if passenger['Sex'] == 'female' and passenger['Age'] < 50:
        predictions[passenger_id] = 1
    elif passenger['Pclass'] == 1 and passenger['Age'] < 18:
        predictions[passenger_id] = 1
    else:
        # Also reached when Age is missing: comparisons against NaN are False.
        predictions[passenger_id] = 0
# Write to a file: load a CSV, add a derived column, and save the result.
dataframe = pd.read_csv(path_to_csv)
# Element-wise string concatenation of the two name columns, separated by a
# single space.
full_names = dataframe['nameFirst'] + ' ' + dataframe['nameLast']
dataframe['nameFull'] = full_names
# to_csv() writes the row index as the first column by default.
dataframe.to_csv(path_to_new_csv)