17.从多个文件中连接数据

1.基础python 

vi 9csv_read_concat_rows_from_mutiple_files.py 
#!/usr/bin/env python3 
import csv 
import glob 
import os 
import sys 
def concat_csv_rows(input_dir, output_file, pattern='sales_*'):
    """Concatenate the rows of all matching CSV files into one output file.

    The header row of the first non-empty input file is written once; the
    header row of every later file is skipped, so the result has a single
    header followed by the data rows of each file in turn.

    Args:
        input_dir: Directory that is searched for input files.
        output_file: Path of the CSV file to create. It is overwritten, not
            appended to, so running the script twice does not duplicate rows.
        pattern: Glob pattern (relative to ``input_dir``) selecting the
            input files.

    Returns:
        The list of input paths that were processed, in processing order.
        When nothing matches, the list is empty and no output file is
        created.
    """
    output_path = os.path.abspath(output_file)
    # Sort for a reproducible row order (glob order is arbitrary) and never
    # read the output file back in, even if its name matches the pattern.
    input_paths = sorted(
        path
        for path in glob.glob(os.path.join(input_dir, pattern))
        if os.path.abspath(path) != output_path
    )
    if not input_paths:
        return input_paths

    # The csv module works on text streams in Python 3; newline='' lets it
    # handle line endings (including newlines embedded in quoted fields).
    with open(output_file, 'w', newline='') as csv_out_file:
        filewriter = csv.writer(csv_out_file)
        header_written = False
        for input_path in input_paths:
            print(os.path.basename(input_path))
            with open(input_path, 'r', newline='') as csv_in_file:
                filereader = csv.reader(csv_in_file)
                # next(..., None) returns None instead of raising
                # StopIteration when the file is completely empty.
                header = next(filereader, None)
                if header is None:
                    continue
                if not header_written:
                    # Only the first header is kept; later ones are dropped.
                    filewriter.writerow(header)
                    header_written = True
                filewriter.writerows(filereader)
    return input_paths


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit('usage: {} INPUT_DIR OUTPUT_FILE'.format(sys.argv[0]))
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    concat_csv_rows(input_file, output_file)
#输出				
[root@mysql51 python_scripts]# python 9csv_read_concat_rows_from_mutiple_files.py `pwd`  10output.csv 
sales_march_2014.csv
sales_jannary_2014.csv
sales_february_2014.csv
			
[root@mysql51 python_scripts]# more  10output.csv 
Customer ID,Customer Name,Invoice Number,Sale Amount,Purchase Date
1234,John Smith,100-0014,"$1,350.00 ",3/4/2014
8765,Tonny Song,100-0015,"$1,167.00 ",3/8/2014
2345,Mary Harrison,100-0016,"$1,789.00 ",3/17/2014
6543,Rachel Paz,100-0017,"$2,042.00 ",3/22/2014
3456,Lucy Gomez,100-0018,"$1,511.00 ",3/28/2014
4321,Susan Wallace,100-0019,"$2,280.00 ",3/30/2014
Customer ID,Customer Name,Invoice Number,Sale Amount,Purchase Date
1234,John Smith,100-0014,"$1,350.00 ",3/4/2014
8765,Tonny Song,100-0015,"$1,167.00 ",3/8/2014
2345,Mary Harrison,100-0016,"$1,789.00 ",3/17/2014
6543,Rachel Paz,100-0017,"$2,042.00 ",3/22/2014
3456,Lucy Gomez,100-0018,"$1,511.00 ",3/28/2014
4321,Susan Wallace,100-0019,"$2,280.00 ",3/30/2014
1234,John Smith,100-0002,"$1,200.00 ",1/1/2014
2345,Mary Harrison,100-0003,"$1,425.00 ",1/6/2014
3456,Lucy Gomez,100-0004,"$1,390.00 ",1/11/2014
4567,Rupert Jones,100-0005,"$1,257.00 ",1/18/2014
5678,Jenny Walters,100-0006,"$1,725.00 ",1/24/2014
6789,Samantha Donadson,100-0007,"$1,995.00 ",1/31/2014
9876,Danniel Farber,100-0008,"$1,115.00 ",2/2/2014
8765,Laney Stone,100-0009,"$1,367.00 ",2/8/2014
7654,Roger Lipney,100-0010,"$2,135.00 ",2/15/2014
6543,Thomas Haines,100-0011,"$1,346.00 ",2/17/2014
5432,Anushka Vaz,100-0012,"$1,560.00 ",2/21/2014
4321,Harriet Cooper,100-0013,"$1,852.00 ",2/25/2014

2.pandas的方法实现。

vi pandas_concat_rows_from_multiple_files.py 
#!/usr/bin/env python3 
import pandas as pd 
import glob 
import os 
import sys 
def concat_data_frames(input_dir, output_file, pattern='sales_*'):
    """Stack the rows of all matching CSV files and write them to one CSV.

    Every input file is read into a DataFrame, the frames are concatenated
    vertically (axis=0) with a fresh index, and the result is written
    without the index column.

    Args:
        input_dir: Directory that is searched for input files.
        output_file: Path of the CSV file to write.
        pattern: Glob pattern (relative to ``input_dir``) selecting the
            input files.

    Returns:
        The concatenated DataFrame that was written to ``output_file``.

    Raises:
        ValueError: If no input file matches the pattern.
    """
    output_path = os.path.abspath(output_file)
    # Sort for a reproducible row order and skip a previous output file that
    # happens to match the pattern, so re-runs do not ingest their own result.
    all_files = sorted(
        path
        for path in glob.glob(os.path.join(input_dir, pattern))
        if os.path.abspath(path) != output_path
    )
    if not all_files:
        raise ValueError(
            'no input files match {!r}'.format(os.path.join(input_dir, pattern))
        )
    # Read every cell as text: type inference would otherwise rewrite values
    # on the way through (e.g. "007" -> 7, "1.50" -> 1.5, or ints -> floats
    # when another file has blanks in the same column).
    all_data_frames = [
        pd.read_csv(path, index_col=None, dtype=str, keep_default_na=False)
        for path in all_files
    ]
    data_frame_concat = pd.concat(all_data_frames, axis=0, ignore_index=True)
    data_frame_concat.to_csv(output_file, index=False)
    return data_frame_concat


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit('usage: {} INPUT_DIR OUTPUT_FILE'.format(sys.argv[0]))
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    concat_data_frames(input_file, output_file)

#结果
python C:\Users\4201.HJSC\PycharmProjects\pythonProject\pandas_concat_rows_from_multiple_files.py \
C:\Users\4201.HJSC\Desktop\Python_exercise\ \
C:\Users\4201.HJSC\Desktop\Python_exercise\13output.csv

more 13output.csv
Customer ID,Customer Name,Invoice Number,Sale Amount,Purchase Date
9876,Danniel Farber,100-0008,"$1,115.00 ",2/2/2014
8765,Laney Stone,100-0009,"$1,367.00 ",2/8/2014
7654,Roger Lipney,100-0010,"$2,135.00 ",2/15/2014
6543,Thomas Haines,100-0011,"$1,346.00 ",2/17/2014
5432,Anushka Vaz,100-0012,"$1,560.00 ",2/21/2014
4321,Harriet Cooper,100-0013,"$1,852.00 ",2/25/2014
1234,John Smith,100-0002,"$1,200.00 ",1/1/2014
2345,Mary Harrison,100-0003,"$1,425.00 ",1/6/2014
3456,Lucy Gomez,100-0004,"$1,390.00 ",1/11/2014
4567,Rupert Jones,100-0005,"$1,257.00 ",1/18/2014
5678,Jenny Walters,100-0006,"$1,725.00 ",1/24/2014
6789,Samantha Donadson,100-0007,"$1,995.00 ",1/31/2014
1234,John Smith,100-0014,"$1,350.00 ",3/4/2014
8765,Tonny Song,100-0015,"$1,167.00 ",3/8/2014
2345,Mary Harrison,100-0016,"$1,789.00 ",3/17/2014
6543,Rachel Paz,100-0017,"$2,042.00 ",3/22/2014
3456,Lucy Gomez,100-0018,"$1,511.00 ",3/28/2014
4321,Susan Wallace,100-0019,"$2,280.00 ",3/30/2014

3.总结
#axis=0 表示沿行方向纵向(垂直)堆叠数据,即把各个文件的行依次追加在一起;axis=1 表示沿列方向横向(并排)拼接数据。
#pd.concat 连接数据。
#pd.merge(DataFrame1,DataFrame2,on='key',how='inner') #基于键列做类似 SQL JOIN 的横向合并,适用于按键关联两个数据集,而不是像 pd.concat 那样纵向追加行。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值