import pandas as pd
import numpy as np
import re
# Lift pandas' display truncation so printed DataFrames show every column
# and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
import os
# Years whose merged yearbook tables are filtered.
FILTER_YEARS = [2012]
# Workbook whose first sheet holds the city names in its first column.
# Each name gets the suffix '市' appended before it is compared, so the
# names are expected to be stored without that suffix.
CITY_LIST_WORKBOOK = r'D:\\个.xls'
# Directory of merged CSV files for one year ({} = year).
MERGED_DIR_TEMPLATE = r'D:\个人文件\研究生论文\长江\数据再处理扩充版\知网数据处理\合并结果\合并结果{}'
# Output workbook for one year ({} = year).
# NOTE(review): recent pandas releases may not provide a writer engine for
# '.xls'; if ExcelWriter rejects this path the extension has to become
# '.xlsx' -- confirm with whatever consumes this file before renaming it.
FILTERED_WORKBOOK_TEMPLATE = r"D:\个人文件\研究生论文\长江\数据再处理扩充版\知网数据处理\筛选结果\\{}年中国城市统计年鉴长三角.xls"


def select_city_rows(data, cities, header_rows=15, column=1):
    """Return the leading rows of *data* followed by one row per city.

    Args:
        data: DataFrame read from one merged CSV file.
        cities: iterable of city-name strings without the '市' suffix.
        header_rows: number of leading rows copied unchanged
            (presumably the table's title/header area -- confirm).
        column: position of the column that holds the city names.

    Returns:
        A DataFrame with the same columns as *data*. For every city, in
        the given order, it holds the first row whose cell in *column*
        equals ``city + '市'`` either verbatim or after all whitespace is
        removed. A city with no matching row yields a placeholder row
        that carries only the bare city name in *column*. Original row
        labels are kept; placeholder rows get label 0.
    """
    # Map each cell text (raw and whitespace-free) to the first row where
    # it occurs, so every city is resolved with one dictionary lookup
    # instead of rescanning the whole column.
    first_row = {}
    for position, cell in enumerate(data.iloc[:, column]):
        raw = str(cell)
        first_row.setdefault(raw, position)
        first_row.setdefault(''.join(raw.split()), position)

    column_label = data.columns[column]
    pieces = [data.iloc[0:header_rows, :]]
    for city in cities:
        position = first_row.get(city + '市')
        if position is None:
            pieces.append(pd.DataFrame([{column_label: city}]))
        else:
            pieces.append(data.iloc[[position]])
    # DataFrame.append no longer exists in pandas 2.x; a single concat is
    # also linear instead of quadratic in the number of rows.
    return pd.concat(pieces)


def filter_yearbook(year, cities):
    """Write the city rows of every merged CSV of *year* into one workbook.

    Each CSV in the year's merged directory becomes one sheet, named by
    the file's position in the directory listing.
    """
    merged_dir = MERGED_DIR_TEMPLATE.format(year)
    # List the inputs before creating the workbook so that a missing
    # input directory does not leave an empty output file behind.
    file_names = os.listdir(merged_dir)
    # The context manager saves and closes the workbook on exit, so no
    # explicit writer.save() / writer.close() is needed.
    with pd.ExcelWriter(FILTERED_WORKBOOK_TEMPLATE.format(year)) as writer:
        for sheet_index, file_name in enumerate(file_names):
            data = pd.read_csv(os.path.join(merged_dir, file_name), encoding='GBK')
            sheet = select_city_rows(data, cities)
            sheet.to_excel(writer, sheet_name='{}'.format(sheet_index))


def run_filter():
    """Filter every year in FILTER_YEARS using the city list workbook."""
    cities = pd.read_excel(CITY_LIST_WORKBOOK, sheet_name=0).iloc[:, 0]
    for year in FILTER_YEARS:
        print(year)
        filter_yearbook(year, cities)


if __name__ == '__main__':
    run_filter()
# Merge script: joins the CSV pieces that belong to each yearbook table into a single CSV per table.
import pandas as pd
import os
import re
# Years whose CSV pieces are merged.
MERGE_YEARS = [2012]
# Directory with the original Excel tables of one year ({} = year); only
# the file names are used, to discover which tables exist.
EXCEL_DIR_TEMPLATE = r'D:\个人文件\研究生论文\长江\数据再处理扩充版\知网数据处理\数据excel\{}年地级市统计年鉴'
# Directory with the CSV pieces of one year ({} = year).
CSV_DIR_TEMPLATE = r'D:\个人文件\研究生论文\长江\数据再处理扩充版\知网数据处理\csv统计年鉴\{}年统计年鉴'
# Output file for one merged table ({0} = year, {1} = table index).
MERGED_CSV_TEMPLATE = r'D:\个人文件\研究生论文\长江\数据再处理扩充版\知网数据处理\合并结果\合并结果{0}\{0}年{1}.csv'


def table_stem(file_name):
    """Return *file_name* up to its last dot, or '' when it has no dot."""
    return file_name.rpartition('.')[0]


def find_csv_pieces(csv_dir, stem):
    """Return the names of the consecutive CSV pieces of one table.

    The pieces are looked up as ``stem + '.xls.csv'``,
    ``stem + '.xls.csv.csv'`` and so on, each adding one more '.csv'.
    The search stops at the first name that does not exist in *csv_dir*,
    so the result is empty when even the first piece is missing.
    """
    pieces = []
    candidate = stem + '.xls.csv'
    while os.path.exists(os.path.join(csv_dir, candidate)):
        pieces.append(candidate)
        candidate += '.csv'
    return pieces


def merge_year(year, excel_dir=None, csv_dir=None,
               output_template=MERGED_CSV_TEMPLATE):
    """Merge the CSV pieces of every table of *year* into one CSV each.

    Args:
        year: yearbook year; fills the path templates.
        excel_dir: directory listing the tables (defaults to
            EXCEL_DIR_TEMPLATE for *year*).
        csv_dir: directory holding the CSV pieces (defaults to
            CSV_DIR_TEMPLATE for *year*).
        output_template: format string receiving (year, table index).

    The table index is the table's position in the directory listing. A
    table with no CSV piece is reported and skipped; the indexes of the
    other tables are unaffected.
    """
    if excel_dir is None:
        excel_dir = EXCEL_DIR_TEMPLATE.format(year)
    if csv_dir is None:
        csv_dir = CSV_DIR_TEMPLATE.format(year)

    stems = [table_stem(file_name) for file_name in os.listdir(excel_dir)]
    print(stems)
    for table_index, stem in enumerate(stems):
        pieces = find_csv_pieces(csv_dir, stem)
        print(stem, pieces)
        if not pieces:
            # Indexing the first piece used to fail here with IndexError.
            print('no CSV pieces found for {!r}; skipped'.format(stem))
            continue
        frames = [
            pd.read_csv(os.path.join(csv_dir, piece), encoding='GBK')
            for piece in pieces
        ]
        # DataFrame.append no longer exists in pandas 2.x. The row labels
        # of each piece are kept on purpose: they are written out as the
        # first column of the merged CSV, exactly as before.
        merged = pd.concat(frames)
        merged.to_csv(output_template.format(year, table_index), encoding='GBK')


def run_merge():
    """Merge the CSV pieces for every year in MERGE_YEARS."""
    for year in MERGE_YEARS:
        merge_year(year)


if __name__ == '__main__':
    run_merge()
# Source: https://blog.csdn.net/qq_42830971/article/details/114371667