"""
Created on Tue Aug 7 14:36:45 2018
@author: 33
"""
import sys
import pandas as pd
import os
import re
import xml.etree.ElementTree as ET
from lxml import etree
import xlwt
import csv
import codecs
def list_all_files(rootdir):
    """Return the paths of all regular files found under *rootdir*, recursively.

    Args:
        rootdir: Directory to scan.

    Returns:
        A list of paths built with ``os.path.join(rootdir, ...)``. Entries
        appear in the order ``os.listdir`` yields them, with the contents of
        each sub-directory inserted at that sub-directory's position.

    Raises:
        OSError: Propagated from ``os.listdir`` when a directory cannot be read.
    """
    _files = []
    for name in os.listdir(rootdir):
        path = os.path.join(rootdir, name)
        if os.path.isdir(path):
            # Descend and splice the sub-directory's files into the result.
            _files.extend(list_all_files(path))
        elif os.path.isfile(path):
            _files.append(path)
    return _files
# Root directory whose CSV files are cleaned by this script.
file_dir = r'E:\summer_intern\row_to_column\Morocco--201605-201712-m-10_清洗后'
files = list_all_files(file_dir)
print('*********************')
# Keep only real CSV inputs and skip the '*_new.csv' outputs that run() writes.
# Suffix checks are used instead of re.search('.csv', ...): in a regex the
# unescaped, unanchored '.' matched any path merely containing '?csv'
# (for example a folder called 'mycsvdata').
files1 = [
    f for f in files
    if f.endswith('.csv') and not f.endswith('_new.csv')
]
files = files1
print(files)
print(len(files))
# Log shared with run(); opened in append mode so earlier entries are kept.
history = codecs.open('Problems.txt', 'a+', 'utf-8')
def run():
    """Copy every CSV in the module-level ``files`` list to ``<stem>_new.csv``.

    The first row of each file fixes the expected column count. Any later row
    that is longer than that and ends with an empty field has that one trailing
    empty field removed before being written. Each such row is reported on
    stdout and recorded in the module-level ``history`` log as
    ``<first field>第<row number>行``. The path of every processed file is also
    written to ``history``.

    Raises:
        OSError: If an input file cannot be opened or an output file cannot be
            created.
        IndexError: If a path in ``files`` does not match the name pattern.
    """
    print('=============start running===========')
    for file_num, file in enumerate(files, start=1):
        history.write(file + '\n')
        # Pattern intentionally left exactly as revisefile() uses it, so both
        # functions derive the same '<stem>_new.csv' name for a given input.
        file1 = re.findall('(.*).csv', file)[0]
        file1 = file1 + '_new.csv'
        # newline='' is what the csv module requires so that line breaks
        # inside quoted fields survive; the with-block closes the input file,
        # which previously stayed open.
        with open(file, newline='', encoding='UTF-8') as src:
            with open(file1, 'w', newline='', encoding='UTF-8') as g:
                reader = csv.reader(src)
                writer = csv.writer(g)
                print('reading: ', file)
                col_num = 0
                for i, row in enumerate(reader, start=1):
                    if i == 1:
                        # The header row defines the expected width.
                        col_num = len(row)
                        print('column: ', col_num)
                    if len(row) > col_num and row[-1] == '':
                        print('Problem: ', row, 'length: ', len(row))
                        # Only a single trailing empty field is dropped.
                        row.pop(-1)
                        # The row can be empty after the pop (zero-width header).
                        first_field = row[0] if row else ''
                        history.write(first_field + '第' + str(i) + '行\n')
                    writer.writerow(row)
        print('已经读到 ',file_num)
# Run the cleaning pass over every file in `files` when the script is executed.
run()
def revisefile():
    """Replace each original CSV in ``files`` with its ``<stem>_new.csv`` copy.

    For every path in the module-level ``files`` list the matching
    ``<stem>_new.csv`` is moved over the original. An original is left
    untouched when no such copy exists; previously it was deleted first and
    the failed rename was silently ignored, which lost the data.

    Failures to replace a file are reported on stdout and do not stop the
    loop, so the operation remains best-effort.

    Raises:
        IndexError: If a path in ``files`` does not match the name pattern.
    """
    for file in files:
        # Same pattern as run(), so both derive the same output name.
        file1 = re.findall('(.*).csv', file)[0]
        file1 = file1 + '_new.csv'
        if not os.path.isfile(file1):
            # Nothing to promote: keep the original rather than deleting it.
            continue
        try:
            # os.replace overwrites the destination in a single step, so the
            # original is never removed without its replacement being in place.
            os.replace(file1, file)
        except OSError as err:
            print('could not replace', file, 'with', file1, ':', err)
# Swap the cleaned `_new.csv` outputs in for the original files.
revisefile()