# -*- coding: utf-8 -*-
"""
Created on Tue Aug 7 14:36:45 2018
@author: 33
"""
import sys
#reload(sys)
#sys.setdefaultencoding('utf-8')
import pandas as pd
import os
import re
import xml.etree.ElementTree as ET
from lxml import etree
import xlwt
import csv
import codecs
def list_all_files(rootdir):
    """Return the paths of all regular files under *rootdir*, recursively.

    Args:
        rootdir: Directory to scan.

    Returns:
        A list of paths, each built by joining *rootdir* (or a nested
        sub-directory) with an entry name. Entries are visited in the
        order ``os.listdir`` reports them; a sub-directory's files are
        added at the position where the sub-directory was encountered.

    Raises:
        OSError: If *rootdir* (or a nested directory) cannot be listed.
    """
    _files = []
    # Iterate over the directory entries directly (files and sub-directories).
    for name in os.listdir(rootdir):
        path = os.path.join(rootdir, name)
        if os.path.isdir(path):
            # Descend into the sub-directory and collect its files too.
            _files.extend(list_all_files(path))
        elif os.path.isfile(path):
            _files.append(path)
    return _files
# Root folder that is scanned recursively for CSV files.
file_dir = r'E:\summer_intern\row_to_column\Morocco--201605-201712-m-10_清洗后'
files = list_all_files(file_dir)
# print(files)
print('*********************')
# Keep only paths that really end in ".csv", and skip the "*new.csv" files
# that run() writes as intermediate output. Suffix checks are used instead of
# unanchored regex searches: in the pattern '.csv' the dot matches any
# character, so a path that merely contained e.g. "_csv" was selected too.
files1 = [
    f for f in files
    if f.endswith('.csv') and not f.endswith('new.csv')
]
files = files1
print(files)
print(len(files))
# Log file opened in append mode; run() writes the processed file names and
# the repaired rows to it.
history = codecs.open('Problems.txt', 'a+', 'utf-8')
################ Scratch notes: ideas to try later (not used by the script)
#d=open('1.csv').readlines()
#d[1]=''
#with open('new.csv','w') as f:
# f.writelines(d)
#with open('fin.csv', 'r') as fin, open('fout.csv', 'w', newline='') as fout:
#
# # define reader and writer objects
# reader = csv.reader(fin, skipinitialspace=True)
# writer = csv.writer(fout, delimiter=',')
#
# # write headers
# writer.writerow(next(reader))
#
# # iterate and write rows based on condition
# for i in reader:
# if int(i[-1]) > 2000:
# writer.writerow(i)
def run():
    """Write a cleaned ``*_new.csv`` copy of every path in ``files``.

    For each input file the column count is taken from its first row.
    Any later row that has more fields than that and whose last field is
    the empty string has that last field removed. All rows, repaired or
    not, are written to ``<name>_new.csv`` next to the input.

    The module-level ``history`` log receives the path of every file that
    is processed and one line per repaired row (its first field and its
    1-based row number).

    Raises:
        OSError: If an input file cannot be read or an output file cannot
            be written.
    """
    print('=============start running===========')
    file_num = 0
    for file in files:
        history.write(file + '\n')
        file_num += 1
        # Derive the output path. The pattern is intentionally the same one
        # revisefile() uses, so both functions agree on the file name.
        file1 = re.findall('(.*).csv', file)[0] + '_new.csv'
        # Open both files in one ``with`` so the input handle is closed too;
        # newline='' lets the csv module handle line endings itself.
        with open(file, newline='', encoding='UTF-8') as src, \
                open(file1, 'w', newline='', encoding='UTF-8') as g:
            reader = csv.reader(src)
            writer = csv.writer(g)
            print('reading: ', file)
            for i, row in enumerate(reader, start=1):
                if i == 1:
                    # The first row defines the expected number of columns.
                    col_num = len(row)
                    print('column: ', col_num)
                if len(row) > col_num and row[-1] == '':
                    print('Problem: ', row, 'length: ', len(row))
                    # Drop the surplus trailing empty field.
                    row.pop(-1)
                    # The row can be empty after the pop; avoid IndexError.
                    first = row[0] if row else ''
                    history.write(first + '第' + str(i) + '行\n')
                writer.writerow(row)
        print('已经读到 ',file_num)
# Execute the cleaning pass at module level, i.e. as soon as this file is run.
run()
def revisefile():
    """Replace each path in ``files`` with its ``*_new.csv`` counterpart.

    For every input path the matching ``<name>_new.csv`` is moved over the
    original file. An original is left untouched when its cleaned copy does
    not exist, so a missing or failed cleaning step cannot delete data.
    Failures to replace a file are reported and the loop continues.
    """
    for file in files:
        file1 = re.findall('(.*).csv', file)[0] + '_new.csv'
        if not os.path.isfile(file1):
            # Nothing to swap in; keep the original rather than deleting it.
            continue
        try:
            # os.replace overwrites the destination in a single step, so the
            # original is never removed without its replacement in place.
            os.replace(file1, file)
        except OSError as err:
            print('could not replace', file, 'with', file1, ':', err)
# Swap the cleaned copies into place at module level, after the run() call above.
revisefile()
# Data cleaning
# (Web-page residue) Latest recommended article published 2024-01-02 01:20:57