例如我们有这样的文本(例子是自己造的:爬虫抓取的 JSON 原本应该只有一行,但爬下来的结果里夹杂了一些只有几个字符的短行),现在想要删除那些短行!
import os
# Root directory whose files are cleaned; it is passed to Cleaning_txt below.
path = r'E:\m'
def Cleaning_txt(path, *, min_length=10, output_path=r'E:n\a.txt'):
    """Drop short lines from every file under ``path`` and append the rest.

    The directory tree rooted at ``path`` is walked with ``os.walk``; inside
    each directory the files are handled in sorted name order. Every file is
    read as UTF-8 (a leading BOM is ignored), split with ``str.splitlines``,
    and only the lines whose length is strictly greater than ``min_length``
    are kept. The kept lines of each file are printed and appended to
    ``output_path``.

    Args:
        path: Root directory to scan recursively.
        min_length: A line is kept only when ``len(line) > min_length``.
        output_path: File the kept lines are appended to (opened in ``'a'``
            mode, so earlier content is preserved). An output file that
            already exists under ``path`` is read like any other input.

    Raises:
        OSError: If an input file or the output file cannot be opened.
        UnicodeDecodeError: If an input file is not valid UTF-8.
    """
    # NOTE(review): the default output path 'E:n\a.txt' has no backslash after
    # the drive letter, so on Windows it is resolved relative to the current
    # directory of drive E:. It may be a typo for 'E:\n\a.txt' -- confirm.
    for root, _dirnames, filenames in os.walk(path):
        for name in sorted(filenames):
            source_path = os.path.join(root, name)  # full path of the input file
            with open(source_path, 'r', encoding='utf-8-sig') as source:
                # Keep only the lines longer than the threshold.
                kept = [
                    line
                    for line in source.read().splitlines()
                    if len(line) > min_length
                ]

            cleaned = '\n'.join(kept)  # joining with '' would yield one long line
            print(cleaned)

            # Terminate each non-empty chunk with a newline so that the last
            # kept line of this file is not glued to the first kept line of
            # the next file appended to the same output.
            chunk = cleaned + '\n' if kept else ''
            with open(output_path, 'a', encoding='utf-8-sig') as sink:
                sink.write(chunk)
# Run the cleanup over every file found under `path`.
Cleaning_txt(path)
注意:如果把 sep 设为空字符串(sep = ''),
拼接后的结果就会变成一整行。