如图所示,按照文件夹内的文件顺序,逐个读取txt文件,并将其内容依次写入csv文件中。
有两种方法:
例如:原数据共有935个txt文件。
第一种:不跳过空txt文件(空文件写入'NULL'),最终csv中存储935条数据。
第二种:跳过空txt文件,最终csv中存储853条数据。
import pandas as pd
import os
import csv
import natsort
# Folder that holds the txt files to be converted.
data_path = 'F:/result_txt/1_3train'

# Directory entries in natural-sort order (numeric parts compared as numbers),
# with names split path-wise via ns.PATH.
data_names = natsort.natsorted(
    os.listdir(data_path),
    alg=natsort.ns.PATH,
)
# Alternative: use os.listdir(data_path) directly to keep the order that
# os.listdir returns instead of the natural-sort order.
# Method 1: keep every txt file, writing 'NULL' for the empty ones.
# Walks data_names in order and appends exactly one row per entry to
# 'new_csv.csv', so the number of rows written equals len(data_names).
# NOTE: the file is opened in append mode, so the rows are added after
# whatever new_csv.csv already contains (e.g. output of an earlier run).
with open('new_csv.csv', 'a', newline='') as csvfile:
    writer = csv.writer(csvfile)
    for data_name in data_names:
        txt_path = os.path.join(data_path, data_name)
        try:
            # read_csv treats the first line as the header, so columns[0]
            # is the first comma-separated field of that line.
            row = [pd.read_csv(txt_path, encoding='gbk').columns[0]]
        except pd.errors.EmptyDataError:
            # Empty txt file: keep its slot with a placeholder value.
            row = ['NULL']
        except Exception as exc:
            # Any other failure used to be swallowed silently. Still emit
            # an empty row so rows stay aligned with the file order, but
            # report which file failed and why.
            print("Could not read", txt_path, "-", repr(exc),
                  "- wrote an empty row")
            row = []
        writer.writerow(row)
# Method 2: skip empty txt files.
# Walks data_names in order and appends one row to 'new_csv.csv' for every
# file that pandas can parse; empty files produce no row at all.
# NOTE: the file is opened in append mode, so the rows are added after
# whatever new_csv.csv already contains (e.g. output of an earlier run).
# The output file is opened once for the whole loop rather than once per
# input file.
with open('new_csv.csv', 'a', newline='') as csvfile:
    writer = csv.writer(csvfile)
    for data_name in data_names:
        txt_path = os.path.join(data_path, data_name)
        try:
            tmp = pd.read_csv(txt_path, encoding='gbk')
        except pd.errors.EmptyDataError:
            print("此处有空行文件,已跳过,读取下一个文件")
            continue
        except Exception as exc:
            # Any other failure used to be swallowed silently; the file is
            # still skipped, but the reason is now reported.
            print("Could not read", txt_path, "-", repr(exc), "- skipped")
            continue
        # The column labels are the fields of the txt file's first line
        # (read_csv uses that line as the header); write them as one row.
        writer.writerow(tmp.columns)