# Convert CSV to TXT
# -*-coding:utf-8-*-
import csv
# Per-label split sizes as (rows sent to the training file, rows sent to the
# validation file), counted in the order the rows appear in the CSV.
# NOTE(review): label '1' receives one more validation row than label '0'.
# This reproduces the original bounds (`count1 < 3300` vs `count2 <= 3300`),
# which look like an accidental off-by-one -- confirm which size is intended.
SPLIT_SIZES = {'0': (2949, 350), '1': (2949, 351)}


def csv_to_txt(csv_path, train_path, val_path, split_sizes=None):
    """Split a labelled CSV file into train/validation text files.

    The first CSV row is treated as a header and skipped.  For every other
    row, the third column is the label and the second column is the payload;
    each kept row is written as ``<label>\\t<second column>\\n``.

    For each label in *split_sizes*, the first ``n_train`` rows carrying that
    label go to *train_path*, the next ``n_val`` rows go to *val_path*, and
    any further rows with that label are dropped.  Rows whose label is not a
    key of *split_sizes* are ignored.

    Args:
        csv_path: Path of the UTF-8 CSV file to read.
        train_path: Path of the training text file (opened in append mode).
        val_path: Path of the validation text file (opened in append mode).
        split_sizes: Mapping ``label -> (n_train, n_val)``.  Defaults to
            ``SPLIT_SIZES``.

    Returns:
        A ``(train_rows_written, val_rows_written)`` tuple.
    """
    if split_sizes is None:
        split_sizes = SPLIT_SIZES

    seen = dict.fromkeys(split_sizes, 0)  # rows encountered so far per label
    train_written = 0
    val_written = 0

    # `with` guarantees all three files are flushed and closed, even when a
    # row fails to process.  The csv module expects its file to be opened
    # with newline=''.  Outputs stay in append mode, so running the
    # conversion again adds to the existing files instead of replacing them.
    with open(csv_path, encoding='utf-8', newline='') as source, \
            open(train_path, 'a', encoding='utf-8') as train_out, \
            open(val_path, 'a', encoding='utf-8') as val_out:
        reader = csv.reader(source)
        next(reader, None)  # skip the header row
        for row in reader:
            # Blank lines come back as [] from csv.reader; skip them and any
            # other row too short to hold a label instead of raising
            # IndexError.
            if len(row) < 3:
                continue
            label = row[2]
            if label not in split_sizes:
                continue
            seen[label] += 1
            n_train, n_val = split_sizes[label]
            record = label + '\t' + row[1] + '\n'
            if seen[label] <= n_train:
                train_out.write(record)
                train_written += 1
            elif seen[label] <= n_train + n_val:
                val_out.write(record)
                val_written += 1
    return train_written, val_written


if __name__ == '__main__':
    csv_to_txt('8_new_train.csv', '8_cnews.train.txt', '8_cnews.val.txt')
# Convert TXT to CSV
import csv
# # path = './random_test'
# path = './random_train'
# list = os.listdir(path)
def txt_to_csv(txt_path, csv_path, author="哈哈", start_id=0):
    """Convert a tab-separated text file into an Excel-dialect CSV file.

    Each input line is split on tab characters and its second field is
    written as the last column of a three-column CSV row
    ``[row id, author, second field]``.  No header row is written.

    Args:
        txt_path: Path of the UTF-8 text file to read.
        csv_path: Path of the CSV file to create (overwritten if present).
        author: Constant value written to the second column of every row.
        start_id: Value of the first column for the first written row; it
            increases by one per written row.

    Returns:
        The number of rows written.
    """
    # Open the input first so that a missing source file does not truncate
    # an already existing output file.
    with open(txt_path, 'r', encoding='utf-8') as txtfile, \
            open(csv_path, 'w+', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, dialect='excel')
        # NOTE(review): the original wrote the builtin function `id` into the
        # first column (rendered as "<built-in function id>") because no `id`
        # variable was ever assigned.  A running row index is assumed to be
        # what was intended -- confirm against whatever consumes this CSV.
        row_id = start_id
        # Iterate the file lazily instead of loading every line up front.
        for line in txtfile:
            fields = line.strip('\n').split('\t')
            # Lines without a tab (e.g. a trailing blank line) have no second
            # field; skip them instead of raising IndexError.
            if len(fields) < 2:
                continue
            writer.writerow([row_id, author, fields[1]])
            row_id += 1
    return row_id - start_id


if __name__ == '__main__':
    txt_to_csv('cnews.test(1).txt', '(cnews)test.csv')