作者选择了两个输入文本的批量保存方式。
一个是str类型,一个是dataframe类型
1.str直接作为文本内容
import pandas as pd
import os
import re
import jieba
# Switch the working directory; the relative paths used below are resolved against it.
os.chdir(r'C:\Users\Administrator\Desktop\示例数据')
# Load the spreadsheet into a DataFrame.
data2 = pd.read_excel('3.xlsx')
# Keep only the '详情' column; its entries are the texts that get saved one per file.
data2_message = data2['详情']
def wenben(a, out_dir='3详情', start=10000):
    """Write every entry of ``a`` to its own UTF-8 text file.

    Files are named ``<start + 1>.txt``, ``<start + 2>.txt``, ... following
    the iteration order of ``a``.

    Args:
        a: Iterable of text entries (for example a list or a pandas Series).
            Entries are consumed by iteration rather than by ``a[i]``, so a
            Series whose index is not 0..n-1 (e.g. after filtering rows) is
            handled correctly instead of raising ``KeyError``.
        out_dir: Directory that receives the files. It is created when it
            does not exist yet. Defaults to ``'3详情'`` relative to the
            current working directory.
        start: Number that precedes the first file number; the first file is
            ``start + 1``.

    Entries that are not ``str`` (numbers, missing values, ...) are written
    using their ``str()`` representation instead of aborting the whole export
    with a ``TypeError``.
    """
    os.makedirs(out_dir, exist_ok=True)
    for number, text in enumerate(a, start=start + 1):
        path = os.path.join(out_dir, str(number) + '.txt')
        # The context manager closes the file even if the write fails.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(text if isinstance(text, str) else str(text))
# Save each entry of the '详情' column to its own numbered .txt file.
wenben(data2_message)
2.dataframe作为文本内容
import pickle
# Serialize the DataFrame to an in-memory bytes object (nothing is written to disk).
# NOTE(review): `df` is not defined in this snippet; it must already exist.
df_bytes = pickle.dumps(df)
# Restore the DataFrame from those bytes. pd.read_pickle() expects a file path
# or a file-like object, not raw bytes, so pickle.loads() is the matching
# inverse of pickle.dumps().
# SECURITY: only unpickle data you produced yourself or otherwise trust;
# unpickling can execute arbitrary code.
df_restored = pickle.loads(df_bytes)
#例子:按月份文本处理
# def through_time(data): #首先返回所有留言的:开始时间+结束时间+持续时间
# example_dt = data['留言时间']
# example_dt_list = list(example_dt) #数据格式,如[Timestamp('2017-06-08 17:31:20'),Timestamp('2019-11-22 14:42:14'),Timestamp('2019-04-28 17:32:51'),Timestamp('2018-05-17 08:32:04'),Timestamp('2019-11-05 10:31:38')]
# from_1 = str(example_dt_list[0])[0:10] #截取所需日期,如'2019-11-22'
# to_1 = str(example_dt_list[0])[0:10]
# f = datetime.datetime.strptime(from_1,'%Y-%m-%d').date() #转换格式,如datetime.date(2019, 11, 22)
# t = datetime.datetime.strptime(to_1,'%Y-%m-%d').date()
# for i in range(len(example_dt)):
# temp = str(example_dt_list[i])[0:10]
# temp_dt = datetime.datetime.strptime(temp,'%Y-%m-%d').date()
# if (f-temp_dt).days > 0:
# f = temp_dt
# elif (t-temp_dt).days < 0:
# t = temp_dt
# through = str(t-f)[0:7]#持续时间
# print('问题ID为:'+str(id)+'的留言反映时间段从'+str(f)+'到'+str(t),'持续时间为:'+str(through))
# return f,t,through
# f,t,through = through_time(biaoge2)
# next_month = f
# while f<t:
# example = biaoge2.copy()
# example_dt = biaoge2['留言时间']
# example_dt_list = list(example_dt)
# next_month = days_in_month(f) + relativedelta(months=+1) #下一个月
# for i in range(len(biaoge2)):
# temp = str(example_dt_list[i])[0:10]
# temp_dt = datetime.datetime.strptime(temp,'%Y-%m-%d').date()
# if next_month < temp_dt or temp_dt < f:
# example = example.drop(index=i)
# c = pickle.dumps(example) #将dataframe数据转换为bytes(字节串)形式;也可以直接使用 example.to_pickle('名称') 保存到文件
# file_name = str(f)+'到'+str(next_month)+'.txt'
# f1 = open('按月份数据/'+file_name,'wb')
# f1.write(c)
# f1.close()
# f = next_month
# #执行该方法时重新执行一下def through_time
# os.chdir(r'C:\Users\Administrator\Desktop\大三\泰迪学社\第8届泰迪杯\01040730kg73\示例数据\按月份数据')
# while f<t:
# example = biaoge2.copy()
# example_dt = biaoge2['留言时间']
# example_dt_list = list(example_dt)
# next_month = days_in_month(f) + relativedelta(months=+1) #下一个月
# file_name = str(f)+'到'+str(next_month)+'.txt'
# a_month_data = pd.read_pickle(file_name)
# f = next_month