import datetime
class Message:
    """One message of a day's file, sortable by its timestamp.

    Pairs the timestamp of a message with the position the message had in
    the list it was read from, so the lines can be written back in
    timestamp order after sorting.
    """

    timestamp: int  # timestamp of the message; used as the sort key
    messageId: int  # original position of the message, used to re-emit lines in order

    def __init__(self, timestamp: int, messageId: int) -> None:
        """Store the sort key and the original position of the message."""
        self.timestamp = timestamp
        self.messageId = messageId

    def __repr__(self) -> str:
        """Return a debug representation showing both fields."""
        return (
            f"{type(self).__name__}"
            f"(timestamp={self.timestamp!r}, messageId={self.messageId!r})"
        )
def Conversion(timestamp, tz: "datetime.tzinfo | None" = None) -> str:
    """Convert a millisecond Unix timestamp to a ``YYYY-MM-DD`` date string.

    Args:
        timestamp: Unix time in milliseconds.
        tz: Optional time zone used to decide which calendar day the
            instant falls on. ``None`` (the default) uses the machine's
            local time zone, which is what this function always did.

    Returns:
        The calendar date of the instant, formatted as ``%Y-%m-%d``.
    """
    # fromtimestamp() expects seconds, so scale the millisecond value down.
    moment = datetime.datetime.fromtimestamp(timestamp / 1000, tz)
    return moment.strftime("%Y-%m-%d")
def getZhangsan(
    record_prefix=r"D:\code_test\Text_processing\records\record_",
    ans_dir=r"D:\code_test\Text_processing\ans",
    file_count=1000,
):
    """Extract the lines tagged '张三' and append them to per-day files.

    Reads ``<record_prefix>0.txt`` .. ``<record_prefix><file_count-1>.txt``
    (UTF-8). Every line whose characters 10-11 equal '张三' is appended to
    ``<ans_dir>/<YYYY-MM-DD>.txt``, where the date is derived from the
    13-character millisecond timestamp at characters 28-40 of the line.
    Prints each file index as it is processed and, at the end, the number
    of matching lines.

    Args:
        record_prefix: Path prefix of the numbered input files.
        ans_dir: Directory receiving the per-day output files; created if
            it does not exist.
        file_count: Number of input files to read, starting at index 0.

    Raises:
        FileNotFoundError: If one of the numbered input files is missing.
        ValueError: If a matching line has no integer at characters 28-40.

    Note:
        Output files are opened in append mode, so running this twice
        duplicates the extracted lines. They are written with the platform
        default encoding, matching how sortMessageByTime reads them back.
    """
    # Local import: the file's own ``from pathlib import Path`` line is only
    # executed after this function has already been called.
    from pathlib import Path

    out_dir = Path(ans_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    cnt = 0  # number of matching lines found so far
    for file_no in range(file_count):
        print(file_no)
        with open(f"{record_prefix}{file_no}.txt", "r", encoding="utf-8") as f:
            lines = f.readlines()

        # Group this file's matches by day so each day file is opened once
        # per input file instead of once per matching line.
        by_day = {}
        for line in lines:
            if line[10:12] != '张三':
                continue
            day = Conversion(int(line[28:41]))  # millisecond timestamp -> date
            by_day.setdefault(day, []).append(line)
            cnt += 1

        for day, day_lines in by_day.items():
            with open(out_dir / f"{day}.txt", "a") as out:
                out.writelines(day_lines)
    print(cnt)
def sortMessageByTime(
    ans_dir=r"D:\code_test\Text_processing\ans",
    sorted_dir=None,
    days=range(8, 16),
):
    """Write a timestamp-sorted copy of each per-day message file.

    For every day ``d`` in ``days`` the file ``<ans_dir>/2022-04-DD.txt`` is
    read, its lines are ordered by the integer found at characters 28-40
    of each line, and the result is written to
    ``<sorted_dir>/2022-04-DD.txt``. Lines with equal timestamps keep their
    original relative order. Each date string is printed as it is handled.

    Args:
        ans_dir: Directory holding the unsorted per-day files.
        sorted_dir: Directory for the sorted files. ``None`` (default)
            means ``ans_dir + "_sort_by_timestamp"``. Created if missing.
        days: Days of April 2022 to process; defaults to the 8th..15th.

    Raises:
        ValueError: If a line has no integer at characters 28-40.
    """
    # Local import: the file's own ``from pathlib import Path`` line is only
    # executed after this function has already been called.
    from pathlib import Path

    src_dir = Path(ans_dir)
    if sorted_dir is None:
        dst_dir = Path(str(ans_dir) + "_sort_by_timestamp")
    else:
        dst_dir = Path(sorted_dir)
    dst_dir.mkdir(parents=True, exist_ok=True)

    for day in days:
        date_str = f"2022-04-{day:02d}"
        print(date_str)

        src = src_dir / f"{date_str}.txt"
        if not src.is_file():
            # No file for this day, so there is nothing to sort.
            continue
        with open(src, "r") as f:
            lines = f.readlines()

        # list.sort is stable, so ties stay in file order.
        lines.sort(key=lambda line: int(line[28:41]))

        # "w" rather than "a": appending on a re-run would duplicate the
        # lines and leave the output no longer sorted.
        with open(dst_dir / f"{date_str}.txt", "w") as out:
            out.writelines(lines)
# Run the two processing steps in order: first extract the matching messages
# into per-day files, then write a timestamp-sorted copy of each day's file.
getZhangsan()
sortMessageByTime()
# pathlib makes file operations super convenient
from pathlib import Path
# Directory whose text files are dumped to stdout.
path = Path(r"D:\all_code\huawei_code_cnt\pytest")

# Handy Path helpers: is_file() / is_dir() test an entry's kind, and
# path.iterdir() walks every entry of a directory.
# Here: open each .txt file directly under `path` and print every line
# split on whitespace.
for p in path.glob("*.txt"):
    if not p.is_file():
        continue
    print("路径字符串:" + str(p))
    with open(str(p), 'r', encoding="utf-8") as f:
        list1 = f.readlines()
    row = len(list1)  # number of lines in the file
    for i, line in enumerate(list1):
        ansList = line.split()
        print(ansList)