import _thread
import os
import re
import shlex
import shutil
import sys
import time
from datetime import datetime, date, timedelta

import pymysql
# Pull one day's raw data with a shell command, run a second shell command
# on the result, and append start/success timestamps to <path>/log.txt.
# Reads the module-level names path, modular, modularName and yesterday.
# NOTE(review): this definition cannot run as shown. Block indentation is
# missing, the else branch assigns nothing to cmd, and the "存到集群" string
# has no % placeholders for the two values applied to it. Both command
# strings look like redacted placeholders; restore the real commands first.
# Nothing in the visible code calls this function.
def generate_file():
# Append a "pull started" line stamped with the current local time.
Speed_of_progress = open('%s/log.txt' % path, 'a', encoding='utf-8')
Speed_of_progress.write(modularName+yesterday+'日期数据开始拉取:' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '\n')
Speed_of_progress.close()
if modular == '4':
cmd = '大数据表拉取'
# Re-read the UTF-8 bytes of the command as latin1 characters before it is
# passed to os.system; presumably a locale workaround, TODO confirm.
cmd = cmd.encode("utf-8").decode("latin1")
else:
# NOTE(review): incomplete assignment, the command for other modules is missing.
cmd =
os.system(cmd)
# NOTE(review): applying % with a 2-tuple to a string without format
# specifiers raises TypeError at runtime.
cmd = "存到集群" % (modularName+yesterday, path)
os.system(cmd)
# Append a "pull succeeded" line; the exit status of os.system is not checked.
Speed_of_progress = open('%s/log.txt' % path, 'a', encoding='utf-8')
Speed_of_progress.write(modularName+yesterday+'日期数据拉取成功:' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '\n')
Speed_of_progress.close()
def combine_conditions(input_elms):
    """Return every '&'-joined combination of the given condition strings.

    Each combination keeps the elements in their original relative order.
    Multi-element combinations come first (grouped by their leading
    element), followed by the individual elements themselves.

    Args:
        input_elms: list of strings to combine.

    Returns:
        A list of strings. For a single-element input the input list itself
        is returned (not a copy); an empty input yields an empty list.

    >>> combine_conditions(['a', 'b', 'c'])
    ['a&b&c', 'a&b', 'a&c', 'b&c', 'a', 'b', 'c']
    """
    if len(input_elms) == 1:
        return input_elms
    # Pair every element except the last with each combination that can be
    # built from the elements after it; the last element has nothing to its
    # right, so it only appears on its own or as a suffix.
    combined = [
        head + '&' + rest
        for next_index, head in enumerate(input_elms[:-1], start=1)
        for rest in combine_conditions(input_elms[next_index:])
    ]
    return combined + input_elms
def _log_progress(message):
    """Append ``message`` followed by the current local time to ``<path>/log.txt``."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    with open('%s/log.txt' % path, 'a', encoding='utf-8') as log_file:
        log_file.write(message + stamp + '\n')


def _count_unique_users(data_dir, filenames):
    """Count distinct values of the fifth \\x01-separated field across the files.

    The counting is delegated to ``cat | awk | sort -u | wc -l`` as before,
    but the directory and file names are shell-quoted and ``cat`` only runs
    when ``cd`` succeeded.

    Raises:
        ValueError: if the pipeline did not print an integer.
    """
    quoted_files = ' '.join(shlex.quote(name) for name in filenames)
    command = "cd %s && cat %s |awk -F '\x01' '{print $5}'|sort -u|wc -l" % (
        shlex.quote(data_dir), quoted_files)
    with os.popen(command) as pipe:
        output = pipe.read().strip()
    try:
        return int(output)
    except ValueError:
        raise ValueError('could not count users in %s: %r' % (data_dir, output))


def generate():
    """Aggregate one day's records into a one-line summary file.

    Reads the module-level names ``path``, ``modularName`` and ``yesterday``
    and sets the module-level ``dayOutput`` to the summary file path. Every
    file in ``path + modularName + yesterday`` is scanned in sorted order;
    each non-blank line counts as one query, and lines with exactly nine
    ``\\x01``-separated fields contribute field 7 (audio milliseconds, going
    by the variable name) to the audio total. The summary reports the query
    count, the distinct-user count, audio minutes per user and queries per
    user. Progress is appended to ``<path>/log.txt``.

    Raises:
        ValueError: if the directory has no files, the user count cannot be
            determined or is zero, or a numeric field cannot be parsed.
        OSError: if the directory or a file cannot be read or written.
    """
    global dayOutput
    _log_progress('生成数据:')
    dayOutput = '%s%s%s_out.txt' % (path, modularName, yesterday)
    data_dir = path + modularName + yesterday

    filenames = sorted(os.listdir(data_dir))
    if not filenames:
        # Without file arguments `cat` would read stdin and could block forever.
        raise ValueError('no input files found in %s' % data_dir)

    total_user = _count_unique_users(data_dir, filenames)
    if total_user == 0:
        raise ValueError('no user records found in %s' % data_dir)

    total_query = 0
    total_audio_ms = 0
    text_len = 0
    for filename in filenames:
        # errors='replace' keeps malformed bytes from aborting the read or
        # silently dropping a buffered chunk of lines.
        with open(os.path.join(data_dir, filename), 'r',
                  encoding='utf-8', errors='replace') as f:
            for line in f:
                if not line.strip():
                    continue
                # Every non-blank line counts as a query, even a malformed one.
                total_query += 1
                fields = line.replace('\n', '').split('\x01')
                if len(fields) != 9:
                    continue
                audio_ms = float(fields[6])
                # NOTE(review): per-record print kept from the original;
                # looks like leftover debug output.
                print(audio_ms)
                total_audio_ms += audio_ms
                # Not part of the report; the conversion is kept so records
                # with a non-numeric ninth field are still rejected.
                text_len += float(fields[8])
        _log_progress(filename + '处理完成:')

    user_query = round(total_query / total_user, 2)
    # Milliseconds are truncated to an integer, then converted to minutes.
    total_audio_user = round(int(total_audio_ms) / total_user / 60000, 2)
    template = '{0} query_num:{1} user_num:{2} total_audio_user:{3} user_query:{4} \n'
    with open(dayOutput, 'w', encoding='utf-8') as f_out:
        f_out.write(template.format('D%s' % yesterday, total_query, total_user,
                                    str(total_audio_user), str(user_query)))
    _log_progress('数据生成成功:')
if __name__ == '__main__':
    # Entry point: <script> <modular> <date>.
    # Sets the module-level names (path, yesterday, modular, modularName)
    # that generate() reads, runs it, and records start, failure and end
    # in <path>/log.txt.
    if len(sys.argv) < 3:
        sys.exit('usage: %s <modular> <date>' % sys.argv[0])
    modular = sys.argv[1]
    yesterday = sys.argv[2]
    pathOri = '/home/public/'
    path = ''
    if modular == '4':
        path = pathOri + 'timeDateNovelNew/'
        modularName = 'novel'
    else:
        # Without a known module there is no data directory or log location;
        # stop here instead of writing to '/log.txt' and failing on an
        # undefined modularName.
        sys.exit('unsupported modular %r: only "4" is handled' % modular)

    log_path = '%s/log.txt' % path
    with open(log_path, 'a', encoding='utf-8') as log_file:
        log_file.write(modularName + '执行开始:'
                       + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '\n')
    try:
        generate()
    except Exception as e:
        # Best effort: a failed run is logged and the script still finishes
        # normally. The type name and trailing newline keep the entry from
        # fusing with the end-of-run line below.
        with open(log_path, 'a', encoding='utf-8') as fl:
            fl.write('%s: %s\n' % (type(e).__name__, e))
    with open(log_path, 'a', encoding='utf-8') as log_file:
        log_file.write('执行结束:'
                       + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '\n')