# -*- coding:utf-8 -*-
import re
import os
import pymysql
from datetime import datetime, date, timedelta
import time
import shutil
import sys
import _thread
# Connect to the database (disabled).
# def dbh_connect(databaseName):
#     global cur
#     global conn
#     conn = pymysql.connect("127.0.0.1", "root", "1234", databaseName)
#     cur = conn.cursor()
# Province-to-region mapping (disabled).
# def get_region():
#     global regionRes
#     regionRes = {}
#     cur.execute("select * from table_name")
#     region = cur.fetchall()
#
#     for key in region:
#         regionRes[key[3]] = key[5]
# Pull files.
# NOTE(review): this function has lost its indentation and its real shell
# commands appear to have been replaced by placeholder text, so it is not
# runnable as written. It reads the module-level names `path`, `modular`,
# `modularName` and `yesterday`.
def generate_file():
# Append a timestamped "pull started" entry to <path>/log.txt.
Speed_of_progress = open('%s/log.txt' % path, 'a', encoding='utf-8')
Speed_of_progress.write(modularName+yesterday+'日期数据开始拉取:' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '\n')
Speed_of_progress.close()
# Pull the data onto the cluster.
if modular == '4':
# NOTE(review): this literal looks like a placeholder for the real command.
cmd = '大数据表拉取'
# Re-interpret the command's UTF-8 bytes as Latin-1 text before it is passed
# to os.system below.
cmd = cmd.encode("utf-8").decode("latin1")
else:
# NOTE(review): the right-hand side is missing, which is a syntax error; the
# command for other `modular` values is not shown here.
cmd =
os.system(cmd)
# Pull the data from the cluster to the local machine.
# NOTE(review): the literal contains no % placeholders, so formatting it with
# a two-item tuple would raise TypeError; presumably the real command template
# was removed -- confirm against the original script.
cmd = "存到集群" % (modularName+yesterday, path)
os.system(cmd)
# Append a timestamped "pull succeeded" entry to the same log file.
Speed_of_progress = open('%s/log.txt' % path, 'a', encoding='utf-8')
Speed_of_progress.write(modularName+yesterday+'日期数据拉取成功:' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '\n')
Speed_of_progress.close()
def combine_conditions(input_elms):
    """Return every non-empty, order-preserving combination of the inputs.

    Each combination joins its members with ``'&'``. For ``['a', 'b', 'c']``
    the result is ``['a&b&c', 'a&b', 'a&c', 'b&c', 'a', 'b', 'c']``:
    combinations led by the first element come first, then those led by the
    second, and so on, followed by the single elements themselves.

    Args:
        input_elms: A sequence of strings.

    Returns:
        A new list of combined strings; the argument is never returned
        as-is or modified. An empty input yields an empty list.
    """
    elms = list(input_elms)
    if len(elms) <= 1:
        return elms

    combined = []
    # The last element has nothing after it to combine with, so it never
    # acts as the leading element of a combination.
    for index, head in enumerate(elms[:-1]):
        for tail in combine_conditions(elms[index + 1:]):
            combined.append(head + '&' + tail)
    return combined + elms
def _append_log(message):
    """Append ``message`` followed by the current local time to <path>/log.txt."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    with open('%s/log.txt' % path, 'a', encoding='utf-8') as log_file:
        log_file.write(message + stamp + '\n')


def generate():
    """Summarise one day of records into ``<path><modularName><yesterday>_out.txt``.

    Reads every file in the directory ``path + modularName + yesterday``.
    Records are lines of ``\\x01``-separated fields. Every non-blank line
    counts as one query; for lines with exactly nine fields the seventh field
    is added to the audio total (divided by 60000 for the per-user figure).
    The number of distinct users is the number of distinct values of the
    fifth field, counted by a ``cat | awk | sort -u | wc -l`` pipeline.

    Uses the module-level names ``path``, ``modularName`` and ``yesterday``
    and sets the module-level ``dayOutput`` to the output file path.
    Progress is appended to ``<path>/log.txt``.

    Raises:
        ValueError: If the data directory holds no files, the user count is
            zero, or a nine-field line has a non-numeric seventh field.
        OSError: If the data directory or one of its files cannot be read.
    """
    # Local import: shlex is not among the file's top-level imports.
    import shlex

    global dayOutput

    _append_log('生成数据:')
    data_dir = path + modularName + yesterday
    dayOutput = '%s%s%s_out.txt' % (path, modularName, yesterday)

    file_names = sorted(os.listdir(data_dir))
    if not file_names:
        # Without file arguments `cat` would read stdin instead of the data.
        raise ValueError('no input files found in %s' % data_dir)

    # Quote every path so names containing spaces or shell metacharacters
    # cannot break or hijack the pipeline; `--` guards names starting with '-'.
    quoted_names = ' '.join(shlex.quote(name) for name in file_names)
    count_cmd = "cd %s && cat -- %s |awk -F '\x01' '{print $5}'|sort -u|wc -l" % (
        shlex.quote(data_dir), quoted_names)
    with os.popen(count_cmd) as pipe:
        total_user = int(pipe.read().strip())
    if total_user == 0:
        raise ValueError('no users found in %s' % data_dir)

    total_query = 0
    total_audio_ms = 0
    for filename in file_names:
        # errors='replace' keeps undecodable bytes from aborting the scan or
        # silently dropping the lines around them.
        with open(os.path.join(data_dir, filename), 'r',
                  encoding='utf-8', errors='replace') as f:
            for line in f:
                if not line.strip():
                    continue
                total_query += 1
                fields = line.replace('\n', '').split('\x01')
                # Only complete nine-field records contribute audio time.
                if len(fields) == 9:
                    total_audio_ms += float(fields[6])
        _append_log(filename + '处理完成:')

    user_query = round(total_query / total_user, 2)
    total_audio_user = round(int(total_audio_ms) / total_user / 60000, 2)

    # Write the one-line summary.
    template = '{0} query_num:{1} user_num:{2} total_audio_user:{3} user_query:{4} \n'
    with open(dayOutput, 'w', encoding='utf-8') as f_out:
        f_out.write(template.format('D%s' % yesterday, total_query, total_user,
                                    str(total_audio_user), str(user_query)))
    _append_log('数据生成成功:')
if __name__ == '__main__':
    # Command line: <script> <modular> <yesterday>, e.g. "4 20200327".
    # The names assigned here (path, yesterday, modular, modularName) are
    # module globals read by the functions in this file.
    if len(sys.argv) < 3:
        sys.exit('usage: %s <modular> <yesterday>' % sys.argv[0])
    modular = sys.argv[1]
    yesterday = sys.argv[2]
    pathOri = '/home/public/'
    if modular != '4':
        # Only module '4' has a working directory and name configured; stop
        # here rather than failing later on an undefined modularName.
        sys.exit('unsupported modular: %r' % modular)
    path = pathOri + 'timeDateNovelNew/'
    modularName = 'novel'

    log_path = '%s/log.txt' % path
    with open(log_path, 'a', encoding='utf-8') as log_file:
        log_file.write(modularName + '执行开始:' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '\n')
    try:
        # Disabled steps kept for reference:
        # get_region()     # look up the province-to-region mapping
        # queryInfo()      # look up speaker pid / informant info
        # generate_file()  # pull the data
        generate()  # build the output data
    except Exception:
        # Top-level boundary: record the full traceback in the log so the
        # failure can be diagnosed, then still write the "finished" entry.
        import traceback
        with open(log_path, 'a', encoding='utf-8') as log_file:
            log_file.write(traceback.format_exc())
    with open(log_path, 'a', encoding='utf-8') as log_file:
        log_file.write('执行结束:' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '\n')
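# NOTE: the lines below are page-widget text that was captured together with
# the script; they are not part of the program.
#   "Was 'Related recommendations' helpful to you?"
#   - Very unhelpful
#   - Unhelpful
#   - Neutral
#   - Helpful
#   - Very helpful
#   Submit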