基于上一篇博客中的问题,我尝试用 Python 将脚本改成多线程运行:https://blog.csdn.net/linxi7/article/details/81317704
#!/usr/bin/env python
import commands
import datetime
import os
import socket
import subprocess
import sys
import threading
import time
# Directory tree that is searched for the dated log files.
OSS_DIR = "/data/test"


def _name_days_back(days_back):
    """Return the "t_YYYYMMDD" name for the local date *days_back* days ago."""
    target_day = datetime.datetime.now() - datetime.timedelta(days=days_back)
    return "t_%s" % target_day.strftime("%Y%m%d")


# File-name prefixes keyed by date.  Note the offsets: the "N_DAY_AGO" names
# are computed with an offset of N-1 days (e.g. "3_DAY_AGO" is two days back).
test_name = "t_%s" % time.strftime("%Y%m%d")
test_name_LAST_DAY = _name_days_back(1)
test_name_3_DAY_AGO = _name_days_back(2)
test_name_7_DAY_AGO = _name_days_back(6)
test_name_14_DAY_AGO = _name_days_back(13)

# Today's local date, compared against the output files' modification dates.
time_of_system = time.strftime("%Y-%m-%d")
def exec_commands(cmd):
    """Run *cmd* through the shell and return its output as a string.

    Keeps the contract of the former ``commands.getoutput`` call: stderr is
    merged into stdout, the exit status is ignored, and a single trailing
    newline is stripped.  ``commands`` is deprecated since Python 2.6 and
    removed in Python 3, so ``subprocess`` is used instead.

    NOTE: *cmd* is interpreted by the shell; only pass trusted command text.

    :param cmd: shell command line to execute.
    :returns: combined stdout/stderr text without one trailing newline.
    """
    proc = subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,  # text output on both Python 2 and 3
    )
    output = proc.communicate()[0]
    # Same trimming rule as commands.getoutput: drop exactly one newline.
    if output[-1:] == '\n':
        output = output[:-1]
    return output
def travel_files(keyword, files, output_file):
    """Write the second '|'-separated field of every line containing *keyword*.

    Each file in *files* is read in full, trailing whitespace of the whole
    content is dropped, and the content is split on newlines.  For every line
    that contains *keyword*, the second '|'-delimited field (empty string when
    the line has no '|') is written to *output_file* followed by a newline.

    The field is extracted in Python rather than by piping the line through
    ``echo | awk`` in a shell: log lines containing quotes, ``$`` or backticks
    would otherwise be mangled or executed by the shell, and a process was
    spawned for every matching line.

    :param keyword: substring a line must contain to be selected.
    :param files: iterable of paths of the files to scan.
    :param output_file: object with a ``write`` method receiving the results.
    """
    for single_file in files:
        # Close the input promptly instead of leaking the handle.
        with open(single_file) as handle:
            content = handle.read().rstrip()
        for everyline in content.split('\n'):
            if keyword not in everyline:
                continue
            fields = everyline.split('|')
            # Mirrors awk's $2: empty when the line has fewer than two fields.
            userid = fields[1] if len(fields) > 1 else ''
            output_file.write(userid + '\n')
def get_action(filename, oss_file_name_date, filter_word):
    """Regenerate one per-host result file in a background thread if it is stale.

    The result file is named ``<filename>_<hostname>.txt``.  When it does not
    exist, or its modification date differs from ``time_of_system``, it is
    recreated and a thread is started that runs ``travel_files`` over every
    file under ``OSS_DIR`` whose name starts with *oss_file_name_date*.
    When the file was already modified today nothing is done.

    The function returns once the thread is started (it only blocks while the
    number of live threads exceeds the internal limit); the worker closes the
    result file when filtering finishes, including on error.

    :param filename: prefix of the result file name.
    :param oss_file_name_date: file-name prefix passed to ``find -name``.
    :param filter_word: keyword forwarded to ``travel_files``.
    """
    register_file = "%s_%s.txt" % (filename, socket.gethostname())
    if os.path.exists(register_file):
        file_modify_time = time.strftime("%Y-%m-%d", time.localtime(os.stat(register_file).st_mtime))
    else:
        file_modify_time = ''

    # Result already produced today: skip the (potentially slow) find as well.
    if file_modify_time == time_of_system:
        return

    cmd_date = "find %s -type f -name '%s*'" % (OSS_DIR, oss_file_name_date)
    file_date = exec_commands(cmd_date).split()
    select_number = 600  # upper bound on concurrently running threads

    if os.path.exists(register_file):
        os.remove(register_file)
    output_file = open(register_file, 'w+')

    def _filter_then_close():
        # The worker owns the file: make sure it is flushed and closed once
        # filtering ends, instead of leaving the handle open until exit.
        try:
            travel_files(filter_word, file_date, output_file)
        finally:
            output_file.close()

    t = threading.Thread(target=_filter_then_close)
    t.start()
    # Throttle: wait while too many threads are alive.
    while threading.active_count() > select_number:
        time.sleep(1)
if __name__ == '__main__':
    # One entry per report, processed in this order:
    # (result-file prefix, dated log-file prefix, keyword to filter on).
    report_jobs = (
        # registrations, previous day
        ("register_file_1_day_ago", test_name_LAST_DAY, "LOGID_ACNT_REGISTER"),
        # registrations, "3 day" report
        ("register_file_3_day_ago", test_name_3_DAY_AGO, "LOGID_ACNT_REGISTER"),
        # registrations, "7 day" report
        ("register_file_7_day_ago", test_name_7_DAY_AGO, "LOGID_ACNT_REGISTER"),
        # registrations, "14 day" report
        ("register_file_14_day_ago", test_name_14_DAY_AGO, "LOGID_ACNT_REGISTER"),
        # logins, today
        ("login_file", test_name, "LOGID_ACNT_LOGIN"),
        # logouts, today
        ("logout_file", test_name, "LOGID_ACNT_LOGOUT"),
        # registrations, today
        ("today_register_file", test_name, "LOGID_ACNT_REGISTER"),
    )
    for result_prefix, dated_prefix, keyword in report_jobs:
        get_action(result_prefix, dated_prefix, keyword)
脚本会查找指定目录下符合日期条件的文件,然后对这些文件进行批量过滤;采用多线程的方式,相比串行执行能够缩短整体运行时间。