公司有个需求是这样的:
在隔离机 上开启UDP 服务,甲方数据往隔离机上传输,我们从隔离机上获取数据,由于112服务器接收好几种文件负载较高,所以,决定将112 服务器数据scp到内网111服务器,然后通过NFS服务器再传到我们这边服务器上来,从而展开数据分析的任务。我画个图哈!
之前版本:
后来版本:
在 111 服务器上写了一个多进程的小工具,消费得快一些:
是一个copy完,备份的过程:
大概逻辑就是这样子。
代码上一下:
#!/usr/bin/python
# -*- coding:utf-8 -*-
import tqdm
import sys
import os
import datetime
import time
from io import open
import multiprocessing
from conf import config
from utils import log_writer
from functools import partial
def main():
    """One transfer cycle: scan for ready '.ok' files, split them over 10
    task buckets and fan them out to a 10-process pool.

    Side effects: sets the module globals (local_directory, mount_directory,
    log, back_dir) that the worker helpers copy_file/move_file read.
    """
    # 111 server local directory: scp lands here first (local disk, no NFS latency).
    global local_directory
    local_directory = config.config["Scp_config"]["local_directory"]
    # 111 server mount directory: synced to the machine-room servers via NFS.
    global mount_directory
    mount_directory = config.config["Scp_config"]["mount_directory"]
    # Main-process logger.
    global log
    log = log_writer.LogWriter("transer_local_file_counts_2")
    # Backup root gets a per-day subdirectory, e.g. <back_directory>/20240101/
    back_directory = config.config["Scp_config"]["back_directory"]
    today = datetime.datetime.now().strftime('%Y%m%d')
    global back_dir
    back_dir = back_directory + today + '/'

    # Ready files, newest first.
    ok_files_list = listsipdir()
    if not ok_files_list:
        log.info('transfer local no data! Sleep 120s!')
        time.sleep(120)

    # Round-robin at most the first 10 files over 10 buckets (one file per
    # bucket per cycle — matches the original i == 10 break/throttle).
    task_count = 10
    r_dic = {'task_%d' % t: [] for t in range(task_count)}
    for idx, file_name in enumerate(ok_files_list[:task_count]):
        r_dic['task_%d' % (idx % task_count)].append(file_name)

    po = multiprocessing.Pool(task_count)
    for t in range(task_count):
        # Each child receives its own pickled copy of r_dic plus its task id.
        po.apply_async(transter_data, (r_dic, str(t)))
    po.close()
    po.join()
def transter_data(filenames, task_id):
    """Worker entry point: transfer every file assigned to task *task_id*.

    For each '.ok' marker name in filenames['task_<id>'], copy the
    data/marker pair onto the NFS mount (copy_file), then move the pair
    into the dated backup directory (move_file). Files that fail stay in
    the bucket and are retried on the next sweep of the while loop.

    NOTE(review): the function name keeps the original (misspelled)
    spelling — it is the public entry point handed to Pool.apply_async.
    """
    logger = log_writer.LogWriter('index_main_' + task_id)
    logger.info('<== Trantser_data Function Start ==>')
    if not filenames:
        return
    pending = filenames['task_' + task_id]
    logger.info('task_' + task_id)
    # Original condition was `total_num - deal_num != total_num`, i.e.
    # "loop while some assigned files are still unprocessed".
    remaining = len(pending)
    while remaining:
        if not pending:
            time.sleep(120)
            break
        # Iterate a snapshot: removing from the list being iterated (as the
        # original did) silently skips the element after each removal.
        for ok_file_name in list(pending):
            # Copy the data + marker pair onto the mount directory.
            try:
                logger.info('start copy!')
                copy_file(ok_file_name, logger)
            except Exception:
                # NOTE(review): `.erro` kept verbatim — presumably a method
                # of the project LogWriter; confirm it is not a typo for
                # `.error` before changing.
                logger.erro(' move Failed !')
                continue
            # Move the pair into the backup directory, creating it on first use.
            if os.path.exists(back_dir):
                try:
                    move_file(ok_file_name, logger)
                    logger.info('start move!')
                except Exception:
                    logger.erro(' move Failed !')
                    continue
            else:
                try:
                    logger.info("create:")
                    logger.info(back_dir)
                    logger.info(ok_file_name)
                    os.makedirs(back_dir)
                    logger.info('start move!')
                    logger.info(ok_file_name)
                    move_file(ok_file_name, logger)
                except Exception:
                    logger.erro(' move ok file Failed !')
                    continue
            pending.remove(ok_file_name)
            remaining -= 1
def _copy_stream(src_path, dst_path):
    """Copy *src_path* to *dst_path* in 1 MiB chunks with a tqdm progress bar."""
    record_size = 1048576  # 1 MiB per read
    with open(src_path, 'rb') as s:
        with open(dst_path, 'wb') as fd:
            records = iter(partial(s.read, record_size), b'')
            size = int(os.path.getsize(os.path.abspath(src_path)) / record_size)
            for data in tqdm.tqdm(records, total=size, unit='MB',
                                  desc=dst_path, mininterval=1, ncols=80, ):
                fd.write(data)

def copy_file(ok_file_name, logger):
    """Copy a data file and its '.ok' marker from local_directory to
    mount_directory.

    *ok_file_name* is the marker name (e.g. 'foo.dat.ok'); the data file
    name is the marker name minus its '.ok' suffix (last 3 characters).
    """
    old_file_path = local_directory + ok_file_name[0:-3]
    old_ok_file_path = local_directory + ok_file_name
    logger.info("old_file_path")
    logger.info(old_file_path)
    # Data file first, marker second: the marker's presence signals
    # "data complete" to the downstream consumer.
    _copy_stream(old_file_path, mount_directory + ok_file_name[0:-3])
    logger.info("old_ok_file_path")
    logger.info(old_ok_file_path)
    _copy_stream(old_ok_file_path, mount_directory + ok_file_name)
def _move_stream(src_path, dst_path):
    """Move *src_path* to *dst_path*: streaming 1 MiB copy (tqdm progress
    bar) followed by removal of the source."""
    record_size = 1048576  # 1 MiB per read
    with open(src_path, 'rb') as s:
        with open(dst_path, 'wb') as fd:
            records = iter(partial(s.read, record_size), b'')
            size = int(os.path.getsize(os.path.abspath(src_path)) / record_size)
            for data in tqdm.tqdm(records, total=size, unit='MB',
                                  desc=dst_path, mininterval=1, ncols=80, ):
                fd.write(data)
    os.remove(src_path)

def move_file(ok_file_name, logger):
    """Move a data file and its '.ok' marker from local_directory into the
    dated backup directory (back_dir), implemented as copy + remove.

    *ok_file_name* is the marker name; the data file name is the marker
    name minus its '.ok' suffix (last 3 characters).
    """
    old_file_path = local_directory + ok_file_name[0:-3]
    old_ok_file_path = local_directory + ok_file_name
    logger.info("start move file ")
    # Data file destination in the backup directory.
    back_file = back_dir + ok_file_name[0:-3]
    # Marker file destination.
    back_ok_file = back_dir + ok_file_name
    logger.info(back_file)
    _move_stream(old_file_path, back_file)
    logger.info("start move ok file ")
    logger.info(back_ok_file)
    _move_stream(old_ok_file_path, back_ok_file)
def listsipdir(directory=None):
    """Return the names of '.ok' marker files in *directory*, newest
    modification time first.

    :param directory: directory to scan; defaults to the module global
        ``local_directory`` (backward compatible with the original
        zero-argument form).
    :returns: list of file names (not paths) ending in '.ok'.
    :raises OSError: if the directory cannot be listed, or a file vanishes
        between listdir() and getmtime() (same as the original).
    """
    if directory is None:
        directory = local_directory
    names = os.listdir(directory)
    names.sort(key=lambda fn: os.path.getmtime(os.path.join(directory, fn)),
               reverse=True)
    return [fn for fn in names if fn.endswith('.ok')]
if __name__ == '__main__':
    # Run forever: each main() pass scans the local directory, distributes
    # the ready files to the worker pool and waits for it to drain; main()
    # itself sleeps 120 s when there is no data.
    while True:
        main()
10个进程效果非常棒,很快!