"""Collect deployed application versions over SSH, in parallel, into Redis.

Background (from the original write-up): plain ``threading`` puts no cap on
the number of concurrently running threads; with too many threads the run
crashed and version data was lost.  The fix is a ``BoundedSemaphore``
limiting the run to 300 concurrent worker threads.  A test run finishes in
roughly 3 minutes 15 seconds with no data loss.

Review fixes applied on top of the original script:
- ``worker`` now releases the semaphore in a ``finally`` block; the original
  leaked a slot on its early-return (unreachable host) path and on any
  exception, which could eventually deadlock ``main``.
- ``main`` now acquires the semaphore only right before spawning a worker;
  the original acquired before the ``'all'``/``'_meta'`` skip and leaked a
  slot per skipped inventory key.
- ``par_ver`` closes its SSH client in a ``finally`` block.
- Groups with an empty host list are skipped instead of raising IndexError.
"""
import paramiko
import requests
import datetime
import redis
import json
import os
import subprocess
from threading import Timer
import logging
import threading
import socket

logger = logging.getLogger('ktzlogger')


class GetVersion(threading.Thread):
    """Fan out per-host version lookups with a bounded number of threads."""

    def __init__(self):
        threading.Thread.__init__(self)

    def par_ver(self, host0, app_name):
        """Return the deployed version tag of *app_name* on host *host0*.

        Greps the last jfrog URL out of the app's deploy script over SSH
        (port 2222, user ``dc``) and extracts its 4th path component.
        Returns ``'0'`` when nothing is found.
        """
        client = paramiko.SSHClient()
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            client.connect(hostname=host0, port=2222, username='dc')
            stdin, stdout, stderr = client.exec_command(
                "awk '/jfrog/' /data/scripts/deploy_%s.sh | tail -1 "
                "| awk '{print $4}' | awk -F/ '{print $4}'" % (app_name))
            out = stdout.read().decode('utf-8')
        finally:
            # Always release the SSH connection, even if connect/exec raised.
            client.close()
        if out == '':
            out = '0'
        return out.strip()

    def chaoshi(self, args, timeout):
        """Run *args* as a subprocess with a hard *timeout* in seconds.

        A ``Timer`` kills the process if it outlives the timeout
        ('chaoshi' = 'timeout').  Returns True when the command produced
        any stdout before being killed, False otherwise.
        """
        p = subprocess.Popen(args, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        timer = Timer(timeout, lambda process: process.kill(), [p])
        try:
            timer.start()
            stdout, stderr = p.communicate()
            return stdout != b''
        finally:
            timer.cancel()

    def worker(self, host0, app_name, objs, i, center, hosts_str, env,
               threadmax):
        """Collect one app's version and append a record dict to *objs*.

        The caller acquires *threadmax* before starting this thread; the
        semaphore is released here in a ``finally`` so an unreachable host
        or an exception cannot leak a slot and starve later workers
        (the original early ``return`` skipped the release).
        """
        try:
            addr = socket.gethostbyname(socket.getfqdn(socket.gethostname()))
            if addr == '192.168.89.133':
                ver = 'v1'
            else:
                # Probe the agent port first; record unreachable hosts for
                # later inspection instead of blocking on an SSH connect.
                result = self.chaoshi(['telnet', host0, '53742'], 2)
                if result == False:
                    os.system('echo %s >> /tmp/hosts_questions.txt' % (host0))
                    return
                ver = self.par_ver(host0, app_name)
            now = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M')
            # list.append is atomic under the GIL, safe across workers.
            objs.append({'v_id': i, 'v_gps': center, 'v_proj': app_name,
                         'v_tag': ver, 'v_hosts': hosts_str,
                         'v_time': now, 'v_env': env})
        finally:
            threadmax.release()

    def main(self):
        """Fan out version collection across every inventory group.

        Caps concurrency at 300 threads with a ``BoundedSemaphore``, waits
        for all workers, then stores ``{'gps': ..., 'deploys': ...}`` as
        JSON in Redis db 1 under the key ``'versions'``.
        """
        os.system('rm -f /tmp/hosts_questions.txt')
        all_keys = requests.get(
            "http://172.16.3.100:10082/assets/inventory/--list/None/")
        all_objs = all_keys.json()
        i = 1
        objs = []
        gps = []
        threads = []
        env_list = []
        threadmax = threading.BoundedSemaphore(300)
        for item in all_objs:
            # Skip sentinel keys BEFORE touching the semaphore: the original
            # acquired first and leaked one slot per skipped key.
            if item == 'all' or item == '_meta':
                continue
            if 'ktz_data_apps' in item:
                env = item.split('_ktz_data_apps_')[0]
                center = 'ktz_data_apps'
                app_name = item.split('_ktz_data_apps_')[-1]
            elif 'ktz_m' in item:
                env = item.split('_ktz_m_')[0]
                center = 'ktz_m'
                app_name = item.split('_ktz_m_')[-1]
            else:
                env = item.split('_')[0]
                center = item.split('_')[1]
                app_name = item.split('_')[-1]
            if env not in env_list:
                env_list.append(env)
            if center not in gps:
                gps.append(center)
            hosts = all_objs[item]['hosts']
            if not hosts:
                # Group with no hosts: original raised IndexError on hosts[0].
                continue
            # Keep the original trailing-comma format ("h1,h2,") so
            # downstream consumers of v_hosts see the same value.
            hosts_str = ','.join(hosts) + ','
            host0 = hosts[0]
            threadmax.acquire()  # released by worker() in its finally block
            t = threading.Thread(target=self.worker,
                                 args=(host0, app_name, objs, i, center,
                                       hosts_str, env, threadmax))
            t.start()
            threads.append(t)
            i += 1
        for t in threads:
            t.join()
        print(objs)
        print(env_list)
        red = redis.Redis(host='localhost', port=6379, db=1)
        # Renamed from 'all' to avoid shadowing the builtin.
        all_data = {'gps': gps, 'deploys': objs}
        red.set('versions', json.dumps(all_data))


if __name__ == '__main__':
    gv = GetVersion()
    gv.main()
celery-threading优化续集
最新推荐文章于 2023-08-11 17:07:02 发布