celery-threading优化续集

续集:
一. 背景
    接着上一回:threading本身不会限制线程数量,但当同时运行的线程过多时就会报错,导致数据收集不全,所以需要限制threading同时开启的线程数量
二. 解决
    修改脚本,加入线程数量限制,最多同时运行300个线程来收集版本数据
    vim get_version.py
        import paramiko
        import requests
        import datetime
        import redis
        import json
        import os
        import subprocess
        from threading import Timer
        import logging
        import threading
        import socket

        logger = logging.getLogger('ktzlogger')

        class GetVersion(threading.Thread):
            """Collect the deployed version of every inventory group and cache it in Redis.

            ``main`` downloads the inventory, starts one ``worker`` thread per group
            (bounded by ``MAX_THREADS`` through a semaphore), waits for all of them and
            stores the collected records as JSON under the Redis key ``versions``.
            """

            # Upper bound on the number of worker threads running at the same time.
            MAX_THREADS = 300
            # Hosts that fail the reachability probe are appended here, one per line.
            QUESTIONS_FILE = '/tmp/hosts_questions.txt'
            # Same logger name the surrounding script uses.
            _log = logging.getLogger('ktzlogger')

            def __init__(self):
                threading.Thread.__init__(self)

            def par_ver(self, host0, app_name):
                """Return the version tag read from ``host0``'s deploy script over SSH.

                Runs an awk/tail pipeline against ``/data/scripts/deploy_<app_name>.sh``
                on the remote host and returns its stripped output, or ``'0'`` when the
                pipeline prints nothing. Connection errors propagate to the caller.
                """
                client = paramiko.SSHClient()
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                try:
                    client.connect(hostname=host0, port=2222, username='dc')
                    stdin, stdout, stderr = client.exec_command("awk '/jfrog/' /data/scripts/deploy_%s.sh | tail -1 | awk '{print $4}' | awk -F/ '{print $4}'" % (app_name))
                    out = stdout.read().decode('utf-8').strip()
                finally:
                    # Always release the SSH connection, even when connect/exec fails.
                    client.close()
                # Strip before testing so whitespace-only output also maps to '0'.
                return out or '0'

            def chaoshi(self, args, timeout):
                """Run ``args`` for at most ``timeout`` seconds; report whether it printed to stdout.

                The child process is killed when the timeout expires. Returns ``True``
                when any bytes were captured on stdout, ``False`` otherwise.
                """
                p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                try:
                    stdout, _ = p.communicate(timeout=timeout)
                except subprocess.TimeoutExpired:
                    p.kill()
                    # Second call reaps the child and returns whatever was captured.
                    stdout, _ = p.communicate()
                return stdout != b''

            @staticmethod
            def _parse_group(item):
                """Split an inventory group name into ``(env, center, app_name)``.

                Returns ``None`` when the name does not contain enough ``_``-separated
                parts to provide a center.
                """
                # Centers whose own name contains underscores are matched first.
                for marker in ('ktz_data_apps', 'ktz_m'):
                    if marker in item:
                        parts = item.split('_%s_' % marker)
                        return parts[0], marker, parts[-1]
                parts = item.split('_')
                if len(parts) < 2:
                    return None
                return parts[0], parts[1], parts[-1]

            def _record_unreachable(self, host0):
                """Append ``host0`` to ``QUESTIONS_FILE``; log instead of raising on I/O errors."""
                try:
                    with open(self.QUESTIONS_FILE, 'a') as fh:
                        fh.write('%s\n' % host0)
                except OSError:
                    self._log.warning('could not record unreachable host %s', host0)

            def worker(self,host0,app_name,objs,i,center,hosts_str,env,threadmax):
                """Collect one group's version record and append it to ``objs``.

                ``threadmax`` is the semaphore permit acquired by ``main`` for this
                worker; it is released on every exit path (success, unreachable host,
                or exception) so permits cannot leak and stall ``main``.
                """
                try:
                    addr = socket.gethostbyname(socket.getfqdn(socket.gethostname()))
                    if addr == '192.168.89.133':
                        # On this address no remote lookup is done and a fixed tag is used
                        # (presumably a development machine -- confirm).
                        ver = 'v1'
                    else:
                        # NOTE(review): the probe uses port 53742 while par_ver connects
                        # to port 2222 -- confirm this is intentional.
                        if not self.chaoshi(['telnet', host0, '53742'], 2):
                            self._record_unreachable(host0)
                            return
                        ver = self.par_ver(host0, app_name)
                    now = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M')
                    objs.append({'v_id':i, 'v_gps':center, 'v_proj':app_name, 'v_tag':ver, 'v_hosts':hosts_str, 'v_time':now, 'v_env':env})
                finally:
                    threadmax.release()

            def main(self):
                """Fetch the inventory, collect all versions concurrently, store them in Redis."""
                try:
                    os.remove(self.QUESTIONS_FILE)
                except FileNotFoundError:
                    pass
                except OSError:
                    self._log.warning('could not remove %s', self.QUESTIONS_FILE)
                all_keys = requests.get("http://172.16.3.100:10082/assets/inventory/--list/None/")
                all_objs = all_keys.json()
                i = 1
                objs = []
                gps = []
                threads = []
                env_list=[]
                threadmax = threading.BoundedSemaphore(self.MAX_THREADS)
                for item in all_objs:
                    if item == 'all' or item == '_meta':
                        continue
                    parsed = self._parse_group(item)
                    if parsed is None:
                        self._log.warning('skipping group with unexpected name: %s', item)
                        continue
                    env, center, app_name = parsed
                    if env not in env_list:
                        env_list.append(env)
                    if center not in gps:
                        gps.append(center)
                    hosts = all_objs[item].get('hosts') or []
                    if not hosts:
                        # Nothing to query for a group without hosts.
                        self._log.warning('skipping group without hosts: %s', item)
                        continue
                    # Every host is followed by a comma (trailing comma included).
                    hosts_str = ''.join(host + ',' for host in hosts)
                    host0 = hosts[0]
                    # Take a permit only when a worker is really started; the worker
                    # gives it back when it finishes.
                    threadmax.acquire()
                    t = threading.Thread(target=self.worker, args=(host0,app_name,objs,i,center,hosts_str,env,threadmax))
                    try:
                        t.start()
                    except RuntimeError:
                        # The worker never ran, so return its permit here.
                        threadmax.release()
                        raise
                    threads.append(t)
                    i += 1

                for t in threads:
                    t.join()
                print(objs)
                print(env_list)
                red = redis.Redis(host='localhost', port=6379, db=1)
                payload = {'gps': gps , 'deploys': objs}
                objs_json = json.dumps(payload)
                red.set('versions', objs_json)

        if __name__ == '__main__':
            # Script entry point: build the collector and run one collection pass.
            GetVersion().main()

    测试运行大概3分钟15秒左右运行结束,而且数据不会丢失
三. 总结
    多线程虽然好用,但要注意控制线程数量:线程太多会导致报错,进而丢失数据。脚本修改完成后,重启celery和supervisord即可
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值