前段时间我们的客户端总是Read timeout,定位以后发现是中间的消息转发系统消息堆积了。
消息堆积的原因是底层系统处理部分消息的时间比较长,异步IO转发消息是并发而不是并行的,这部分处理时间长的网络IO占用了大量并发切片时间和资源,导致原本处理很快的那部分请求也被阻塞了。
我们使用了线程池解决这个问题,实现并行的网络交互,避免了长时间请求阻塞其他请求,下面是系统架构模型及解决方案。
一、底层服务端
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import time
import random
import tornado
import tornado.httpclient
from multiprocessing import Process
from concurrent.futures import ThreadPoolExecutor
from tornado.web import RequestHandler, Application
class MyHandler(RequestHandler):
    """Backend test handler that simulates mixed-latency services.

    Ports 6405/6406 always answer fast (0.1 s); every other port randomly
    takes 0.4 s, 0.1 s or a pathological 10 s, reproducing the slow
    backends that caused message pile-up in the forwarding layer.
    """
    # NOTE: the original class carried an unused
    # `executor = ThreadPoolExecutor(max_workers=100)` attribute; nothing
    # referenced it, so it has been removed.

    @tornado.gen.coroutine
    def get(self):
        start_time = time.time()
        # `port` is stashed on the Application object by async_app().
        if self.application.port in (6405, 6406):
            # "Fast" backends: constant low latency.
            yield tornado.gen.sleep(0.1)
        else:
            # "Slow" backends: occasionally stall for 10 s to provoke
            # queueing upstream.
            yield tornado.gen.sleep(random.choice([0.4, 0.1, 10]))
        self.set_status(200)
        self.set_header('Content-Type', 'application/json; charset=UTF-8')
        self.finish({'code': 0, 'msg': 'OK'})
        print(time.time() - start_time)
def async_app(port):
    """Build one backend Application on *port* and run its IOLoop forever.

    Intended to be the target of a child Process (see __main__ below).
    """
    # Bug fix: `import tornado` / `import tornado.httpclient` do not load
    # the httpserver or ioloop submodules, so tornado.httpserver.HTTPServer
    # would raise AttributeError. Import them explicitly here.
    import tornado.httpserver
    import tornado.ioloop

    application = Application([(r'/test/?', MyHandler)], logging='info',
                              debug=True, xsrf_cookies=False)
    # MyHandler.get() reads this back to decide fast vs. slow behaviour.
    application.port = port
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(port)
    tornado.ioloop.IOLoop.instance().start()
if __name__ == "__main__":
    # One backend process per port in 6405..6414.
    workers = []
    for listen_port in range(6405, 6415):
        worker = Process(target=async_app, args=(listen_port,))
        worker.daemon = True  # die with the parent
        workers.append(worker)
    for worker in workers:
        worker.start()
    # Block until every server process exits (i.e. forever, normally).
    for worker in workers:
        worker.join()
二、中间层转发端
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import time
import json
import random
import tornado
import requests
import tornado.httpclient
from multiprocessing import Process
from tornado.concurrent import run_on_executor
from concurrent.futures import ThreadPoolExecutor
from tornado.web import RequestHandler, Application
client_1, client_2, client_3 = None, None, None  # Global connection pools: one set per worker process; reusable across threads within a process
class MyHandler(RequestHandler):
    """Forwarding handler: fans one incoming request out to the backends.

    Responses 1 and 3 use Tornado's non-blocking HTTP client. Response 2
    uses the blocking `requests` library and is therefore pushed onto a
    thread pool via @run_on_executor -- running it as a coroutine on the
    IOLoop would let one slow backend call stall every other request.
    """

    request_executor = ThreadPoolExecutor(max_workers=400)

    def __init__(self, application, request, **kwargs):
        global client_1, client_2, client_3
        # Fix: create the shared clients once per process instead of
        # re-running configure() and the constructors on every request.
        if client_1 is None:
            # Use curl_httpclient rather than the default simple_httpclient;
            # the default keeps closing and reopening connections, so the
            # pool never holds more than one connection.
            tornado.httpclient.AsyncHTTPClient.configure(
                'tornado.curl_httpclient.CurlAsyncHTTPClient')
            # force_instance=False shares a single client instance; True
            # would spawn many instances and burn local ports. max_clients
            # caps concurrent connections.
            client_1 = tornado.httpclient.AsyncHTTPClient(
                force_instance=False, max_clients=100,
                defaults=dict(request_timeout=5))
            client_2 = requests.Session()
            client_3 = tornado.httpclient.AsyncHTTPClient(
                force_instance=False, max_clients=100,
                defaults=dict(request_timeout=5))
        super(MyHandler, self).__init__(application, request, **kwargs)

    @tornado.gen.coroutine
    def get_response_1(self):
        """Async fetch from one of the fast backends (6405/6406)."""
        request = tornado.httpclient.HTTPRequest(
            'http://127.0.0.1:%s/test/' % random.choice([6405, 6406]),
            headers={'Connection': 'keep-alive'},
            request_timeout=5, validate_cert=False)
        response = yield tornado.gen.Task(client_1.fetch, request)
        self.response_1 = json.loads(response.body)

    @run_on_executor(executor='request_executor')
    # NOTE: decorating this with @tornado.gen.coroutine instead would run
    # the blocking requests call on the IOLoop, piling up messages and
    # timing out clients -- hence the thread pool.
    def get_response_2(self):
        """Blocking fetch from a possibly-slow backend, on a worker thread."""
        self.response_2 = None  # explicit default instead of a missing attr
        try:
            response_2 = client_2.get(
                'http://127.0.0.1:%s/test/' % random.choice(range(6407, 6415)),
                headers={'Connection': 'keep-alive'}, timeout=5, verify=False)
            self.response_2 = dict(response_2.json())
        except Exception as e:
            # Best effort: a timeout or bad payload from one backend must
            # not fail the whole forwarded request -- but do log it instead
            # of swallowing it silently as the original bare except did.
            print('get_response_2 failed:', e)

    @tornado.gen.coroutine
    def get_response_3(self):
        """Async fetch from one of the fast backends (6405/6406)."""
        request = tornado.httpclient.HTTPRequest(
            'http://127.0.0.1:%s/test/' % random.choice([6405, 6406]),
            headers={'Connection': 'keep-alive'},
            request_timeout=5, validate_cert=False)
        response = yield tornado.gen.Task(client_3.fetch, request)
        self.response_3 = json.loads(response.body)

    @tornado.gen.coroutine
    def get(self):
        start_time = time.time()
        # Sequential on purpose to match the measured behaviour; the three
        # could be yielded as a list to overlap their latencies.
        yield self.get_response_1()
        yield self.get_response_2()
        yield self.get_response_3()
        print('get response time :', time.time() - start_time)
        self.set_status(200)
        self.set_header('Content-Type', 'application/json; charset=UTF-8')
        self.finish({'code': 0, 'msg': 'OK', 'deal_time': time.time() - start_time})
def async_app(port):
    """Build one forwarder Application on *port* and run its IOLoop forever.

    Intended to be the target of a child Process (see __main__ below).
    """
    # Bug fix: `import tornado` / `import tornado.httpclient` do not load
    # the httpserver or ioloop submodules, so tornado.httpserver.HTTPServer
    # would raise AttributeError. Import them explicitly here.
    import tornado.httpserver
    import tornado.ioloop

    application = Application([(r'/test/?', MyHandler)], logging='info',
                              debug=True, xsrf_cookies=False)
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(port)
    tornado.ioloop.IOLoop.instance().start()
if __name__ == "__main__":
    # One forwarder process per port in 6305..6314.
    pool = []
    for listen_port in range(6305, 6315):
        proc = Process(target=async_app, args=(listen_port,))
        proc.daemon = True  # die with the parent
        pool.append(proc)
    for proc in pool:
        proc.start()
    # Block until every forwarder process exits (i.e. forever, normally).
    for proc in pool:
        proc.join()
三、客户发信端
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import time
import random
import requests
from requests.adapters import HTTPAdapter
from concurrent.futures import ThreadPoolExecutor
from requests.packages.urllib3.util import Retry
session = requests.Session()
# Disable urllib3's automatic retries so every failure surfaces immediately.
session.mount('http://', HTTPAdapter(max_retries=Retry(total=0)))


def send_test_request():
    """Issue one test request to a random forwarder (runs on a worker thread).

    Bug fix: the original called bare `requests.get`, which silently
    bypassed the retry-disabled, connection-pooling `session` configured
    above; use `session.get` so the mounted adapter actually applies.
    """
    try:
        response = session.get(
            'http://127.0.0.1:%s/test/' % random.choice(range(6305, 6315)),
            timeout=10)
        print(response.json())
    except Exception as e:
        print('error', e)
# Drive load: roughly 100 requests/second fanned out over 100 worker threads.
with ThreadPoolExecutor(max_workers=100) as sender_pool:
    while True:
        sender_pool.submit(send_test_request)
        time.sleep(0.01)  # pace submissions at ~10 ms intervals