一文读懂flask
在实际工作中遇到了需要通过gunicorn
启动flask
应用,并在gunicorn
启动每个worker
之前为每个worker
初始化ros
节点信息的需求,但在改造的过程中遇到了问题:通过worker
数指定节点名称并多次初始化节点时,只有一个worker
的节点能够初始化成功,因此激发了我去了解gunicorn
是如何启动flask
应用的?收到请求后是如何将请求分发给flask
应用的?以及是如何做到多个worker
同时监听同一个端口,又是如何做到只有一个worker
处理请求的?接下来会通过分析flask
和gunicorn
的源码来进一步了解这些问题都是如何解决的,在这个过程中将会直观地看到flask
应用是如何为每一个请求维护请求上下文、应用上下文的生命周期,同时也会介绍flask
是如何调用钩子函数,包括before_first_request
、before_request
、after_request
、teardown_request
、teardown_appcontext
的调用时机。
1、flask应用启动过程-开发模式
1.1 flask的入口函数(run方法)
flask
自带了一个开发过程中使用的wsgi
服务,在启动的时候通常是实例化Flask
类,然后调用实例的run()
方法,在这里就不去关注加载配置文件的过程, 我们看一下run
方法具体执行了哪些步骤:
class Flask(Scaffold):
    """The flask object implements a WSGI application and acts as the central
    object. It is passed the name of the module or package of the
    application. Once it is created it will act as a central registry for
    the view functions, the URL rules, template configuration and much more.
    The name of the package is used to resolve resources from inside the
    package or the folder the module is contained in depending on if the
    package parameter resolves to an actual python package (a folder with
    an :file:`__init__.py` file inside) or a standard module (just a ``.py`` file).
    For more information about resource loading, see :func:`open_resource`.
    """
    ......
    def run(
        self,
        host: t.Optional[str] = None,
        port: t.Optional[int] = None,
        debug: t.Optional[bool] = None,
        load_dotenv: bool = True,
        **options: t.Any,
    ) -> None:
        # Change this into a no-op if the server is invoked from the
        # command line. Have a look at cli.py for more information.
        ......
        ......
        server_name = self.config.get("SERVER_NAME")
        sn_host = sn_port = None
        if server_name:
            sn_host, _, sn_port = server_name.partition(":")  # split SERVER_NAME into the host and port to listen on
        # Host precedence: explicit argument, then SERVER_NAME, then loopback.
        if not host:
            if sn_host:
                host = sn_host
            else:
                host = "127.0.0.1"
        # Port precedence: explicit argument (0 is accepted), then SERVER_NAME, then 5000.
        if port or port == 0:
            port = int(port)
        elif sn_port:
            port = int(sn_port)
        else:
            port = 5000  # default port, used when no port was passed and SERVER_NAME carries none
        options.setdefault("use_reloader", self.debug)
        options.setdefault("use_debugger", self.debug)
        options.setdefault("threaded", True)
        cli.show_server_banner(self.env, self.debug, self.name, False)  # print the Flask startup banner
        from werkzeug.serving import run_simple
        try:
            run_simple(t.cast(str, host), port, self, **options)  # start the WSGI server; the app itself (self) is the WSGI application
        finally:
            # reset the first request information if the development server
            # reset normally. This makes it possible to restart the server
            # without reloader and that stuff from an interactive shell.
            self._got_first_request = False
由以上代码可以看出,在flask的run方法中,主要是根据配置项获取启动server所需的必要参数,主要包含以下几个步骤:
- 根据配置文件获取server_name配置项,并根据配置项获取host和port信息
- 根据是否显式指定了host和port,来决定是否使用默认监听地址(127.0.0.1)和默认端口(5000)
- 通过调用werkzeug.serving包中的run_simple方法启动server
1.2 启动服务端(run_simple方法)
在这里我们需要关注一下run_simple的入参和处理过程,以下是run_simple方法的具体实现过程:
def run_simple(
    hostname: str,
    port: int,
    application: "WSGIApplication",
    use_reloader: bool = False,
    use_debugger: bool = False,
    use_evalex: bool = True,
    extra_files: t.Optional[t.Iterable[str]] = None,
    exclude_patterns: t.Optional[t.Iterable[str]] = None,
    reloader_interval: int = 1,
    reloader_type: str = "auto",
    threaded: bool = False,
    processes: int = 1,
    request_handler: t.Optional[t.Type[WSGIRequestHandler]] = None,
    static_files: t.Optional[t.Dict[str, t.Union[str, t.Tuple[str, str]]]] = None,
    passthrough_errors: bool = False,
    ssl_context: t.Optional[_TSSLContextArg] = None,
) -> None:
    ......  # elided by the article: port validation, static-file setup, debug-mode setup, etc.
    srv = make_server(
        hostname,
        port,
        application,
        threaded,
        processes,
        request_handler,
        passthrough_errors,
        ssl_context,
        fd=fd,
    )  # factory that builds the server; per the article it yields one of ThreadedWSGIServer, ForkingWSGIServer or BaseWSGIServer, and the article follows the BaseWSGIServer path
    if not is_running_from_reloader():
        srv.log_startup()
        _log("info", _ansi_style("Press CTRL+C to quit", "yellow"))
    if use_reloader:
        from ._reloader import run_with_reloader
        # The reloader is handed serve_forever as the function to run.
        run_with_reloader(
            srv.serve_forever,
            extra_files=extra_files,
            exclude_patterns=exclude_patterns,
            interval=reloader_interval,
            reloader_type=reloader_type,
        )
    else:
        srv.serve_forever()
这里BaseWSGIServer
存在一个继承关系:
BaseWSGIServer->http.server.HTTPServer->socketserver.TCPServer->socketserver.BaseServer
在make_server中根据配置项最终创建了一个BaseWSGIServer
的实例,所以我们直接看BaseWSGIServer
的__init__
方法:
class BaseWSGIServer(HTTPServer):
    """A WSGI server that handles one request at a time.
    Use :func:`make_server` to create a server instance.
    """
    multithread = False
    multiprocess = False
    request_queue_size = LISTEN_QUEUE
    def __init__(
        self,
        host: str,
        port: int,
        app: "WSGIApplication",
        handler: t.Optional[t.Type[WSGIRequestHandler]] = None,
        passthrough_errors: bool = False,
        ssl_context: t.Optional[_TSSLContextArg] = None,
        fd: t.Optional[int] = None,
    ) -> None:
        if handler is None:
            handler = WSGIRequestHandler  # default request handler class
        # If the handler doesn't directly set a protocol version and
        # thread or process workers are used, then allow chunked
        # responses and keep-alive connections by enabling HTTP/1.1.
        if "protocol_version" not in vars(handler) and (
            self.multithread or self.multiprocess
        ):
            handler.protocol_version = "HTTP/1.1"
        self.host = host
        self.port = port
        self.app = app
        self.passthrough_errors = passthrough_errors
        self.address_family = address_family = select_address_family(host, port)
        server_address = get_sockaddr(host, int(port), address_family)
        # Remove a leftover Unix socket file from a previous run. Don't
        # remove a file that was set up by run_simple.
        if address_family == af_unix and fd is None:
            server_address = t.cast(str, server_address)
            if os.path.exists(server_address):
                os.unlink(server_address)
        # Bind and activate will be handled manually, it should only
        # happen if we're not using a socket that was already set up.
        super().__init__(
            server_address,  # type: ignore[arg-type]
            handler,
            bind_and_activate=False,
        )
        if fd is None:
            # No existing socket descriptor, do bind_and_activate=True.
            try:
                self.server_bind()  # bind the server socket to the address
                self.server_activate()  # start listening for connections
            except BaseException:
                self.server_close()
                raise
        else:
            # Use the passed in socket directly.
            self.socket = socket.fromfd(fd, address_family, socket.SOCK_STREAM)
            self.server_address = self.socket.getsockname()
        if address_family != af_unix:
            # If port was 0, this will record the bound port.
            self.port = self.server_address[1]
        if ssl_context is not None:
            # ssl_context may arrive as a tuple or the string "adhoc";
            # both are turned into a context object before wrapping the socket.
            if isinstance(ssl_context, tuple):
                ssl_context = load_ssl_context(*ssl_context)
            elif ssl_context == "adhoc":
                ssl_context = generate_adhoc_ssl_context()
            self.socket = ssl_context.wrap_socket(self.socket, server_side=True)
            self.ssl_context: t.Optional["ssl.SSLContext"] = ssl_context
        else:
            self.ssl_context = None
class TCPServer(BaseServer):
    address_family = socket.AF_INET
    socket_type = socket.SOCK_STREAM
    request_queue_size = 5  # backlog passed to listen() in server_activate
    allow_reuse_address = False
    def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True):
        """Constructor. May be extended, do not override."""
        BaseServer.__init__(self, server_address, RequestHandlerClass)
        self.socket = socket.socket(self.address_family,
                                    self.socket_type)
        if bind_and_activate:
            try:
                self.server_bind()
                self.server_activate()
            except:
                # Close the socket on any failure, then re-raise.
                self.server_close()
                raise
    def server_bind(self):
        """Called by constructor to bind the socket.
        May be overridden.
        """
        if self.allow_reuse_address:
            self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.socket.bind(self.server_address)  # bind the socket to the address
        self.server_address = self.socket.getsockname()
    def server_activate(self):
        """Called by constructor to activate the server.
        May be overridden.
        """
        self.socket.listen(self.request_queue_size)  # start listening for incoming connections
从代码中可以看出,在BaseWSGIServer
类实例化的过程中主要执行了以下几个步骤:
-
指定请求处理类,在不指定的情况下默认使用
WSGIRequestHandler
-
绑定server,监听指定socket
-
开启请求监听
1.3 启动请求处理循环(srv.serve_forever())
在监听socket
以后,需要启动一个无限循环来处理收到的请求,在flask
中是通过调用BaseServer
的serve_forever()
方法来实现的,接下来看一下这个方法的具体实现:
class BaseServer:
    ......
    def serve_forever(self, poll_interval=0.5):
        """Handle one request at a time until shutdown.
        Polls for shutdown every poll_interval seconds. Ignores
        self.timeout. If you need to do periodic tasks, do them in
        another thread.
        """
        self.__is_shut_down.clear()
        try:
            # XXX: Consider using another file descriptor or connecting to the
            # socket to wake this up instead of polling. Polling reduces our
            # responsiveness to a shutdown request and wastes cpu at all other
            # times.
            with _ServerSelector() as selector:
                selector.register(self, selectors.EVENT_READ)  # register the server with the selector, watching for read-readiness events
                while not self.__shutdown_request:
                    ready = selector.select(poll_interval)  # wait up to poll_interval seconds for an event; the loop repeats when nothing is ready
                    # bpo-35017: shutdown() called during select(), exit immediately.
                    if self.__shutdown_request:
                        break
                    if ready:
                        self._handle_request_noblock()  # an event arrived: handle the request
                    self.service_actions()
        finally:
            self.__shutdown_request = False
            self.__is_shut_down.set()
在serve_forever
方法中主要执行了下面三个步骤,用来实现监听收到请求的事件,并调用相应的方法处理收到的请求
-
通过操作系统的select机制注册上述监听的socket的文件描述符,并指定监听的事件
-
收到指定的事件
-
收到
ready
信号后,开始处理请求
在开发模式中,flask
是通过系统提供的select
系统调用监听socket
,当收到注册的事件后,触发调用请求处理函数来同步处理收到的请求,到这里为止flask
便完成了服务启动过程,接下来一起看一下flask
是如何处理收到的请求的
2、开发模式下flask是如何处理收到的请求的
2.1 收到请求
在了解了flask
是如何启动的,并且完成端口监听与事件注册以后,就是等待收到请求,在收到注册的事件后,会通过调用_handle_request_noblock
方法来处理收到的请求,接下来看一下这个方法的具体实现
class BaseServer:
    ......
    def _handle_request_noblock(self):
        """Handle one request, without blocking.
        I assume that selector.select() has returned that the socket is
        readable before this function was called, so there should be no risk of
        blocking in get_request().
        """
        try:
            request, client_address = self.get_request()  # fetch the incoming request and the client's address
        except OSError:
            return
        if self.verify_request(request, client_address):  # validate the request and client information
            try:
                self.process_request(request, client_address)  # process the request
            except Exception:
                self.handle_error(request, client_address)
                self.shutdown_request(request)
            except:
                # Anything that is not an Exception subclass: clean up, then propagate.
                self.shutdown_request(request)
                raise
        else:
            self.shutdown_request(request)
    ......
    def verify_request(self, request, client_address):
        """Verify the request. May be overridden.
        Return True if we should proceed with this request.
        """
        return True
    def process_request(self, request, client_address):
        """Call finish_request.
        Overridden by ForkingMixIn and ThreadingMixIn.
        """
        self.finish_request(request, client_address)  # instantiate the request handler class given when the server was created; this starts the actual handling
        self.shutdown_request(request)
    ......
    def finish_request(self, request, client_address):
        """Finish one request by instantiating RequestHandlerClass."""
        self.RequestHandlerClass(request, client_address, self)  # instantiate the handler, passing this server as its third argument
class TCPServer(BaseServer):
    ......
    def get_request(self):
        """Get the request and client address from the socket.
        May be overridden.
        """
        return self.socket.accept()  # accept a pending connection on the listening socket
在这个方法中,主要实现了从网络栈中获取收到的请求信息,并通过实例化请求处理类WSGIRequestHandler
来处理接收到的请求,那么接下来就开始进一步了解在这个类中是如何实现请求处理的,这个类也存在下面的继承关系
WSGIRequestHandler->BaseHTTPRequestHandler->socketserver.StreamRequestHandler->socketserver.BaseRequestHandler
首先从WSGIRequestHandler
这个类的初始化过程来分析做了哪些动作,由于这些类中只有最顶层的父类实现了__init__
方法,所以从socketserver.BaseRequestHandler
这个类开始
class BaseRequestHandler:
    def __init__(self, request, client_address, server):
        self.request = request  # the received request
        self.client_address = client_address  # address of the requesting client
        self.server = server  # per the article, the BaseWSGIServer instance that created this handler
        self.setup()  # prepare the objects needed for the response (subclasses set timeout, buffers, etc.)
        try:
            self.handle()  # run the request-handling flow
        finally:
            # finish() runs even when handle() raises.
            self.finish()
class StreamRequestHandler(BaseRequestHandler):
    ......
    def setup(self):
        self.connection = self.request
        if self.timeout is not None:
            self.connection.settimeout(self.timeout)
        if self.disable_nagle_algorithm:
            self.connection.setsockopt(socket.IPPROTO_TCP,
                                       socket.TCP_NODELAY, True)
        self.rfile = self.connection.makefile('rb', self.rbufsize)  # file object used to read the request
        if self.wbufsize == 0:  # file object used to write the response; wbufsize 0 selects _SocketWriter
            self.wfile = _SocketWriter(self.connection)
        else:
            self.wfile = self.connection.makefile('wb', self.wbufsize)
在这段代码中主要做了以下几个步骤:
- 创建读取请求信息的文件对象(rfile)
- 初始化响应所需的对象信息,以及设置超时时间以及buffer大小等
- 调用
handle
方法开始处理请求
2.2 请求处理过程
在flask
中具体执行处理请求的方法是WSGIRequestHandler
这个请求处理类的handle
方法,但是在这个方法的实现中又是直接调用的父类的同名方法,所以接下来我们一起看一下这两个方法的实现:
class WSGIRequestHandler(BaseHTTPRequestHandler):
    """A request handler that implements WSGI dispatching."""
    ......
    def handle(self) -> None:
        """Handles a request ignoring dropped connections."""
        try:
            super().handle()  # delegate to the parent class's handle()
        except (ConnectionError, socket.timeout) as e:
            self.connection_dropped(e)
        except Exception as e:
            # With SSL enabled, SSL errors are logged rather than raised;
            # every other exception propagates.
            if self.server.ssl_context is not None and is_ssl_error(e):
                self.log_error("SSL error occurred: %s", e)
            else:
                raise
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
    ......
    def handle_one_request(self):
        """Handle a single HTTP request.
        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.
        """
        try:
            self.raw_requestline = self.rfile.readline(65537)  # read the request line through self.rfile
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
                return
            if not self.raw_requestline:
                self.close_connection = True
                return
            if not self.parse_request():  # parse the request (headers, method, path, ...)
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command  # build the handler method name, e.g. do_<command>
            if not hasattr(self, mname):
                self.send_error(
                    HTTPStatus.NOT_IMPLEMENTED,
                    "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)  # look up the method that handles this request
            method()  # call it to handle the request
            self.wfile.flush()  # flush the response stream so the response is actually sent
        except socket.timeout as e:
            #a read or a write timed out. Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = True
            return
    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = True
        self.handle_one_request()
        # Keep serving requests on this connection until close_connection is true again.
        while not self.close_connection:
            self.handle_one_request()
从BaseHTTPRequestHandler
的handle
方法中,可以看到最终调用的是handle_one_request
方法,在这个方法中又主要执行了以下几个步骤,用来处理请求并发送响应:
- 通过文件描述符读取请求数据
- 解析请求数据,读取请求头、请求方法、路径等信息
- 构建请求方法名称
- 获取处理相应请求的方法
- 调用方法处理请求
- 强制刷新响应描述符,发送响应信息
这个方法中还隐藏了一个过程,就是如何获取到处理请求的方法。从构建请求处理方法名称的代码可以看出,这里是通过一个固定的格式do_method
的方式构建的,然后通过getattr
方法获取处理的方法,而在WSGIRequestHandler
类中又通过定义了一个__getattr__
方法来处理调用这个方法时执行的逻辑,具体代码如下
class WSGIRequestHandler(BaseHTTPRequestHandler):
    """A request handler that implements WSGI dispatching."""
    ......
    def run_wsgi(self) -> None:
        ......
        def execute(app: "WSGIApplication") -> None:
            application_iter = app(environ, start_response)  # app is self.server.app on the normal path; per the article that is the user's Flask instance, so this call goes through its __call__
            try:
                for data in application_iter:
                    write(data)
                if not headers_sent:
                    write(b"")
                if chunk_response:
                    self.wfile.write(b"0\r\n\r\n")
            finally:
                if hasattr(application_iter, "close"):
                    application_iter.close()  # type: ignore
        try:
            execute(self.server.app)
        except (ConnectionError, socket.timeout) as e:
            self.connection_dropped(e, environ)
        except Exception as e:
            if self.server.passthrough_errors:
                raise
            if status_sent is not None and chunk_response:
                self.close_connection = True
            try:
                # if we haven't yet sent the headers but they are set
                # we roll back to be able to set them again.
                if status_sent is None:
                    status_set = None
                    headers_set = None
                execute(InternalServerError())
            except Exception:
                # Deliberately ignored: a failure while sending the fallback
                # error response must not mask the original error logged below.
                pass
            from .debug.tbtools import DebugTraceback
            msg = DebugTraceback(e).render_traceback_text()
            self.server.log("error", f"Error on request:\n{msg}")
    ......
    def __getattr__(self, name: str) -> t.Any:
        # All HTTP methods are handled by run_wsgi.
        if name.startswith("do_"):
            return self.run_wsgi
        # All other attributes are forwarded to the base class.
        return getattr(super(), name)
根据这段代码可以看到,在调用getattr
方法时,如果方法名称是以do_
开头的,返回的是run_wsgi
方法,而在这个方法中又是通过调用flask
应用实例的__call__
方法来处理请求,并响应结果,那么接下来进入到Flask
的这个方法中去看看到底执行了哪些过程
2.3 Flask调用注册的路由处理请求
在Flask
这个类中定义了一个__call__
方法,在调用对象时,会自动执行这个方法,而在这个方法中,又返回了wsgi_app
方法的调用结果,以下是代码实现:
class Flask(Scaffold):
    ......
    def wsgi_app(self, environ: dict, start_response: t.Callable) -> t.Any:
        """The actual WSGI application. This is not implemented in
        :meth:`__call__` so that middlewares can be applied without
        losing a reference to the app object. Instead of doing this::
            app = MyMiddleware(app)
        It's a better idea to do this instead::
            app.wsgi_app = MyMiddleware(app.wsgi_app)
        Then you still have the original application object around and
        can continue to call methods on it.
        .. versionchanged:: 0.7
            Teardown events for the request and app contexts are called
            even if an unhandled error occurs. Other events may not be
            called depending on when an error occurs during dispatch.
            See :ref:`callbacks-and-errors`.
        :param environ: A WSGI environment.
        :param start_response: A callable accepting a status code,
            a list of headers, and an optional exception context to
            start the response.
        """
        ctx = self.request_context(environ)
        error: t.Optional[BaseException] = None
        try:
            try:
                ctx.push()
                response = self.full_dispatch_request()
            except Exception as e:
                error = e
                response = self.handle_exception(e)
            except:  # noqa: B001
                # Not an Exception subclass: record it for the finally block, then propagate.
                error = sys.exc_info()[1]
                raise
            return response(environ, start_response)
        finally:
            if self.should_ignore_error(error):
                error = None
            ctx.auto_pop(error)  # pop the request context; per the article this also tears down the app context and runs the teardown_request / teardown_appcontext callbacks in turn
    def __call__(self, environ: dict, start_response: t.Callable) -> t.Any:
        """The WSGI server calls the Flask application object as the
        WSGI application. This calls :meth:`wsgi_app`, which can be
        wrapped to apply middleware.
        """
        return self.wsgi_app(environ, start_response)
从这个方法中可以看到,在开始处理请求之前,会为当前请求创建一个请求上下文,并通过_request_ctx_stack
这个栈实现上下文的线程隔离,同时在将请求上下文存储之前还会为当前请求创建一个应用上下文对象,存储到_implicit_app_ctx_stack
这个栈中(具体实现请感兴趣的小伙伴自行翻看源码进一步了解)。上下文创建完成后,随即开始调用full_dispatch_request
这个方法来处理请求,在处理完成后会销毁当前上下文信息,并会依次执行通过teardown_request
、teardown_appcontext
装饰器装饰过的方法,接下来看看这个方法又是怎么处理请求的
class Flask(Scaffold):
    ......
    def dispatch_request(self) -> ResponseReturnValue:
        """Does the request dispatching. Matches the URL and returns the
        return value of the view or error handler. This does not have to
        be a response object. In order to convert the return value to a
        proper response object, call :func:`make_response`.
        .. versionchanged:: 0.7
            This no longer does the exception handling, this code was
            moved to the new :meth:`full_dispatch_request`.
        """
        req = _request_ctx_stack.top.request
        if req.routing_exception is not None:
            self.raise_routing_exception(req)
        rule = req.url_rule
        # if we provide automatic options for this URL and the
        # request came with the OPTIONS method, reply automatically
        if (
            getattr(rule, "provide_automatic_options", False)
            and req.method == "OPTIONS"
        ):
            return self.make_default_options_response()
        # otherwise dispatch to the handler for that endpoint
        return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
    def full_dispatch_request(self) -> Response:
        """Dispatches the request and on top of that performs request
        pre and postprocessing as well as HTTP exception catching and
        error handling.
        .. versionadded:: 0.7
        """
        self.try_trigger_before_first_request_functions()  # per the article: runs the before_first_request functions in turn, on the first request received
        try:
            request_started.send(self)
            rv = self.preprocess_request()  # per the article: runs the before_request functions in turn
            if rv is None:
                rv = self.dispatch_request()  # look up the view function in view_functions (keyed by the matched rule's endpoint) and call it
        except Exception as e:
            rv = self.handle_user_exception(e)
        return self.finalize_request(rv)  # per the article: builds the response object, runs the after_request functions in turn, and returns the response
到这里为止flask
处理请求的完整过程便已经结束,在这个方法里面主要做了以下几个步骤:
- 依次执行通过before_first_request装饰器装饰过的方法,会在接收到第一个请求的时候执行
- 依次执行通过before_request装饰器装饰过的方法
- 通过view_functions这个map记录注册过的视图函数,key为路由规则对应的endpoint(端点名称),value为视图方法,调用当前请求匹配到的endpoint对应的视图方法处理请求
- 构建响应对象,依次执行通过after_request装饰器装饰过的方法,并返回响应对象