转载出自:http://www.rainsts.net/article.asp?id=1013
3. RequestHandler
这是我们使用 Tornado 打交道最多的一个类了。
class RequestHandler(object):
"""Subclass this class and define get() or post() to make a handler.
If you want to support more methods than the standard GET/HEAD/POST, you
should override the class variable SUPPORTED_METHODS in your
RequestHandler class.
"""
SUPPORTED_METHODS = ("GET", "HEAD", "POST", "DELETE", "PUT")
SUPPORTED_METHODS 定义了我们能处理的 HTTP Method,也就是我们能 override 的处理方法。
def head(self, *args, **kwargs):
    """Default HEAD handler: raises HTTPError(405) unless a subclass overrides it."""
    raise HTTPError(405)
def get(self, *args, **kwargs):
    """Default GET handler: raises HTTPError(405) unless a subclass overrides it."""
    raise HTTPError(405)
def post(self, *args, **kwargs):
    """Default POST handler: raises HTTPError(405) unless a subclass overrides it."""
    raise HTTPError(405)
def delete(self, *args, **kwargs):
    """Default DELETE handler: raises HTTPError(405) unless a subclass overrides it."""
    raise HTTPError(405)
def put(self, *args, **kwargs):
    """Default PUT handler: raises HTTPError(405) unless a subclass overrides it."""
    raise HTTPError(405)
prepare 方法要结合 _execute 和 finish 来理解。从前面分析的流程中,_execute 是 RequestHandler 被调用的入口,它首先调用了 prepare(),并且会判断 self._finished 标记,而后才执行 get / post 。
def prepare(self):
    """Hook invoked ahead of the actual handler method.

    The base implementation does nothing. Override it in a handler when
    you want one common choke point that all of your requests pass
    through.
    """
def finish(self, chunk=None):
    # Excerpt: the leading part of the body is elided in this listing.
    ...
    # Outside WSGI mode: flush the response (footers included), finish the
    # underlying request, and log it.
    if not self.application._wsgi:
        self.flush(include_footers=True)
        self.request.finish()
        self._log()
    # Mark the handler as finished; _execute() and write() test this flag.
    self._finished = True
def _execute(self, transforms, *args, **kwargs):
    # Excerpt: the steps before prepare() are elided in this listing.
    ...
    # Run the prepare() hook first.
    self.prepare()
    # Skip the HTTP-method handler when the request is already finished
    # (for example because prepare() called finish()).
    if not self._finished:
        # Dispatch to the method named after the lower-cased HTTP verb.
        getattr(self, self.request.method.lower())(*args, **kwargs)
        # Finish automatically unless auto-finish is off or the handler
        # already finished the request itself.
        if self._auto_finish and not self._finished: self.finish()
只要我们 override prepare(),并且调用 finish() 就可以阻断 get / post 的执行。这个可以用来做缓存或者其他前置处理。
class TestHandler(RequestHandler):
    """Demo handler whose prepare() answers the request and finishes it early."""

    def prepare(self):
        # Cache-expiry check omitted here for brevity.
        self.write("cache\n")
        self.finish()

    def get(self):
        # Skipped by _execute(): prepare() has already finished the request.
        self.write("Hello, World!\n")
$ curl http://localhost:8000
cache
write 值得一提的是:如果参数是 dict,会直接以 JSON 输出,并把 Content-Type 设置为 text/javascript。
def write(self, chunk):
    """Append ``chunk`` to the pending output buffer.

    Nothing is sent to the network here; use flush() for that. When
    ``chunk`` is a dict it is serialized as JSON and the response
    Content-Type is set to text/javascript.
    """
    assert not self._finished
    payload = chunk
    if isinstance(payload, dict):
        payload = escape.json_encode(payload)
        self.set_header("Content-Type", "text/javascript; charset=UTF-8")
    self._write_buffer.append(_utf8(payload))
render() 实际是通过 render_string() 来完成对模板的调用。
def render_string(self, template_name, **kwargs):
    """Generate the named template with the given keyword arguments.

    The template directory comes from the "template_path" application
    setting. When that setting is absent, the call stack is walked upward
    until a frame from a source file other than this one is found, and
    that file's directory is used instead.

    One loader per template path is cached on RequestHandler._templates;
    the "template_loader" setting, when set, is used in place of a new
    template.Loader(template_path).

    Explicit ``kwargs`` override entries from ``self.ui``, which in turn
    override the implicit names (handler, request, current_user, locale,
    _, static_url, xsrf_form_html, reverse_url).
    """
    settings = self.application.settings
    template_path = settings.get("template_path")
    if not template_path:
        # No configured path: fall back to the directory of the first
        # caller that lives outside this source file.
        frame = sys._getframe(0)
        web_file = frame.f_code.co_filename
        while frame.f_code.co_filename == web_file:
            frame = frame.f_back
        template_path = os.path.dirname(frame.f_code.co_filename)

    # Class-level loader cache, created on first use.
    if not getattr(RequestHandler, "_templates", None):
        RequestHandler._templates = {}
    loaders = RequestHandler._templates
    if template_path not in loaders:
        loaders[template_path] = (
            settings.get("template_loader") or template.Loader(template_path)
        )
    compiled = loaders[template_path].load(template_name)

    # Names that are implicitly available inside every template.
    namespace = {
        "handler": self,
        "request": self.request,
        "current_user": self.current_user,
        "locale": self.locale,
        "_": self.locale.translate,
        "static_url": self.static_url,
        "xsrf_form_html": self.xsrf_form_html,
        "reverse_url": self.application.reverse_url,
    }
    namespace.update(self.ui)
    # Arguments passed by the caller win over everything above.
    namespace.update(kwargs)
    return compiled.generate(**namespace)
对 xsrf 攻击的防范比较简单:首先 xsrf_token 属性会生成一个临时唯一标记,并写入 cookie。而后在模板中通过 xsrf_form_html() 将这个标记再次以 hidden input 的形式写入表单。当执行 POST 提交时,请求中实际上就会带有两份标记,通过对比这两份标记就可以确认请求发自本站,因为攻击者的跨站请求通常无法同时凑齐这两份标记。
@property
def xsrf_token(self):
    """The XSRF-prevention token for the current user/session.

    An '_xsrf' cookie is set and the same value is expected as an
    '_xsrf' argument on all POST requests; a mismatch causes the form
    submission to be rejected as a potential forgery.

    See http://en.wikipedia.org/wiki/Cross-site_request_forgery
    """
    # Already resolved on this handler instance: reuse it.
    if hasattr(self, "_xsrf_token"):
        return self._xsrf_token

    # Prefer the token the client already carries in its cookie.
    token = self.get_cookie("_xsrf")
    if not token:
        # No cookie yet: mint a fresh token and hand it to the client.
        token = binascii.b2a_hex(uuid.uuid4().bytes)
        if self.current_user:
            expires_days = 30
        else:
            expires_days = None
        self.set_cookie("_xsrf", token, expires_days=expires_days)
    self._xsrf_token = token
    return self._xsrf_token
def check_xsrf_cookie(self):
    """Verifies that the '_xsrf' cookie matches the '_xsrf' argument.

    To prevent cross-site request forgery, we set an '_xsrf' cookie
    and include the same '_xsrf' value as an argument with all POST
    requests. If the two do not match, we reject the form submission
    as a potential forgery.

    See http://en.wikipedia.org/wiki/Cross-site_request_forgery
    """
    # Excerpt: the leading part of the body is elided in this listing.
    ...
    # Read _xsrf from the request arguments, i.e. the hidden input that
    # xsrf_form_html() generates.
    token = self.get_argument("_xsrf", None)
    if not token: raise HTTPError(403, "'_xsrf' argument missing from POST")
    # Compare the value from the hidden input with the cookie-backed token
    # to detect a forged submission.
    if self.xsrf_token != token:
        raise HTTPError(403, "XSRF cookie does not match POST argument")
def xsrf_form_html(self):
    """Return the hidden <input/> element that every POST form must include.

    The element carries the _xsrf value that is checked on all POST
    requests to prevent cross-site request forgery. With the
    'xsrf_cookies' application setting enabled, this HTML must be part
    of all of your HTML forms.

    See check_xsrf_cookie() for more information.
    """
    escaped_token = escape.xhtml_escape(self.xsrf_token)
    return '<input type="hidden" name="_xsrf" value="' + escaped_token + '"/>'
def _execute(self, transforms, *args, **kwargs):
    # Excerpt: surrounding steps are elided in this listing.
    ...
    # XSRF protection only takes effect when the "xsrf_cookies"
    # application setting is enabled.
    # If XSRF cookies are turned on, reject form submissions without
    # the proper cookie
    if self.request.method == "POST" and self.application.settings.get("xsrf_cookies"):
        self.check_xsrf_cookie()
    ...
所谓的 Non-blocking 和 Asynchronous Request 目前看来还是形式大于实际。它并不会真的使用类似 ThreadPool 之类的机制去异步执行 get / post,而是需要我们的代码本身就支持异步调用。也就是说在 single-threading 模式下,如果 get 里面的代码不支持异步调用,就算使用了 @asynchronous,依然会被阻塞,后续的请求必须等当前处理全部完成后才被执行。
从 _execute 来看,只有让 self._auto_finish = False 才能阻止 self.finish() 被调用,阻止 HTTP 连接被关闭。
def _execute(self, transforms, *args, **kwargs):
    # Excerpt: surrounding steps are elided in this listing.
    ...
    # Dispatch to the method named after the lower-cased HTTP verb.
    getattr(self, self.request.method.lower())(*args, **kwargs)
    # finish() is skipped when _auto_finish is False, which leaves the
    # request unfinished after the handler method returns.
    if self._auto_finish and not self._finished: self.finish()
    ...
asynchronous 就做了 "self._auto_finish = False" 这么一件事,以确保 callback 被调用前连接还活着。
def asynchronous(method):
    """Mark a request handler method as asynchronous.

    With this decorator the response is not finished when the method
    returns; it is up to the request handler to call self.finish() to
    finish the HTTP request. Without it, the request is finished
    automatically when get() or post() returns.

    Example:

        class MyRequestHandler(web.RequestHandler):
            @web.asynchronous
            def get(self):
                http = httpclient.AsyncHTTPClient()
                http.fetch("http://friendfeed.com/", self._on_download)

            def _on_download(self, response):
                self.write("Downloaded!")
                self.finish()

    Raises:
        Exception: when the decorated method is invoked on a handler
            whose application has ``_wsgi`` set.
    """
    @functools.wraps(method)
    def _without_auto_finish(self, *args, **kwargs):
        wsgi_mode = self.application._wsgi
        if wsgi_mode:
            raise Exception("@asynchronous is not supported for WSGI apps")
        # Turn off auto-finish so _execute() leaves the request open.
        self._auto_finish = False
        result = method(self, *args, **kwargs)
        return result

    return _without_auto_finish
记得在 callback 里调用 finish(),否则请求不会被结束,HTTP 连接也不会被关闭,客户端将一直等待响应。
------------- 分隔线 -------------------
从代码上来看,Tornado 还有很长的路要走。就当前(tornado-0.2)而言它或许是个很好的 WebServer,但离 Django 甚至是 Flask 这样的 Web Framework 还很远。