文章目录
简介
Tornado是一个Python web框架和异步网络库,使用非阻塞网络I/O,Tornado可以支撑上万级的连接,这使它成为长轮询、WebSockets和其他需要与每个用户建立长时间连接的应用程序的理想选择。
安装
pip install tornado
初试
import tornado.web
import tornado.ioloop
class MainHandler(tornado.web.RequestHandler):
    """Handler for the site root that replies with a fixed greeting."""

    def get(self):
        """Answer GET / with the text "Hello World!"."""
        self.write("Hello World!")


if __name__ == "__main__":
    print("http://localhost:8888/")
    # Route table: the root path is served by MainHandler.
    app = tornado.web.Application([
        (r"/", MainHandler),
    ])
    app.listen(8888)
    # Blocks here and serves requests until the loop is stopped.
    tornado.ioloop.IOLoop.current().start()
获取get请求参数
import tornado.web
import tornado.ioloop
class MainHandler(tornado.web.RequestHandler):
    """Handler for the site root that greets the caller by name."""

    def get(self):
        """Answer GET / using the optional ``name`` query argument."""
        # Fall back to an empty string so a missing argument is not a 400.
        name = self.get_argument("name", default="")
        self.write("Hello {}!".format(name))


if __name__ == "__main__":
    print("http://localhost:8888/?name=XerCis")
    app = tornado.web.Application([
        (r"/", MainHandler),
    ])
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()
访问http://localhost:8888/?name=XerCis
从命令行读取配置
调用 tornado.options 模块的函数,步骤如下:
- define() 定义参数
- parse_command_line() 读取命令行参数
- 调用属性 options.xxx 提取参数
从命令行读取配置.py
import tornado.web
import tornado.ioloop
from tornado.options import define, options, parse_command_line
# Declare the --port command line option (integer, defaults to 8888).
define("port", default=8888, help="运行端口", type=int)


class MainHandler(tornado.web.RequestHandler):
    """Handler for the site root that replies with a fixed greeting."""

    def get(self):
        """Answer GET / with the text "Hello World!"."""
        self.write("Hello World!")


if __name__ == "__main__":
    # Parse sys.argv first so options.port reflects --port when it is given.
    parse_command_line()
    print("http://localhost:{}/".format(options.port))
    app = tornado.web.Application([
        (r"/", MainHandler),
    ])
    app.listen(options.port)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
运行
python "03 从命令行读取配置.py" --port=8000
正则表达式指定路径
- /reverse/(\w+):正则表达式匹配任何以字符串 /reverse/ 开始并紧跟着一个或多个字母、数字或下划线的路径
- /wrap:默认每40个字符换行
import textwrap
import tornado.web
import tornado.ioloop
import tornado.httpserver
from tornado.options import define, options, parse_command_line
# Declare the --port command line option (integer, defaults to 8888).
define("port", default=8888, help="运行端口", type=int)


class ReverseHandler(tornado.web.RequestHandler):
    """Reply with the captured path segment reversed."""

    def get(self, text):
        """Write ``text`` backwards.

        ``text`` is the first regex group of the route; Tornado passes
        unnamed groups positionally.
        """
        self.write(text[::-1])


class WrapHandler(tornado.web.RequestHandler):
    """Re-wrap the posted text to a given line width."""

    def post(self):
        """Wrap the ``text`` argument to ``width`` columns (default 40).

        Raises:
            tornado.web.HTTPError: 400 when ``text`` is missing or ``width``
                is not a positive integer.
        """
        text = self.get_argument("text")
        raw_width = self.get_argument("width", "40")
        try:
            width = int(raw_width)
        except ValueError:
            raise tornado.web.HTTPError(400, "width must be an integer")
        # textwrap.fill() itself raises ValueError for widths below 1.
        if width <= 0:
            raise tornado.web.HTTPError(400, "width must be positive")
        self.write(textwrap.fill(text, width))


if __name__ == "__main__":
    parse_command_line()
    # Requesting this URL returns "pupils".
    print("http://localhost:{}/reverse/slipup".format(options.port))
    app = tornado.web.Application(
        handlers=[
            # \w+ matches one or more letters, digits or underscores.
            (r"/reverse/(\w+)", ReverseHandler),
            (r"/wrap", WrapHandler),
        ]
    )
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(options.port)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
使用 requests
库发送 GET 和 POST 请求
import requests
# Client for the /reverse and /wrap example server, expected on port 8888.
reverse_reply = requests.get("http://localhost:8888/reverse/slipup")
print(reverse_reply.text)

wrap_args = {"text": "Lorem ipsum dolor sit amet, consectetuer adipiscing elit."}
wrap_reply = requests.post("http://localhost:8888/wrap", params=wrap_args)
print(wrap_reply.text)
# Expected output:
# pupils
# Lorem ipsum dolor sit amet, consectetuer
# adipiscing elit.
url中获取多个参数
import textwrap
import tornado.web
import tornado.ioloop
import tornado.httpserver
from tornado.options import define, options, parse_command_line
# Declare the --port command line option (integer, defaults to 8888).
define("port", default=8888, help="运行端口", type=int)


class InfoHandler(tornado.web.RequestHandler):
    """Echo the three path segments captured by the route."""

    def get(self, name, age, gender):
        """Write the captured ``name``, ``age`` and ``gender`` separated by spaces."""
        self.write("{} {} {}".format(name, age, gender))


if __name__ == "__main__":
    parse_command_line()
    print("http://localhost:{}/XerCis/23/male".format(options.port))
    print("http://localhost:{}/XerCis/23/male/".format(options.port))
    app = tornado.web.Application(
        handlers=[
            # The trailing "/?" makes the final slash optional, so both
            # printed URLs match this route.
            (r"/(\w+)/(\d+)/(\w+)/?", InfoHandler),
            # More readable alternative with named groups (?P<name>...),
            # which Tornado passes as keyword arguments:
            # (r"/(?P<name>\w+)/(?P<age>\d+)/(?P<gender>\w+)/?", InfoHandler),
        ]
    )
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(options.port)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
HTTP状态码
状态码 | 含义 | 情况 |
---|---|---|
404 | Not Found | 请求路径不匹配 |
400 | Bad Request | 调用了一个没有默认值的 get_argument() |
405 | Method Not Allowed | 调用了没有定义的HTTP方法 |
500 | Internal Server Error | 程序遇到不能让其退出的错误 |
200 | OK | 响应成功 |
重写 write_error()
添加常规的错误消息
import tornado.web
import tornado.ioloop
import tornado.httpserver
from tornado.options import define, options, parse_command_line
# Declare the --port command line option (integer, defaults to 8888).
define("port", default=8888, help="运行端口", type=int)


class IndexHandler(tornado.web.RequestHandler):
    """Greets the user and renders a custom body for error responses."""

    def get(self):
        """Greet ``user``; the value "500" deliberately raises an error."""
        # No default: a missing "user" argument produces a 400 response.
        user = self.get_argument("user")
        if user == "500":
            # An uncaught exception is turned into a 500 response.
            raise Exception
        self.write("Hello, " + user)

    def write_error(self, status_code, **kwargs):
        """Replace Tornado's default error page with a one-line message."""
        self.write("You caused a %d error." % status_code)


if __name__ == "__main__":
    parse_command_line()
    print("http://localhost:{}/?user=XerCis".format(options.port))
    app = tornado.web.Application(handlers=[(r"/", IndexHandler)])
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(options.port)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
使用 requests
库发送 GET 和 POST 请求
import requests
# Exercise every status code of the example server running on port 8888.
not_found = requests.get("http://localhost:8888/foo")
print(not_found, not_found.text)  # 404: no route matches the path

bad_request = requests.get("http://localhost:8888/")
print(bad_request, bad_request.text)  # 400: get_argument() without default found nothing

not_allowed = requests.post("http://localhost:8888/")
print(not_allowed, not_allowed.text)  # 405: the handler defines no post()

server_error = requests.get("http://localhost:8888/", params={"user": "500"})
print(server_error, server_error.text)  # 500: the handler raised an exception

ok = requests.get("http://localhost:8888/", params={"user": "XerCis"})
print(ok, ok.text)  # 200: success
# Expected output:
# <Response [404]> <html><title>404: Not Found</title><body>404: Not Found</body></html>
# <Response [400]> You caused a 400 error.
# <Response [405]> You caused a 405 error.
# <Response [500]> You caused a 500 error.
# <Response [200]> Hello, XerCis
表单和模板
创建文件夹 templates
,存放以下两个HTML
index.html
<!DOCTYPE html>
<html>
<head><title>对对联</title></head>
<body>
<h1>输入下联</h1>
<form method="post" action="/couplet">
<p>上联<br><input type="text" name="first" value="天行健,君子以自强不息。"></p>
<p>下联<br><input type="text" name="second"></p>
<input type="submit">
</form>
</body>
</html>
couplet.html
<!DOCTYPE html>
<html>
<head><title>对对联</title></head>
<body>
<h1>你的对联</h1>
<p>{{first}}<br><br>{{second}}</p>
</body>
</html>
双大括号中的单词是占位符,当渲染模板时调用 render()
传递关键字参数,以实际值代替占位符。
代码
import tornado.web
import tornado.ioloop
from pathlib import Path
import tornado.httpserver
from tornado.options import define, options, parse_command_line
# Declare the --port command line option (integer, defaults to 8888).
define("port", default=8888, help="运行端口", type=int)


class IndexHandler(tornado.web.RequestHandler):
    """Serve the form in which the second line of the couplet is entered."""

    def get(self):
        """Render index.html."""
        self.render("index.html")


class CoupletHandler(tornado.web.RequestHandler):
    """Render the finished couplet from the submitted form."""

    def post(self):
        """Read both lines of the couplet and render couplet.html."""
        first = self.get_argument("first")  # first line of the couplet
        second = self.get_argument("second")  # second line of the couplet
        if second == "":
            # Nothing entered: fall back to the classic matching line.
            second = "地势坤,君子以厚德载物。"
        # Keyword arguments fill the {{ }} placeholders of the template.
        self.render("couplet.html", first=first, second=second)


if __name__ == "__main__":
    parse_command_line()
    print("http://localhost:{}/".format(options.port))
    app = tornado.web.Application(
        handlers=[
            (r"/", IndexHandler),
            (r"/couplet", CoupletHandler),
        ],
        # Templates live in the "templates" folder next to this script.
        template_path=Path(__file__).parent / "templates",
    )
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(options.port)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
效果
模板语法
1. 模板填充表达式 {{ 表达式 }}
from tornado.template import Template
# A placeholder is replaced by the keyword argument of the same name.
page = Template("<html><body><h1>{{ header }}</h1></body></html>")
print(page.generate(header="Welcome!"))
# b'<html><body><h1>Welcome!</h1></body></html>'

# Any Python expression may appear between the double braces.
arithmetic = Template("{{ 1+1 }}")
print(arithmetic.generate())
slicing = Template("{{ 'scrambled eggs'[-4:] }}")
print(slicing.generate())
squares = Template("{{ ', '.join([str(x*x) for x in range(10)]) }}")
print(squares.generate())
# b'2'
# b'eggs'
# b'0, 1, 4, 9, 16, 25, 36, 49, 64, 81'
2. 控制语句 {% 控制语句 %},支持 if、for、while、try,如:
{% if page is None %}
{% if len(entries) == 3 %}
{% for book in books %}
{% end %}
book.html,放置于templates文件夹下
<!DOCTYPE html>
<html>
<head>
<title>{{ title }}</title>
</head>
<body>
<h1>{{ header }}</h1>
<ul>
{% for book in books %}
<li>{{ book }}</li>
{% end %}
</ul>
</body>
</html>
代码
import tornado.web
import tornado.ioloop
from pathlib import Path
import tornado.httpserver
from tornado.options import define, options, parse_command_line
# Declare the --port command line option (integer, defaults to 8888).
define("port", default=8888, help="运行端口", type=int)


class BookHandler(tornado.web.RequestHandler):
    """Render the recommended-books page."""

    def get(self):
        """Render book.html with a title, a header and the list of books."""
        self.render(
            "book.html",
            title="首页",
            header="推荐书籍",
            books=[
                "Python简明手册",
                "流畅的Python",
                "Python深度学习",
            ],
        )


if __name__ == "__main__":
    parse_command_line()
    print("http://localhost:{}/book".format(options.port))
    app = tornado.web.Application(
        handlers=[
            (r"/book", BookHandler),
        ],
        # Templates live in the "templates" folder next to this script.
        template_path=Path(__file__).parent / "templates",
    )
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(options.port)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
效果
3. 模板中使用函数
函数 | 功能 |
---|---|
escape(s) | 替换字符串中的&、<、>、"、'为对应的HTML实体 |
url_escape(s) | 使用urllib.parse.quote_plus() 替换字符串中的字符为URL编码 |
json_encode(val) | 使用json.dumps() 将val编码为JSON |
squeeze(s) | 将连续多个空白字符替换为一个空格 |
自定义函数 | 函数名作为参数传递 |
代码
from tornado.template import Template
def foo():
    """Return the fixed string 'World!' (used as a custom template function)."""
    return 'World!'
# Functions that are available inside every template.
builtin_demos = [
    Template("{{ escape('&<>') }}"),
    Template("{{ url_escape('你好') }}"),
    Template("{{ json_encode({'a': 1}) }}"),
    Template("{{ squeeze(' a b c ') }}"),
]
for demo in builtin_demos:
    print(demo.generate())

# A custom function is handed to the template as a keyword argument.
greeting = Template("Hello {{ fun() }}")
print(greeting.generate(fun=lambda: "World!"))
print(Template("Hello {{ fun() }}").generate(fun=foo))
# b'&amp;&lt;&gt;'
# b'%E4%BD%A0%E5%A5%BD'
# b'{"a": 1}'
# b'a b c'
# b'Hello World!'
# b'Hello World!'
推荐阅读:
模板综合示例
目录结构
style.css
body {
font-family: Helvetica, Arial, sans-serif;
width: 600px;
margin: 0 auto;
}
.replaced:hover {
color: #00f;
}
index.html
<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet" href="{{ static_url("style.css") }}">
<title>文本替换</title>
</head>
<body>
<h1>文本替换</h1>
<p>替换文本中的单词将被源文本中的相同字母开头的单词替换。</p>
<form method="post" action="/poem">
<p>源文本<br>
<textarea rows=5 cols=70 name="source">
Twas brillig, and the slithy toves
Did gyre and gimble in the wabe:
All mimsy were the borogoves,
And the mome raths outgrabe.
</textarea></p>
<p>替换文本<br>
<textarea rows=5 cols=70 name="change">
When in the course of human events it becomes necessary for one people to dissolve the political bands which have connected them with another and to assume among the powers of the earth, the separate and equal station to which the Laws of Nature and of Nature's God.
</textarea></p>
<input type="submit">
</form>
</body>
</html>
munged.html
<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet" href="{{ static_url("style.css") }}">
<title>文本替换</title>
</head>
<body>
<h1>结果</h1>
<p>
{% for line in change_lines %}
{% for word in line.split(' ') %}
{% if len(word) > 0 and word[0] in source_map %}
<span class="replaced" title="{{word}}">{{ choice(source_map[word[0]]) }}</span>
{% else %}
<span class="unchanged" title="unchanged">{{word}}</span>
{% end %}
{% end %}
<br>
{% end %}
</p>
</body>
</html>
main.py
import random
import tornado.web
import tornado.ioloop
from pathlib import Path
import tornado.httpserver
from tornado.options import define, options, parse_command_line
# Declare the --port command line option (integer, defaults to 8888).
define("port", default=8888, help="运行端口", type=int)


class IndexHandler(tornado.web.RequestHandler):
    """Serve the form with the source text and the text to change."""

    def get(self):
        """Render index.html."""
        self.render('index.html')


class MungedPageHandler(tornado.web.RequestHandler):
    """Replace words of one text by same-initial words of another text."""

    def map_by_first_letter(self, text):
        """Group the words of ``text`` by their first character.

        Returns:
            dict mapping a character to the list of words that start with
            it, in order of appearance (duplicates are kept).
        """
        mapped = {}
        # splitlines() accepts both "\r\n" (browser forms) and "\n".
        for line in text.splitlines():
            for word in line.split(' '):
                if word:  # consecutive spaces yield empty strings
                    mapped.setdefault(word[0], []).append(word)
        return mapped

    def post(self):
        """Render munged.html for the submitted ``source`` and ``change`` texts."""
        source_text = self.get_argument('source')
        text_to_change = self.get_argument('change')
        source_map = self.map_by_first_letter(source_text)
        change_lines = text_to_change.splitlines()
        # random.choice is passed in so the template can pick a replacement.
        self.render('munged.html', source_map=source_map, change_lines=change_lines, choice=random.choice)


if __name__ == "__main__":
    parse_command_line()
    print("http://localhost:{}/".format(options.port))
    app = tornado.web.Application(
        handlers=[
            (r"/", IndexHandler),
            (r"/poem", MungedPageHandler),
        ],
        template_path=Path(__file__).parent / "templates",  # template files
        static_path=Path(__file__).parent / "static",  # static files
    )
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(options.port)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
效果
CSS、JavaScript、图像等无需单独编写处理函数的静态内容,可指定 static_path
实现。
<link rel="stylesheet" href="{{ static_url("style.css") }}">
生成静态URL,而不使用硬编码的原因:
- static_url 会创建一个基于文件内容的hash值,并将其添加到URL末尾(查询字符串的参数v)。这个hash值确保浏览器总是加载文件的最新版而不是缓存版本。
- 可以改变应用URL结构,而不需要改变模板中的代码。
模板继承
项目结构
recommended.css
.book {
width: 500px;
border: 1px solid #555;
padding: 0 15px 0 15px;
margin: 15px;
background-color: #F5F5F5;
}
.book img.book_image {
margin: 5px 15px 5px 0;
float: left;
}
.book .book_details {
margin: 5px 10px 5px 0;
}
.book .book_title {
color: #777;
}
style.css
body {
font-family: Georgia;
}
.small {
font-size: .7em;
}
#discussion {
width: 700px;
margin: 10px 0 10px 0;
padding: 10px;
color: #111;
}
#discussion h3 {
text-decoration: underline;
}
#discussion .comment {
width: 500px;
margin: 10px 0 10px 0;
padding: 10px;
border: 1px solid #555;
background-color: #FAFAFA;
}
#discussion .comment .comment_user {
text-decoration: underline;
}
#discussion .comment .comment_user .bold {
font-weight: bold;
}
#discussion .comment .comment_text {
font-style: italic;
}
collective_intelligence.gif
head_first_python.gif
restful_web_services.gif
recommended.js
// document.write("script loaded!");
script.js
模板继承 使站点能够复用像header、footer和布局网格这样的内容。
使用 extends
和 block
语句进行模板继承
在新模板上继承模板只需在顶部放上一句 {% extends "filename.html" %}
即可
需要复用的内容用 block
包围,如
{% block header %}
<h1>Hello world!</h1>
{% end %}
main.html 基础模板,是整个网站的通用架构
<html>
<head>
<title>{{ page_title }}</title>
<link rel="stylesheet" href="{{ static_url("css/style.css") }}"/>
</head>
<body>
<div id="container">
<header>
{% block header %}<h1>Burt's Books</h1>{% end %}
</header>
<div id="main">
<div id="content">
{% block body %}{% end %}
</div>
</div>
<footer>
{% block footer %}
<p>更多信息联系: <a href="mailto:contact@burtsbooks.com">contact@burtsbooks.com</a>.</p>
<p class="small">在Facebook上关注我们: {% raw linkify("https://facebook.com/burtsbooks", extra_params='ref=website') %}.</p>
{% end %}
</footer>
</div>
<script src="{{ static_url("js/script.js") }}"></script>
</body>
</html>
index.html 首页,提供基础信息,替换header和body块的信息
{% extends "main.html" %}
{% block header %}
<h1>{{ header_text }}</h1>
{% end %}
{% block body %}
<div id="hello">
<p>Welcome to Burt's Books!</p>
<p>...</p>
</div>
{% end %}
效果
Tornado默认自动转义。
自动转义把标签转换为对应的HTML实体,防止后端为数据库的网站被恶意脚本攻击。
比如,攻击者可以使用<script>标签加载其他JavaScript文件,进行跨站脚本攻击(XSS)等。
禁止自动转义
{% autoescape None %}
{{ mailLink }}
或使用 {% raw %}
指定不转义的内容
包含链接
{% raw linkify("https://fb.me/burtsbooks", extra_params='ref=website') %}.
discussion.html
{% extends "main.html" %}
{% block header %}
<h1>{{ header_text }}</h1>
{% end %}
{% block body %}
<div id="discussion">
<h3>Comments</h3>
{% for comment in comments %}
<div class="comment">
<p class="comment_user"><span class="bold">{{ comment["user"] }}</span> says:</p>
<p class="comment_text">{{ comment["text"] }}</p>
</div>
{% end %}
</div>
{% end %}
效果
UI模块是封装模板中包含的标记、样式以及行为的可复用组件,使用 {% module Foo(...) %} 引用。
可以在 UIModule 子类中重写方法 embedded_javascript、embedded_css、javascript_files、css_files,嵌入JavaScript和CSS文件等。
{{ locale.format_date(book["date"]) }} 调用了 tornado.locale 的日期处理方法:
- relative=False 返回绝对时间(包含小时和分钟)
- full_format=True 返回月、日、年和时间的完整日期
- shorter=True 隐藏时间,只显示月、日和年。
book.html
<div class="book">
<h3 class="book_title">{{ book["title"] }}</h3>
{% if book["subtitle"] != "" %}
<h4 class="book_subtitle">{{ book["subtitle"] }}</h4>
{% end %}
<img src="{{ book["image"] }}" class="book_image"/>
<div class="book_details">
<div class="book_date_released">出版时间: {{ book["date_released"] }}</div>
<div class="book_date_added">上架时间: {{ locale.format_date(book["date_added"], relative=False) }}</div>
<h5>商品详情:</h5>
<div class="book_body">{% raw book["description"] %}</div>
</div>
</div>
recommended.html
{% extends "main.html" %}
{% autoescape None %}
{% block header %}
<h1>{{ header_text }}</h1>
{% end %}
{% block body %}
{% for book in books %}
{{ modules.Book(book) }}
{% end %}
{% end %}
main.py
import tornado.web
import tornado.ioloop
from pathlib import Path
import tornado.httpserver
from tornado.options import define, options, parse_command_line
# Declare the --port command line option (integer, defaults to 8888).
define("port", default=8888, help="运行端口", type=int)


class IndexHandler(tornado.web.RequestHandler):
    """Render the home page."""

    def get(self):
        """Render index.html with the page title and header text."""
        self.render(
            "index.html",
            page_title="Burt's Books | 首页",
            header_text="首页",
        )
class DiscussionHandler(tornado.web.RequestHandler):
    """Render the discussion page with a fixed list of comments."""

    def get(self):
        """Render discussion.html.

        The last comment contains <script> tags on purpose, to show what
        template autoescaping does with hostile input.
        """
        self.render(
            "discussion.html",
            page_title="Burt's Books | 聊天室",
            header_text="聊天室",
            comments=[
                {
                    "user": "张三",
                    "text": "你好!",
                },
                {
                    "user": "李四",
                    "text": "早上好。",
                },
                {
                    "user": "黑客",
                    "text": "黑了你的网站 <script src=\"http://melvins-web-sploits.com/evil_sploit.js\"></script><script>alert('黑了你的网站...');</script>",
                },
            ],
        )
class RecommendedHandler(tornado.web.RequestHandler):
    """Render the recommended-books page from a fixed list of books."""

    def get(self):
        """Render recommended.html.

        "description" holds HTML markup, and "date_added" is an integer
        that the book template passes to locale.format_date().
        """
        self.render(
            "recommended.html",
            page_title="Burt's Books | 推荐书籍",
            header_text="推荐书籍",
            books=[
                {
                    "title": "集体智慧编程",
                    "subtitle": "构建智能Web 2.0应用程序",
                    "image": "/static/images/collective_intelligence.gif",
                    "author": "Toby Segaran",
                    "date_added": 1310248056,
                    "date_released": "2007年08月",
                    "description": "<p>本书演示了如何构建Web应用程序来挖掘Internet上的大量数据。通过算法从Web网站获取、收集并分析用户的数据和反馈信息,以便创造新的用户价值和商业价值。包括协同过滤技术、集群数据分析、搜索引擎技术、海量信息搜索与分析、贝叶斯过滤技术、决策树技术实现预测和决策建模、社交网络信息匹配、机器学习和人工智能应用等。</p>",
                },
                {
                    "title": "RESTful Web Services",
                    "subtitle": "面向现实世界的Web服务",
                    "image": "/static/images/restful_web_services.gif",
                    "author": "Leonard Richardson, Sam Ruby",
                    "date_added": 1311148056,
                    "date_released": "2007年05月",
                    "description": "<p>除了构建人类使用的Web站点,未来更在于构建机器使用的Web站点。本书解释了如何构建网络,充分利用REST,HTTP和Web基础设施。详细讨论了统一口、资源、表述、URI、请求、缓存、安全等诸多内容。</p>",
                },
                {
                    "title": "深入浅出Python",
                    "subtitle": "",
                    "image": "/static/images/head_first_python.gif",
                    "author": "Paul Barry",
                    "date_added": 1311348056,
                    "date_released": "2010年11月",
                    "description": "<p>你是否曾希望从一本书中学习Python?本书提供一个完整的Python学习体验,它以一种独特的方法学习该语言,帮助您成为一个伟大的Python程序员。您将快速学习该语言的基础知识,然后学习持久性、异常处理、Web开发、SQLite等。</p>",
                },
            ],
        )
class BookModule(tornado.web.UIModule):
    """UI module that renders one book and pulls in its own CSS and JS."""

    def render(self, book):
        """Return the markup of modules/book.html rendered for ``book``."""
        return self.render_string(
            "modules/book.html",
            book=book,
        )

    def css_files(self):
        """Return the stylesheet path required by this module."""
        return "css/recommended.css"

    def javascript_files(self):
        """Return the script path required by this module."""
        return "js/recommended.js"
if __name__ == "__main__":
    parse_command_line()  # parse the command line options
    print("http://localhost:{}/".format(options.port))  # home page
    print("http://localhost:{}/discussion/".format(options.port))  # discussion page
    print("http://localhost:{}/recommended/".format(options.port))  # recommended books
    app = tornado.web.Application(
        handlers=[
            (r"/", IndexHandler),
            (r"/discussion/", DiscussionHandler),
            (r"/recommended/", RecommendedHandler),
        ],
        template_path=Path(__file__).parent / "templates",  # template files
        static_path=Path(__file__).parent / "static",  # static files
        # Registers the module under the name "Book" for use in templates.
        ui_modules={"Book": BookModule},
    )
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(options.port)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
效果
异步和非阻塞
官方文档建议 Python 3.5+ 用 async def
替代 tornado.gen
装饰器
尽量使用 async
而不是 coroutine
装饰器的原因:
- 基于coroutine是一个从生成器过渡到协程的方案
- yield和await的混合使用造成代码可读性很差
- 生成器可以模拟协程,但生成器应该做自己
- 原生协程总体来说比基于装饰器的协程快
- 原生协程可以使用async for和async with更符合Python风格
- 原生协程返回的是awaitable对象,装饰器协程返回的是future
为什么使用异步:
- CPU速度远高于IO速度
- IO包括网络访问和本地文件访问,比如requests,urllib等传统网络库都是同步IO
- 网络IO大部分时间处于等待状态,在等待的时候CPU是空闲的,但又不能执行其他操作
阻塞:调用函数时当前线程被挂起。
非阻塞:调用函数时当前线程不会被挂起,而是立即返回。
socket阻塞IO
import socket
# Blocking I/O: every socket call suspends the thread until it completes.
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host = 'www.baidu.com'
with client:  # close the socket even when an error occurs
    client.connect((host, 80))
    request = 'GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n'.format('/', host)
    # sendall() keeps writing until the whole request is sent; send() may
    # transmit only part of it.
    client.sendall(request.encode('utf-8'))
    data = b''
    while True:
        d = client.recv(1024)  # blocks until data arrives
        if not d:
            break  # empty read: the server closed the connection
        data += d
data = data.decode('utf-8')
print(data)
socket非阻塞IO
import socket
# Non-blocking I/O: calls return immediately, so this script has to poll.
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client.setblocking(False)  # switch the socket to non-blocking mode
host = 'www.baidu.com'
with client:  # close the socket even when an error occurs
    try:
        client.connect((host, 80))
    except BlockingIOError:
        # The connection is still being established; other work could run here.
        pass

    # Retry sending until the connection is ready (send raises OSError before).
    while True:
        try:
            client.send('GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n'.format('/', host).encode('utf-8'))
            print('连接成功')
            break
        except OSError:
            pass

    data = b''
    while True:
        try:
            d = client.recv(1024)
        except BlockingIOError:
            continue  # nothing to read yet, poll again
        if not d:
            break  # empty read: the server closed the connection
        data += d
data = data.decode('utf-8')
print(data)
select非阻塞IO
import socket
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE
selector = DefaultSelector()
# Set once the response is complete so that loop_forever() can return.
stopped = False


class Fetcher:
    """Fetch one HTTP URL with a non-blocking socket driven by selector callbacks."""

    def connected(self, key):
        """Callback: the socket became writable, so the request can be sent."""
        selector.unregister(key.fd)
        self.client.send(
            'GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n'.format(self.path, self.host).encode('utf-8'))
        # From now on wait for the response to become readable.
        selector.register(self.client.fileno(), EVENT_READ, self.readable)

    def readable(self, key):
        """Callback: response data arrived, or the server closed the connection."""
        global stopped
        d = self.client.recv(1024)
        if d:
            self.data += d
            return
        # Empty read: the response is complete.
        selector.unregister(key.fd)
        self.client.close()
        data = self.data.decode('utf-8')
        print(data)
        stopped = True

    def get_url(self, url):
        """Start fetching ``url`` (plain HTTP); the result is printed when done."""
        from urllib.parse import urlsplit

        parts = urlsplit(url)
        self.host = parts.hostname
        self.path = parts.path or '/'
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client.setblocking(False)
        self.data = b''
        try:
            self.client.connect((self.host, parts.port or 80))
        except BlockingIOError:
            # Expected for a non-blocking connect that is still in progress.
            pass
        # The socket becomes writable once the connection is established.
        selector.register(self.client.fileno(), EVENT_WRITE, self.connected)


def loop_forever():
    """Event loop: dispatch ready sockets to their callbacks until the fetch is done."""
    while not stopped:
        ready = selector.select()
        for key, mask in ready:
            call_back = key.data  # the callback stored by register()
            call_back(key)


if __name__ == '__main__':
    fetcher = Fetcher()
    url = 'http://www.baidu.com'
    fetcher.get_url(url)
    loop_forever()
这样的回调+事件循环实现的异步导致:
- 回调过深代码很难维护
- 栈撕裂造成异常无法向上抛出
协程,又称微线程,Coroutine。协程是可以被暂停和切换到其他协程运行的函数。协程运行时必须有事件循环。
旧代码改写为新代码
from tornado.gen import coroutine
@coroutine
def yield_test():
    """Decorator-based coroutine that produces three values.

    A Tornado coroutine may only ``yield`` awaitables; yielding plain
    integers raises ``BadYieldError``, so the values are returned instead.
    """
    return [1, 2, 3]


@coroutine
def main():
    """Wait for ``yield_test`` the old way, by yielding its Future."""
    values = yield yield_test()
    return values
新代码
async def yield_test():
    """Native coroutine that produces three values.

    With ``yield`` statements this would be an async generator, which
    cannot be awaited; returning the values keeps it awaitable.
    """
    return [1, 2, 3]


async def main():
    """Wait for ``yield_test`` the new way, with ``await``."""
    return await yield_test()
HTTPClient同步请求,类似requests
from tornado import httpclient
# Synchronous client: fetch() blocks until the whole response has arrived.
http_client = httpclient.HTTPClient()
try:
    response = http_client.fetch('http://www.baidu.com')
    print(response.body)
except Exception as e:
    # Tutorial-level handling: report any failure and carry on.
    print(e)
finally:
    # Release the client's resources whether or not the request succeeded.
    http_client.close()
AsyncHTTPClient异步请求
from tornado import ioloop
from tornado import httpclient
http_client = httpclient.AsyncHTTPClient()


async def f():
    """Fetch the page without blocking the event loop and print its body."""
    try:
        response = await http_client.fetch('http://www.baidu.com')
    except Exception as e:
        print(e)
    else:
        print(response.body)


if __name__ == '__main__':
    io_loop = ioloop.IOLoop.current()
    # run_sync() starts the loop and stops it once the coroutine finishes.
    io_loop.run_sync(f)
直接使用Python3的事件循环(Tornado的IOLoop底层是asyncio)
import asyncio
from tornado import httpclient
async def f():
    """Fetch the page on the running asyncio loop and print its body."""
    # Create the client inside the coroutine so that it is bound to the
    # loop that asyncio.run() starts.
    http_client = httpclient.AsyncHTTPClient()
    try:
        response = await http_client.fetch('http://www.baidu.com')
    except Exception as e:
        print(e)
    else:
        print(response.body)


if __name__ == '__main__':
    # asyncio.run() creates the loop, runs f() to completion and closes it.
    asyncio.run(f())
高并发爬虫
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from tornado import gen, httpclient, ioloop, queues
base_url = 'http://www.tornadoweb.org/en/stable/'
concurrency = 3  # number of concurrent workers


async def get_url_links(url):
    """Download ``url`` and return the absolute URLs of all links on the page.

    Relative links are resolved against ``url`` itself, and fragments
    ("#section") are dropped so that one page is not reported under
    several different URLs.
    """
    from urllib.parse import urldefrag

    response = await httpclient.AsyncHTTPClient().fetch(url)  # download
    html = response.body.decode()
    # Name the parser explicitly: the default depends on what is installed.
    soup = BeautifulSoup(html, 'html.parser')
    links = [urldefrag(urljoin(url, a.get('href'))).url for a in soup.find_all('a', href=True)]
    return links
async def main():
    """Crawl every page below ``base_url`` with ``concurrency`` workers."""
    seen = set()  # pages that have already been visited
    # tornado.queues.Queue is awaitable; the standard queue.Queue would block.
    q = queues.Queue()

    async def fetch_url(current_url):
        """Producer: fetch one page and enqueue the links found on it."""
        if current_url in seen:  # already visited
            return
        print(current_url)  # report the page being crawled
        seen.add(current_url)
        next_urls = await get_url_links(current_url)
        for new_url in next_urls:
            if new_url.startswith(base_url):  # stay on this site
                await q.put(new_url)

    async def worker():
        """Consumer: process URLs until the None sentinel arrives."""
        async for url in q:
            if url is None:
                return
            try:
                await fetch_url(url)
            except Exception as e:
                print(e)
            finally:
                # Always mark the item done, otherwise q.join() never returns.
                q.task_done()

    # 1. Seed the queue with the start URL.
    await q.put(base_url)
    # 2. Start the workers and wait until the queue has been fully processed.
    workers = gen.multi([worker() for _ in range(concurrency)])
    await q.join()
    # 3. Send one sentinel per worker so that all of them exit.
    for _ in range(concurrency):
        await q.put(None)
    await workers


if __name__ == '__main__':
    io_loop = ioloop.IOLoop.current()
    # run_sync() starts the loop and stops it once the coroutine finishes.
    io_loop.run_sync(main)
在Tornado中不要写同步IO,不然会阻塞
import time
import tornado.web
import tornado.ioloop
class FirstHandler(tornado.web.RequestHandler):
    """Handler that blocks the event loop on purpose."""

    async def get(self):
        """Sleep synchronously for five seconds, then answer "1"."""
        # Deliberate anti-pattern: time.sleep() blocks the whole event loop,
        # so no other request is served in the meantime.
        time.sleep(5)
        self.write("1")


class SecondHandler(tornado.web.RequestHandler):
    """Handler that answers immediately."""

    async def get(self):
        """Answer "2" without doing any I/O."""
        self.write("2")


if __name__ == "__main__":
    print("http://localhost:8888/")
    print("http://localhost:8888/2")
    app = tornado.web.Application([
        (r"/", FirstHandler),
        (r"/2", SecondHandler),
    ])
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()
同时请求两个接口,即使第二个接口没有IO操作,但仍然要等待上一个请求的阻塞IO完成。
RequestHandler常用方法
import tornado.web
import tornado.ioloop
class IndexHandler(tornado.web.RequestHandler):
    """Redirect to the site root."""

    def get(self):
        """Send a redirect to "/"."""
        # redirect() answers 302 (temporary) by default; pass permanent=True
        # for a 301 (permanent) redirect.
        self.redirect("/")


class ErrorHandler(tornado.web.RequestHandler):
    """Show how to trigger and customise an error response."""

    def get(self):
        """Send an error response directly."""
        self.send_error()

    def write_error(self, status_code, **kwargs):
        """Print the status code and write a custom error body."""
        print(status_code)
        self.write('error')


class MainHandler(tornado.web.RequestHandler):
    """Show the order of the per-request hooks of a RequestHandler."""

    def initialize(self, db):
        """Receive the dict given as third element of the route entry."""
        self.db = db
        print('1. initialize')

    def prepare(self):
        """Run before the HTTP method, e.g. to log or to open files."""
        print('2. prepare')

    def on_finish(self):
        """Run after the request ended, e.g. to close handles or free memory."""
        print('3. on_finish')

    def get(self):
        self.write("Hello World!")

    # One method per supported HTTP verb; left empty in this demo.
    def post(self):
        pass

    def head(self):
        pass

    def delete(self):
        pass

    def patch(self):
        pass

    def put(self):
        pass

    def options(self):
        pass


if __name__ == "__main__":
    print("http://localhost:8888/")
    print("http://localhost:8888/index/")
    print("http://localhost:8888/error/")
    app = tornado.web.Application([
        # The dict is passed to MainHandler.initialize() as keyword arguments.
        (r"/", MainHandler, dict(db='db')),
        (r"/index/?", IndexHandler),
        (r"/error/?", ErrorHandler),
    ])
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()
RequestHandler子类
RedirectHandler
:重定向
StaticFileHandler
:静态文件
static/img.jpg
import tornado.web
import tornado.ioloop
class MainHandler(tornado.web.RequestHandler):
    """Handler for the site root that replies with a fixed greeting."""

    def get(self):
        """Answer GET / with the text "Hello World!"."""
        self.write("Hello World!")


if __name__ == "__main__":
    print("http://localhost:8888/index/")
    print("http://localhost:8888/static/img.jpg")
    app = tornado.web.Application([
        (r"/", MainHandler),
        # Built-in handler that redirects /index/ to the root.
        (r"/index/?", tornado.web.RedirectHandler, {"url": "/"}),
        # Built-in handler that serves files from the "static" directory.
        (r"/static/(.*)", tornado.web.StaticFileHandler, {"path": "static"}),
        # Alternative that serves files from the current directory:
        # (r"/(.*)", tornado.web.StaticFileHandler, {"path": ""}),
    ])
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()
协程等待 Condition
from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Condition
condition = Condition()


async def waiter():
    """Wait until the condition is notified."""
    print("I'll wait right here")  # step 1: start waiting
    await condition.wait()
    print("I'm done waiting")  # step 4: resumed after the notification


async def notifier():
    """Wake up one coroutine waiting on the condition."""
    print("About to notify")  # step 2: about to wake the waiter
    condition.notify()
    print("Done notifying")  # step 3: notification sent


async def runner():
    """Run waiter() and notifier() concurrently and wait for both."""
    await gen.multi([waiter(), notifier()])


IOLoop.current().run_sync(runner)  # run the event loop until runner() finishes
# Alternative using the asyncio event loop directly:
# import asyncio
# asyncio.get_event_loop().run_until_complete(runner())
长轮询
项目结构
chat.css
body {
background: white;
margin: 10px;
}
body,
input {
font-family: sans-serif;
font-size: 10pt;
color: black;
}
table {
border-collapse: collapse;
border: 0;
}
td {
border: 0;
padding: 0;
}
#body {
position: absolute;
bottom: 10px;
left: 10px;
}
#input {
margin-top: 0.5em;
}
#inbox .message {
padding-top: 0.25em;
}
#nav {
float: right;
z-index: 99;
}
chat.js
// On DOM ready: wire up the message form and start long polling.
$(function () {
    var form = $("#messageform");

    // Post through AJAX instead of a regular form submission.
    form.on("submit", function () {
        newMessage($(this));
        return false;
    });

    // Pressing Enter (keyCode 13) posts the message as well.
    form.on("keypress", function (e) {
        if (e.keyCode != 13) {
            return true;
        }
        newMessage($(this));
        return false;
    });

    $("#message").select();
    updater.poll();
});
// Post the form content as a new chat message and show the server's reply.
function newMessage(form) {
    var message = form.formToDict();
    // Keep the submit button disabled while the request is in flight.
    var submitButton = form.find("input[type=submit]");
    submitButton.disable();
    $.postJSON("/a/message/new", message, function (response) {
        updater.showMessage(response);
        if (message.id) {
            form.parent().remove();
            return;
        }
        // Clear the text box and re-enable the button for the next message.
        form.find("input[type=text]").val("").select();
        submitButton.enable();
    });
}
// Return the value of the cookie called `name`, or undefined if it is not set.
function getCookie(name) {
    var match = document.cookie.match("\\b" + name + "=([^;]*)\\b");
    if (match) {
        return match[1];
    }
    return undefined;
}
// POST `args` form-encoded to `url` and hand the parsed JSON reply to `callback`.
jQuery.postJSON = function (url, args, callback) {
    // The server enables xsrf_cookies, so every POST must carry the token.
    args._xsrf = getCookie("_xsrf");
    $.ajax({
        url: url,
        data: $.param(args),
        dataType: "text",
        type: "POST",
        success: function (response) {
            // JSON.parse instead of eval: the reply is data and must never
            // be executed as code.
            if (callback) callback(JSON.parse(response));
        },
        error: function (response) {
            console.log("ERROR:", response);
        }
    });
};
// Convert the form fields into a plain {name: value} object.
jQuery.fn.formToDict = function () {
    var json = {};
    this.serializeArray().forEach(function (field) {
        json[field.name] = field.value;
    });
    // "next" only serves the non-JavaScript redirect; do not send it via AJAX.
    if (json.next) delete json.next;
    return json;
};
// Disable the matched elements; shorthand for enable(false).
jQuery.fn.disable = function () {
    this.enable(false);
    return this;
};

// Enable the matched elements, or disable them when called with a falsy argument.
jQuery.fn.enable = function (opt_enable) {
    var shouldDisable = arguments.length && !opt_enable;
    if (shouldDisable) {
        this.attr("disabled", "disabled");
    } else {
        this.removeAttr("disabled");
    }
    return this;
};
// Long-polling client: keeps one request open and renders messages as they arrive.
var updater = {
    errorSleepTime: 500,  // current retry delay in ms, doubled after every error
    cursor: null,         // id of the newest message received so far

    // Open the next long-poll request.
    poll: function () {
        var args = {"_xsrf": getCookie("_xsrf")};
        if (updater.cursor) args.cursor = updater.cursor;
        $.ajax({
            url: "/a/message/updates",
            type: "POST",
            dataType: "text",
            data: $.param(args),
            success: updater.onSuccess,
            error: updater.onError
        });
    },

    // Render the received messages and poll again immediately.
    onSuccess: function (response) {
        try {
            // JSON.parse instead of eval: the reply is data and must never
            // be executed as code.
            updater.newMessages(JSON.parse(response));
        } catch (e) {
            updater.onError();
            return;
        }
        updater.errorSleepTime = 500;  // reset the back-off after a success
        window.setTimeout(updater.poll, 0);
    },

    // Retry after an exponentially growing delay.
    onError: function (response) {
        updater.errorSleepTime *= 2;
        console.log("Poll error; sleeping for", updater.errorSleepTime, "ms");
        window.setTimeout(updater.poll, updater.errorSleepTime);
    },

    // Remember the newest message id and display every message of the batch.
    newMessages: function (response) {
        if (!response.messages) return;
        var messages = response.messages;
        updater.cursor = messages[messages.length - 1].id;
        console.log(messages.length, "new messages, cursor:", updater.cursor);
        for (var i = 0; i < messages.length; i++) {
            updater.showMessage(messages[i]);
        }
    },

    // Append one message to the inbox unless it is already displayed.
    showMessage: function (message) {
        var existing = $("#m" + message.id);
        if (existing.length > 0) return;
        var node = $(message.html);
        node.hide();
        $("#inbox").append(node);
        node.slideDown();
    }
};
index.html
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>聊天室</title>
<link rel="stylesheet" href="{{ static_url("chat.css") }}">
</head>
<body>
<div id="body">
<div id="inbox">
{% for message in messages %}
{% module Template("message.html", message=message) %}
{% end %}
</div>
<div id="input">
<form action="/a/message/new" method="post" id="messageform">
<table>
<tr>
<td><input type="text" name="body" id="message" style="width:500px"></td>
<td style="padding-left:5px">
<input type="submit" value="{{ _("Post") }}">
<input type="hidden" name="next" value="{{ request.path }}">
{% module xsrf_form_html() %}
</td>
</tr>
</table>
</form>
</div>
</div>
<script src="https://cdn.bootcss.com/jquery/3.0.0/jquery.min.js"></script>
<script src="{{ static_url("chat.js") }}"></script>
</body>
</html>
message.html
<div class="message" id="m{{ message["id"] }}">{% module linkify(message["body"]) %}</div>
main.py
import uuid
import asyncio
import tornado.web
import tornado.locks
import tornado.escape
import tornado.ioloop
from pathlib import Path
from tornado.options import define, options, parse_command_line
# Declare the --port command line option (integer, defaults to 8888).
define("port", default=8888, help="运行端口", type=int)


class MessageBuffer:
    """Bounded in-memory message history that can notify waiting coroutines."""

    def __init__(self):
        # Coroutines wait on this condition until a new message arrives.
        self.cond = tornado.locks.Condition()
        self.cache = []  # cached messages, oldest first
        self.cache_size = 200  # maximum number of cached messages

    def get_messages_since(self, cursor):
        """Return the messages newer than the one whose id is ``cursor``.

        The whole cache is returned when no cached message has that id.
        """
        results = []
        # Walk from the newest message back to the cursor.
        for msg in reversed(self.cache):
            if msg["id"] == cursor:
                break
            results.append(msg)
        results.reverse()  # restore chronological order
        return results

    def add_message(self, message):
        """Append ``message`` to the cache and wake up all waiters."""
        self.cache.append(message)
        if len(self.cache) > self.cache_size:
            self.cache = self.cache[1:]  # drop the oldest message
        self.cond.notify_all()


# Single buffer shared by all handlers of the process.
global_message_buffer = MessageBuffer()
class MainHandler(tornado.web.RequestHandler):
    """Render the chat page with the messages cached so far."""

    def get(self):
        self.render("index.html", messages=global_message_buffer.cache)
class MessageNewHandler(tornado.web.RequestHandler):
    """Accept a new chat message."""

    def post(self):
        """Store the posted message and answer with a redirect or the message itself."""
        message = {
            "id": str(uuid.uuid4()),
            "body": self.get_argument("body"),
        }
        # render_string() returns bytes; convert them to str.
        message["html"] = tornado.escape.to_unicode(self.render_string("message.html", message=message))
        if self.get_argument("next", None):
            # Plain form submission: go back to the page named by "next".
            self.redirect(self.get_argument("next"))
        else:
            # AJAX submission: writing a dict sends it as JSON.
            self.write(message)
        global_message_buffer.add_message(message)  # also wakes long-poll waiters
class MessageUpdatesHandler(tornado.web.RequestHandler):
    """Long-polling endpoint that answers once new messages are available."""

    # No wait is pending until post() actually has to block for new messages.
    wait_future = None

    async def post(self):
        """Return the messages after ``cursor``, waiting until at least one exists."""
        cursor = self.get_argument("cursor", None)
        messages = global_message_buffer.get_messages_since(cursor)
        while not messages:
            # Keep the Future so that on_connection_close() can cancel it.
            self.wait_future = global_message_buffer.cond.wait()
            try:
                await self.wait_future
            except asyncio.CancelledError:
                return
            messages = global_message_buffer.get_messages_since(cursor)
        if self.request.connection.stream.closed():
            return
        self.write(dict(messages=messages))

    def on_connection_close(self):
        """Stop waiting when the client disconnects."""
        if self.wait_future is not None:
            self.wait_future.cancel()
if __name__ == "__main__":
    # Read --port (and Tornado's built-in options) from the command line.
    parse_command_line()
    print("http://localhost:{}/".format(options.port))

    base_dir = Path(__file__).parent
    routes = [
        (r"/", MainHandler),
        (r"/a/message/new", MessageNewHandler),
        (r"/a/message/updates", MessageUpdatesHandler),
    ]
    app = tornado.web.Application(
        routes,
        template_path=base_dir / "templates",  # Template files
        static_path=base_dir / "static",  # Static files
        xsrf_cookies=True,
    )
    app.listen(options.port)
    tornado.ioloop.IOLoop.current().start()
效果
WebSocket
项目结构
chat.css
/* Stylesheet for the chat page (index.html). */

/* Page background and outer margin. */
body {
background: white;
margin: 10px;
}
/* Same font family, size and colour for page text and form inputs. */
body,
input {
font-family: sans-serif;
font-size: 10pt;
color: black;
}
/* The table is used for layout only: no borders and no cell padding. */
table {
border-collapse: collapse;
border: 0;
}
td {
border: 0;
padding: 0;
}
/* Element with id "body": absolutely positioned 10px from the bottom and left edges. */
#body {
position: absolute;
bottom: 10px;
left: 10px;
}
/* Space between the message list and the input form. */
#input {
margin-top: 0.5em;
}
/* Vertical spacing between messages in the inbox. */
#inbox .message {
padding-top: 0.25em;
}
/* NOTE(review): no element with id "nav" appears in the index.html shown alongside this file; confirm whether this rule is still needed. */
#nav {
float: right;
z-index: 99;
}
chat.js
// Page start-up: wire the message form to the WebSocket sender and open the socket.
$(document).ready(function() {
    // Provide a no-op console.log where the browser has none.
    if (!window.console) window.console = {};
    if (!window.console.log) window.console.log = function() {};

    var messageForm = $("#messageform");

    // Send through the socket instead of performing a normal form submit.
    messageForm.on("submit", function() {
        newMessage($(this));
        return false;
    });

    // Enter (keyCode 13) sends the message as well.
    messageForm.on("keypress", function(e) {
        if (e.keyCode != 13) return;
        newMessage($(this));
        return false;
    });

    $("#message").select();
    updater.start();
});
// Serialize the form to JSON, send it over the WebSocket, then clear and
// re-select the text input so the next message can be typed.
function newMessage(form) {
    var payload = JSON.stringify(form.formToDict());
    updater.socket.send(payload);
    form.find("input[type=text]").val("").select();
}
// jQuery plugin: collect the form's fields into a plain {name: value} object.
// A later field with the same name overwrites an earlier one. A truthy
// "next" entry is removed before the object is returned.
jQuery.fn.formToDict = function() {
    var result = {};
    this.serializeArray().forEach(function(field) {
        result[field.name] = field.value;
    });
    if (result.next) delete result.next;
    return result;
};
// WebSocket client: opens the connection to /chatsocket and appends each
// message pushed by the server to the inbox.
var updater = {
    socket: null,

    // Open the WebSocket and register the incoming-message handler.
    start: function() {
        // Match the page's scheme: a page served over https may only open
        // secure (wss://) sockets, so a hard-coded ws:// would be blocked.
        var scheme = location.protocol === "https:" ? "wss://" : "ws://";
        var url = scheme + location.host + "/chatsocket";
        updater.socket = new WebSocket(url);
        updater.socket.onmessage = function(event) {
            updater.showMessage(JSON.parse(event.data));
        }
    },

    // Append a message's rendered HTML to #inbox with a slide-down effect.
    // Does nothing if an element with id "m" + message.id already exists.
    showMessage: function(message) {
        var existing = $("#m" + message.id);
        if (existing.length > 0) return;
        var node = $(message.html);
        node.hide();
        $("#inbox").append(node);
        node.slideDown();
    }
};
index.html
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>聊天室</title>
{# static_url() builds the URL of a file served from the application's static_path. #}
<link rel="stylesheet" href="{{ static_url("chat.css") }}">
</head>
<body>
<div id="body">
{# Message list: pre-filled with the cached messages passed in by MainHandler; chat.js appends new ones here. #}
<div id="inbox">
{% for message in messages %}
{% include "message.html" %}
{% end %}
</div>
<div id="input">
{# The form has no action attribute: chat.js intercepts submit/Enter and sends the fields over the WebSocket. #}
<form id="messageform">
<table>
<tr>
<td><input type="text" name="body" id="message" style="width:500px"></td>
<td style="padding-left:5px">
<input type="submit" value="发送">
{# chat.js (formToDict) strips the "next" field before sending. #}
<input type="hidden" name="next" value="{{ request.path }}">
{# Hidden XSRF token field; the WebSocket handler shown in main.py only reads the "body" key. #}
{% module xsrf_form_html() %}
</td>
</tr>
</table>
</form>
</div>
</div>
<script src="https://cdn.bootcss.com/jquery/3.0.0/jquery.min.js"></script>
<script src="{{ static_url("chat.js") }}"></script>
</body>
</html>
message.html
<div class="message" id="m{{ message["id"] }}">{% module linkify(message["body"]) %}</div>{# One chat message: the element id is "m" + the message id (chat.js uses it to skip messages already shown); linkify renders the body text with URLs turned into links. #}
main.py
import uuid
import tornado.web
import tornado.locks
import tornado.escape
import tornado.ioloop
import tornado.websocket
from pathlib import Path
from tornado.options import define, options, parse_command_line
# Command-line option --port: the port passed to app.listen() below (default 8888).
define("port", default=8888, help="运行端口", type=int)
class MainHandler(tornado.web.RequestHandler):
    """Serve the chat page."""

    def get(self):
        """Render index.html with the messages cached on ChatSocketHandler."""
        history = ChatSocketHandler.cache
        self.render("index.html", messages=history)
class ChatSocketHandler(tornado.websocket.WebSocketHandler):
    """WebSocket endpoint that broadcasts every received chat message.

    State is kept on the class, so it is shared by all connections:
    ``waiters`` holds the open connections and ``cache`` the recent messages.
    """

    waiters = set()  # Currently open connections.
    cache = []  # Cached messages, oldest first.
    cache_size = 200  # Maximum number of cached messages.

    def get_compression_options(self):
        """Enable compression: any non-None return value (even ``{}``) turns it on."""
        return {}

    def open(self):
        """Register this connection as a broadcast recipient."""
        ChatSocketHandler.waiters.add(self)

    def on_close(self):
        """Unregister this connection."""
        # discard() rather than remove(): closing a connection that is not
        # (or no longer) registered must not raise KeyError.
        ChatSocketHandler.waiters.discard(self)

    @classmethod
    def update_cache(cls, chat):
        """Append ``chat`` to the cache, dropping the oldest entry when over the limit."""
        cls.cache.append(chat)
        if len(cls.cache) > cls.cache_size:
            cls.cache = cls.cache[1:]

    @classmethod
    def send_updates(cls, chat):
        """Send ``chat`` to every registered connection (best effort)."""
        print("sending message to {} waiters".format(len(cls.waiters)))
        for waiter in cls.waiters:
            try:
                waiter.write_message(chat)
            except Exception as exc:
                # One failing connection must not stop the broadcast, but
                # KeyboardInterrupt/SystemExit are no longer swallowed and
                # the cause is reported instead of being hidden.
                print("Error sending message: {!r}".format(exc))

    def on_message(self, message):
        """Handle a message from the client: build, cache and broadcast it.

        ``message`` is expected to be a JSON object with a ``"body"`` key.
        """
        print(message)
        parsed = tornado.escape.json_decode(message)
        message = {
            "id": str(uuid.uuid4()),
            "body": parsed["body"],
        }
        # render_string returns bytes; store the HTML as a str.
        message["html"] = tornado.escape.to_basestring(
            self.render_string("message.html", message=message)
        )
        ChatSocketHandler.update_cache(message)
        ChatSocketHandler.send_updates(message)
if __name__ == "__main__":
    # Read --port (and Tornado's built-in options) from the command line.
    parse_command_line()
    print("http://localhost:{}/".format(options.port))

    base_dir = Path(__file__).parent
    routes = [
        (r"/", MainHandler),
        (r"/chatsocket", ChatSocketHandler),
    ]
    app = tornado.web.Application(
        routes,
        template_path=base_dir / "templates",  # Template files
        static_path=base_dir / "static",  # Static files
        xsrf_cookies=True,
    )
    app.listen(options.port)
    tornado.ioloop.IOLoop.current().start()
效果
设置Cookie
设置Cookie时,若设置Value为JSON字符串,注意:
- 只能为ASCII字符
- 不能含空格
即:使用json.dumps(data, separators=(',', ':'))生成不含空格的紧凑JSON(json.dumps默认ensure_ascii=True,非ASCII字符会被转义为\uXXXX)
只保留5个搜索记录
def set_cookie(requestHandler, word):
    """Record ``word`` in the search-history cookie.

    The history is stored in the ``"history"`` cookie as a compact JSON list
    (no spaces, ASCII only). A word already present is not added again, and
    only the last five entries are kept.

    Args:
        requestHandler: Object providing ``get_cookie(name, default)`` and
            ``set_cookie(name, value, expires_days=...)``.
        word: The search term to record.

    Returns:
        The JSON string written to the cookie.
    """
    KEY = "history"
    # The cookie comes from the client, so it may be malformed or may not be
    # a JSON list at all; start from an empty history in that case rather
    # than failing the request.
    try:
        history = json.loads(requestHandler.get_cookie(KEY, "[]"))
    except ValueError:
        history = []
    if not isinstance(history, list):
        history = []
    if word not in history:
        history.append(word)
    history = history[-5:]  # Keep the last five entries.
    history = json.dumps(history, separators=(",", ":"))
    requestHandler.set_cookie(KEY, history, expires_days=365)
    return history
总结
- Tornado配置HTTP请求的路径时在后面加上 /? ,访问时无论末尾是否加上斜杠都能访问,而不会出现404。
- 模板渲染
from tornado.template import Template
from tornado.escape import to_unicode
# Compile a template from a string, render it, and print the result.
template = Template("<html>{{ myvalue }}</html>")
rendered = to_unicode(template.generate(myvalue="XXX"))  # generate() returns bytes; convert to str
print(rendered)
遇到的坑
- 模板HTML如
<link rel="stylesheet" href="{{ static_url("style.css") }}">
报错
右键templates目录 → Mark Directory as → Template Folder → Jinja2
封装