tornado ioloop

初步解析 socket 流

import socket

# Byte sequences that mark the end of the HTTP request headers: a bare LF
# blank line, and the tail of the standard CRLF blank line (b'\r\n\r\n').
# Checking only b'\n\n' never matches a standard CRLF-terminated request.
EOL1 = b'\n\n'
EOL2 = b'\n\r\n'

# Canned HTTP response sent to every client.
response  = b'HTTP/1.0 200 OK\r\nDate: Mon, 1 Jan 1996 01:01:01 GMT\r\n'
response += b'Content-Type: text/plain\r\nContent-Length: 13\r\n\r\n'
response += b'Hello, world!'

serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
serversocket.bind(('0.0.0.0', 8080))
serversocket.listen(1)

try:
    # Requests are served strictly one after another.
    while True:
        connectiontoclient, address = serversocket.accept()
        try:
            request = b''
            while EOL1 not in request and EOL2 not in request:
                # recv() blocks here until data arrives from the network.
                chunk = connectiontoclient.recv(1024)
                if not chunk:
                    # Empty read: the peer closed the connection before
                    # finishing the request; stop instead of spinning forever.
                    break
                request += chunk
            print ('-'*40 + '\n' + request.decode()[:-2])
            # sendall() keeps writing until the whole response is sent;
            # a single send() may transmit only part of it.
            connectiontoclient.sendall(response)
        finally:
            # Always release the client socket, even if the exchange failed.
            connectiontoclient.close()
finally:
    serversocket.close()

上述是一个阻塞的server,请求是顺序处理的, 其中response字符串是HTTP 响应报文。

request += connectiontoclient.recv(1024)

recv(1024) 会造成阻塞。因为不停的从缓冲区读取数据, 如果网络数据还没有到达就会阻塞住等待数据到来。

当程序使用阻塞 socket 的时候, 它通常使用一个线程(甚至是专用进程)在每个socket上执行通信。主程序线程监听服务器 socket, 这个socket 接受来自客户端的传入连接。服务端每次接受一个连接,并为其创建一个新的 socket,将新创建的 socket 传递给一个单独的线程,然后该线程将与客户端进行交互, 因为每个连接都有一个新的线程进行通信,所以任何阻塞也不会影响其他线程执行其各自的任务。

这就是最传统的IO模型 PPC(process per connection),TPC(thread per connection), 效率极低。

C10K 问题主要讨论了处理并发 socket 的一些替代方法,比如使用异步套接字。这些 socket 不会一直阻塞到某个事件发生;相反,程序在异步 socket 上执行操作后, 会立即得知该操作成功还是失败, 并据此决定下一步如何处理。由于异步 socket 是非阻塞的,因此不需要多个执行线程,所有的工作都可以在一个线程中完成。

linux 有一系列的机制去管理异步 socket, select, poll, epoll 这三种API已经被python实现。

tornado 内部是使用 epoll 事件通知机制来实现的。下面上 epoll 版的 HTTPServer

这里 使用了 epoll 的水平触发模式

import socket
import select  # the select module provides epoll()

# Byte sequences that mark the end of the HTTP request headers: a bare LF
# blank line, and the tail of the standard CRLF blank line (b'\r\n\r\n').
EOL1 = b'\n\n'
EOL2 = b'\n\r\n'
response  = b'HTTP/1.0 200 OK\r\nDate: Mon, 1 Jan 1996 01:01:01 GMT\r\n'
response += b'Content-Type: text/plain\r\nContent-Length: 13\r\n\r\n'
response += b'Hello, world!'

serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
serversocket.bind(('0.0.0.0', 8080))
serversocket.listen(1)
serversocket.setblocking(0)  # the listening socket must be non-blocking

epoll = select.epoll()  # create the epoll object
# Register read interest on the listening socket; a read event there means
# a new connection is waiting to be accepted.
epoll.register(serversocket.fileno(), select.EPOLLIN)

# Per-connection state, keyed by the connection's file descriptor.
connections = {}; requests = {}; responses = {}


def close_connection(fileno):
    """Stop polling ``fileno``, close its socket and drop its state."""
    epoll.unregister(fileno)
    connections.pop(fileno).close()
    requests.pop(fileno, None)
    responses.pop(fileno, None)


try:
    while True:
        # Ask epoll which registered events occurred, waiting up to one
        # second. Events that are already pending are returned immediately.
        events = epoll.poll(1)
        for fileno, event in events:
            if fileno == serversocket.fileno():
                # Event on the listening socket: accept the new connection.
                connection, address = serversocket.accept()  # creates a new socket
                connection.setblocking(0)  # the new socket is non-blocking too
                epoll.register(connection.fileno(), select.EPOLLIN)
                connections[connection.fileno()] = connection
                requests[connection.fileno()] = b''
                responses[connection.fileno()] = response
            elif event & select.EPOLLIN:
                # The socket has data available to read.
                chunk = connections[fileno].recv(1024)
                if not chunk:
                    # Empty read: the peer closed the connection. In
                    # level-triggered mode EPOLLIN would keep firing, so
                    # tear the connection down now.
                    close_connection(fileno)
                    continue
                requests[fileno] += chunk
                if EOL1 in requests[fileno] or EOL2 in requests[fileno]:
                    # Full request received: switch interest to writing.
                    epoll.modify(fileno, select.EPOLLOUT)
                    print('-'*40 + '\n' + requests[fileno].decode()[:-2])
            elif event & select.EPOLLOUT:
                # The socket is writable: send as much of the response as
                # the kernel accepts and keep the unsent remainder.
                byteswritten = connections[fileno].send(responses[fileno])
                responses[fileno] = responses[fileno][byteswritten:]
                if len(responses[fileno]) == 0:
                    # Response fully sent: drop all interest and shut the
                    # socket down; the resulting hang-up event closes it.
                    epoll.modify(fileno, 0)
                    connections[fileno].shutdown(socket.SHUT_RDWR)
            elif event & select.EPOLLHUP:
                close_connection(fileno)
finally:
    epoll.unregister(serversocket.fileno())
    epoll.close()
    serversocket.close()

这里每一个 connection 在服务端都对应着一个文件描述符 fd, 也就是一个新的 socket。由于只有在 epoll 通知该 socket 可读之后才会调用 recv(1024), 所以总能立即读到数据, 不会发生阻塞等待数据的情况

下面写一个边缘触发的epoll

import socket, select

# Byte sequences that mark the end of the HTTP request headers: a bare LF
# blank line, and the tail of the standard CRLF blank line (b'\r\n\r\n').
EOL1 = b'\n\n'
EOL2 = b'\n\r\n'
response  = b'HTTP/1.0 200 OK\r\nDate: Mon, 1 Jan 1996 01:01:01 GMT\r\n'
response += b'Content-Type: text/plain\r\nContent-Length: 13\r\n\r\n'
response += b'Hello, world!'

serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
serversocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
serversocket.bind(('0.0.0.0', 8080))
serversocket.listen(1)
serversocket.setblocking(0)
epoll = select.epoll()
# EPOLLET selects edge-triggered notification: an event is reported once
# per state change, so every handler below must drain the socket until the
# non-blocking call raises.
epoll.register(serversocket.fileno(), select.EPOLLIN | select.EPOLLET)

# Per-connection state, keyed by the connection's file descriptor.
connections = {}; requests = {}; responses = {}


def close_connection(fileno):
    """Stop polling ``fileno``, close its socket and drop its state."""
    epoll.unregister(fileno)
    connections.pop(fileno).close()
    requests.pop(fileno, None)
    responses.pop(fileno, None)


try:
    while True:
        events = epoll.poll(1)
        for fileno, event in events:
            if fileno == serversocket.fileno():
                # Accept every pending connection; the loop ends when
                # accept() raises because none are left.
                try:
                    while True:
                        connection, address = serversocket.accept()
                        connection.setblocking(0)
                        epoll.register(connection.fileno(), select.EPOLLIN | select.EPOLLET)
                        connections[connection.fileno()] = connection
                        requests[connection.fileno()] = b''
                        responses[connection.fileno()] = response
                except socket.error:
                    pass
            elif event & select.EPOLLIN:
                peer_closed = False
                # Read until the socket would block (recv() raises) or the
                # peer closes (recv() returns b'' and does NOT raise).
                try:
                    while True:
                        chunk = connections[fileno].recv(1024)
                        if not chunk:
                            peer_closed = True
                            break
                        requests[fileno] += chunk
                except socket.error:
                    pass
                if EOL1 in requests[fileno] or EOL2 in requests[fileno]:
                    epoll.modify(fileno, select.EPOLLOUT | select.EPOLLET)
                    print ('-'*40 + '\n' + requests[fileno].decode()[: -2])
                elif peer_closed:
                    # The peer went away before sending a complete request.
                    close_connection(fileno)
            elif event & select.EPOLLOUT:
                # Write until everything is sent or the socket would block.
                try:
                    while len(responses[fileno]) > 0:
                        byteswritten = connections[fileno].send(responses[fileno])
                        responses[fileno] = responses[fileno][byteswritten:]
                except socket.error:
                    pass
                if len(responses[fileno]) == 0:
                    # Response fully sent: drop read/write interest and shut
                    # the socket down; the hang-up event then closes it.
                    epoll.modify(fileno, select.EPOLLET)
                    connections[fileno].shutdown(socket.SHUT_RDWR)
            elif event & select.EPOLLHUP:
                close_connection(fileno)
finally:
    epoll.unregister(serversocket.fileno())
    epoll.close()
    serversocket.close()

由于水平触发模式与 select、poll 的行为相似, 在移植使用 select 或者 poll 的应用程序时, 大多使用水平触发模式; 当程序员不需要或不希望操作系统过多协助管理事件状态的时候, 使用边缘触发模式

这里有一个点,一个port可能有多个请求过来, 每个请求都是TCP请求(有状态的),也就是意味着socket无法复用(UDP可以多个请求共用一个socket, 因为UDP无状态,无需时刻监听socket)那么每个请求过来都务必在当前端口上创建一个新的socket来保证这次连接(无法使用其他端口, 因为存在防火墙无法确定哪个端口是可用的)这也就意味着存在多个socket共用同一个本地端口: 其中只有服务端 socket 在监听, 其余的都是 accept 创建的已连接 socket

official example

import errno
import functools
import socket

import tornado.ioloop
from tornado import gen
from tornado.iostream import IOStream

@gen.coroutine
def handle_connection(connection, address):
    """Print everything one client sends on ``connection``.

    The accepted socket is wrapped in an ``IOStream`` and the coroutine
    yields on ``read_until_close()``, then prints the decoded, stripped
    result. ``address`` is accepted but not used.
    """
    client_stream = IOStream(connection)
    payload = yield client_stream.read_until_close()
    text = payload.decode().strip()
    print ('message from client: ', text)

def connection_ready(sock, fd, events):
    """Accept every pending connection on the listening socket ``sock``.

    Meant to be used as an IOLoop handler with ``sock`` pre-bound (for
    example through ``functools.partial``); ``fd`` and ``events`` are the
    remaining handler arguments and are not used here.

    The first parameter is named ``sock`` rather than ``socket`` so it does
    not shadow the ``socket`` module, which the ``except`` clause needs.

    Raises:
        socket.error: any accept() failure other than EWOULDBLOCK/EAGAIN.
    """
    while True:
        try:
            connection, address = sock.accept()
        except socket.error as e:
            # EWOULDBLOCK/EAGAIN means nothing is left to accept on the
            # non-blocking socket; anything else is a real error.
            if e.args[0] not in (errno.EWOULDBLOCK, errno.EAGAIN):
                raise
            return
        connection.setblocking(0)
        handle_connection(connection, address)

if __name__ == '__main__':
    # Create a non-blocking TCP listening socket on port 8888, all interfaces.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
    sock.setblocking(0)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind(("", 8888))
    sock.listen(128)

    # Pre-bind the listening socket so the IOLoop only has to supply the
    # remaining handler arguments.
    callback = functools.partial(connection_ready, sock)

    # Run connection_ready whenever the listening socket becomes readable,
    # then start the loop.
    io_loop = tornado.ioloop.IOLoop.current()
    io_loop.add_handler(
        sock.fileno(),
        callback,
        io_loop.READ,
    )
    io_loop.start()

默认情况下, 新构造的 IOLoop 自动成为当前线程的 IOLoop, 除非当前线程已经存在当前的 IOLoop。这个行为可以通过传给 IOLoop 构造器的 make_current 参数控制。如果 make_current=True, 那么新的 IOLoop 总会尝试成为当前的 IOLoop, 如果已经存在当前的 IOLoop, 那么这个地方会抛出一个异常。

一般来说,一个IOLoop无法以任何方式在fork函数中存活或者被共享,当使用多进程时,每个进程应该创建自己的IOLoop, 这也意味着任何依赖于 IOLoop的对象都需要在子进程里面被创建

再来一版HTTP server

import sys
import socket
import logging
import StringIO
from datetime import datetime

from ioloop import IOLoop

EOL1 = b'\n\n'
EOL2 = b'\n\r\n'

class WSGIServer(object):
    """Minimal non-blocking WSGI server built on the project's ``IOLoop``.

    ``_accept``, ``_receive`` and ``_send`` are written as IOLoop handlers
    taking ``(socket, event)``. ``_receive`` and ``_send`` are registered by
    this class; nothing visible here registers ``_accept`` for the listening
    socket, so the caller presumably does that.

    Per-connection state lives in ``self.conn_pool``, keyed by file
    descriptor. Each entry is a ``Connection`` object (defined elsewhere)
    on which this class reads and writes ``address``, ``request_buffer``,
    ``handled``, ``status``, ``headers`` and ``response``.

    NOTE(review): received fragments are joined with ``''`` and the Python 2
    ``StringIO`` module is used, so the class appears to target Python 2
    ``str`` semantics -- confirm before running it on Python 3.
    """

    ADDRESS_FAMILY = socket.AF_INET
    SOCKET_TYPE = socket.SOCK_STREAM
    BACKLOG = 5  # backlog passed to listen()

    HEADER_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT"
    SERVER_NAME = "zigmo/WSGIServer 0.3"

    def __init__(self, server_address):
        """Create the listening socket bound to ``server_address``."""
        self.ssocket = self.setup_server_socket(server_address)
        host, self.server_port = self.ssocket.getsockname()[:2]
        self.server_name = socket.getfqdn(host)

        self.ioloop = IOLoop.instance()
        # fd -> Connection. Every other method reads ``conn_pool``, so the
        # attribute must be created under exactly that name.
        self.conn_pool = {}

    @classmethod
    def setup_server_socket(cls, server_address):
        """Return a non-blocking listening socket bound to ``server_address``."""
        ssocket = socket.socket(cls.ADDRESS_FAMILY, cls.SOCKET_TYPE)
        ssocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
        ssocket.bind(server_address)
        ssocket.listen(cls.BACKLOG)
        ssocket.setblocking(0)
        return ssocket

    def set_app(self, application):
        """Set the WSGI callable invoked for every request."""
        self.application = application

    def _accept(self, ssocket, event):
        """Accept one client and start reading its request."""
        if event & IOLoop.ERROR:
            self._close(ssocket)
            return  # the socket is closed; do not try to accept on it

        connect, addr = ssocket.accept()
        connect.setblocking(0)
        ioloop = IOLoop.instance()
        ioloop.add_handler(connect, self._receive, IOLoop.READ)

        fd = connect.fileno()
        connection = Connection(fd)
        connection.address = addr
        self.conn_pool[fd] = connection

    def _receive(self, connect, event):
        """Read one fragment; switch to sending once the headers are complete."""
        if event & IOLoop.ERROR:
            self._close(connect)
            return
        fd = connect.fileno()
        connection = self.conn_pool[fd]
        fragment = connect.recv(1024)
        if not fragment:
            # Empty read: the peer closed the connection mid-request.
            self._close(connect)
            return
        connection.request_buffer.append(fragment)
        # The terminators are at most 3 bytes long and every stored fragment
        # is non-empty, so a terminator that ends in the newest fragment lies
        # entirely within the last three fragments.
        last_fragment = ''.join(connection.request_buffer[-3:])
        if EOL1 in last_fragment or EOL2 in last_fragment:
            ioloop = IOLoop.instance()
            ioloop.update_handler(fd, IOLoop.WRITE)
            ioloop.replace_handler(fd, self._send)

    def _send(self, connect, event):
        """Send as much of the response as the socket accepts; close when done."""
        if event & IOLoop.ERROR:
            self._close(connect)
            return

        fd = connect.fileno()
        connection = self.conn_pool[fd]
        # NOTE(review): nothing in this class sets ``connection.handled``;
        # presumably ``Connection`` derives it (e.g. from ``response``).
        # If it does not, the application is re-run on every partial send.
        if not connection.handled:
            self.handle(connection)
        byteswritten = connect.send(connection.response)
        if byteswritten:
            # Keep only the part that has not been sent yet.
            connection.response = connection.response[byteswritten:]
        if not len(connection.response):
            self._close(connect)

    def _close(self, connect, event=None):
        """Unregister ``connect`` from the IOLoop, close it and drop its state."""
        fd = connect.fileno()

        # Unregister while the descriptor is still open.
        ioloop = IOLoop.instance()
        ioloop.remove_handler(fd)

        try:
            connect.shutdown(socket.SHUT_RDWR)
        except socket.error:
            # The socket is not connected (e.g. the listening socket) or the
            # peer is already gone; closing it is still correct.
            pass
        connect.close()
        # The listening socket has no entry in the pool.
        self.conn_pool.pop(fd, None)

    def handle(self, connection):
        """Run the WSGI application and store the packaged response."""
        def start_response(status, response_headers, exc_info=False):
            # NOTE(review): PEP 3333 expects start_response to return a
            # write() callable; this implementation returns None.
            utc_now = datetime.utcnow().strftime(self.HEADER_DATE_FORMAT)
            connection.headers = response_headers + [
                    ('Date', utc_now),
                    ('Server', self.SERVER_NAME),
            ]
            connection.status = status

        request_text = ''.join(connection.request_buffer)
        environ = self.get_environ(request_text)
        body = self.application(environ, start_response)
        connection.response = self.package_response(body, connection)

    @classmethod
    def parse_request_buffer(cls, text):
        """Parse the request line of ``text`` into CGI-style variables.

        Returns a dict with ``PATH_INFO``, ``REQUEST_METHOD``,
        ``SERVER_PROTOCOL`` and ``QUERY_STRING`` (empty when the path has no
        ``?``).

        Raises:
            IndexError: ``text`` contains no lines.
            ValueError: the request line does not have exactly three fields.
        """
        content_lines = text.splitlines()
        request_line = content_lines[0].rstrip('\r\n')
        request_method, path, request_version = request_line.split()
        # partition() yields an empty query string when there is no '?'.
        path, _, query_string = path.partition('?')

        return {
            'PATH_INFO': path,
            'REQUEST_METHOD': request_method,
            'SERVER_PROTOCOL': request_version,
            'QUERY_STRING': query_string,
        }

    def get_environ(self, request_text):
        """Build the WSGI ``environ`` dict for the raw request ``request_text``."""
        request_data = self.parse_request_buffer(request_text)
        # 'HTTP/1.1' -> 'http'. Index 1 would yield the version number
        # ('1.1'), which is not a URL scheme.
        scheme = request_data['SERVER_PROTOCOL'].split('/')[0].lower()
        environ = {
            'wsgi.version': (1, 0),
            'wsgi.url_scheme': scheme,
            'wsgi.input': StringIO.StringIO(request_text),
            'wsgi.errors': sys.stderr,
            'wsgi.multithread': False,
            'wsgi.multiprocess': False,
            'wsgi.run_once': False,
            'SERVER_NAME': self.server_name,
            'SERVER_PORT': self.server_port,
        }
        environ.update(request_data)
        return environ

    def package_response(self, body, connection):
        """Serialize status line, headers and ``body`` into one response string."""
        parts = ['HTTP/1.1 {status}\r\n'.format(status=connection.status)]
        parts.extend('{0}: {1}\r\n'.format(*header) for header in connection.headers)
        parts.append('\r\n')
        parts.extend(body)
        # A single join avoids repeated string concatenation.
        response = ''.join(parts)
        access_logger.debug('\n' + ''.join('> {line}\n'.format(line=line) for line in response.splitlines()))
        return response
参考
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值