【python应用】150行代码从零开始实现简单的web服务器

最新推荐文章于 2022-03-28 14:17:36 发布

薛定谔的猫96

最新推荐文章于 2022-03-28 14:17:36 发布

阅读量548

点赞数

分类专栏： Python 文章标签： python

本文链接：https://blog.csdn.net/qq_42415326/article/details/96560610

版权

Python 专栏收录该内容

83 篇文章

订阅专栏

本文介绍如何使用Python构建一个简易的Web服务器，涵盖处理HTTP请求、返回动态内容、使用CGI脚本扩展功能等方面。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

开发工具

使用httpie代替浏览器发送请求并在终端打印响应信息。

废话少说，现在就来写我们第一个web服务器吧

等待某个人连接我们的服务器并向我们发送一个HTTP请求
解析该请求
了解该请求希望请求的内容
服务器根据请求抓取需要的数据（从服务器本地文件中读取或者程序动态生成）
将数据格式化为请求需要的格式
返回HTTP响应

from http.server import BaseHTTPRequestHandler, HTTPServer

class RequestHandler(BaseHTTPRequestHandler):
    '''Answer every GET request with one fixed HTML page.'''

    # Page template returned for every request.
    Page = '''\
        <html>
        <body>
        <p>Hello, web!</p>
        </body>
        </html>
    '''

    # Override do_GET to handle a GET request.
    def do_GET(self):
        '''Send ``Page`` as a 200 response encoded as UTF-8.'''
        # Encode first: Content-Length must count the bytes actually written,
        # not the characters of the str, or a non-ASCII page would be cut short.
        body = self.Page.encode('utf-8')
        self.send_response(200)
        self.send_header("Content-Type", "text/html")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

if __name__ == '__main__':
    # Host '' with port 8080 is the address the server binds to.
    serverAddress = ('', 8080)
    # Create the server and attach the request handler class.
    server = HTTPServer(
        server_address=serverAddress,
        RequestHandlerClass=RequestHandler,
    )
    # Keep handling requests until the process is stopped.
    server.serve_forever()

http.server 模块的 BaseHTTPRequestHandler 类会帮我们处理对请求的解析，并根据请求的方法来调用其对应的函数，比如方法是 GET，该类就会调用名为 do_GET 的方法。RequestHandler 继承了 BaseHTTPRequestHandler 并重写了 do_GET 方法，其效果如代码所示是返回 Page 的内容。Content-Type 告诉客户端要以处理 html 文件的方式处理返回的内容。end_headers 方法会插入一个空白行，表示响应头部到此结束。

运行我们的第一个 web服务器:

$ python3 server.py

可以在浏览器地址输入 127.0.0.1:8080 进行查看:

方便起见，在web服务器开启的情况下，我们重新开一个终端窗口，使用httpie来查看输出(之后都使用httpie来查看输出):

http 127.0.0.1:8080

httpie很贴心地显示了响应报文的全部内容

修改之前的代码来显示请求的信息，并重构代码：

from http.server import BaseHTTPRequestHandler,HTTPServer

class RequestHandler(BaseHTTPRequestHandler):
    '''
    Handle GET requests by returning a page that echoes request details.
    '''
    # Page template; the {name} fields are filled in by create_page().
    html = '''\
        <html>
        <body>
        <table>
        <tr>  <td>Header</td>          <td>Value</td>  </tr>
        <tr>  <td>Date and time</td>   <td>{date_time}</td>  </tr>
        <tr>  <td>Client host</td>     <td>{client_host}</td>  </tr>
        <tr>  <td>Client port</td>   <td>{client_port}</td>  </tr>
        <tr>  <td>Command</td>       <td>{command}</td>  </tr>
        <tr>  <td>Path</td>        <td>{path}</td>  </tr>
        </table>
        </body>
        </html>
    '''

    # Handle a GET request.
    def do_GET(self):
        '''Build the page for this request and send it to the client.'''
        page = self.create_page()
        self.send_content(page)

    # Build the page.
    def create_page(self):
        '''Return the template filled in with this request's details.'''
        values = {
            'date_time':self.date_time_string(),
            'client_host':self.client_address[0],
            'client_port':self.client_address[1],
            'command':self.command,
            'path':self.path
        }
        page = self.html.format(**values)
        return page

    # Send the response.
    def send_content(self,html):
        '''Send ``html`` (a str) as a 200 response encoded as UTF-8.'''
        # Content-Length must be the number of encoded bytes; the echoed
        # request path may contain non-ASCII characters, in which case the
        # character count is smaller than the byte count.
        body = html.encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Type','text/html')
        self.send_header('Content-Length',str(len(body)))
        self.end_headers()
        self.wfile.write(body)

if __name__ == '__main__':
    # Bind to port 8080 with an empty host string.
    serverAddress = ('', 8080)
    server = HTTPServer(
        server_address=serverAddress,
        RequestHandlerClass=RequestHandler,
    )
    # Serve requests until the process is stopped.
    server.serve_forever()

运行看看:

 http 127.0.0.1:8080/something.html

注意到它仍旧返回了200 OK而不是404 Not Found，即使 something.html 文件并不存在。那是因为我们现在的web服务器还没有实现找不到文件就返回404错误的功能。

怎么解决返回404的问题呢，首先得有返回文件的功能吧。

首先将之前的代码中的html放入新建的index.html下：

<!-- Template file: the server fills in the named placeholders with str.format before sending it. -->
<html>
<body>
<table>
<tr>  <td>Header</td>          <td>Value</td>  </tr>
<tr>  <td>Date and time</td>   <td>{date_time}</td>  </tr>
<tr>  <td>Client host</td>     <td>{client_host}</td>  </tr>
<tr>  <td>Client port</td>   <td>{client_port}</td>  </tr>
<tr>  <td>Command</td>       <td>{command}</td>  </tr>
<tr>  <td>Path</td>        <td>{path}</td>  </tr>
</table>
</body>
</html>

import os,sys
from http.server import BaseHTTPRequestHandler,HTTPServer

class ServerException(Exception):
    '''Internal server error.'''

class RequestHandler(BaseHTTPRequestHandler):
    '''
    Handle GET requests by serving template files from the working directory.
    '''
    # Error page template; {path} and {msg} are filled in by handle_error().
    Error_Page = '''\
        <html>
        <body>
        <h1>Error accessing {path}</h1>
        <p>{msg}</p>
        </body>
        </html>

'''

    # Error response.
    def handle_error(self,msg):
        '''Send the error page containing ``msg`` with status 404.'''
        content = self.Error_Page.format(path=self.path,msg=msg)
        self.send_content(content.encode('utf-8'),404)

    # File handling.
    def handle_file(self,full_path):
        '''Read the file at ``full_path``, fill in its placeholders and send it.

        An IOError while doing so is reported to the client via handle_error().
        '''
        try:
            # Read as UTF-8 explicitly: the page is sent as UTF-8 below, so the
            # platform's default encoding must not be used for decoding.
            with open(full_path,'r',encoding='utf-8') as r:
                content = r.read()
            content = self.create_page(content)
            self.send_content(content.encode('utf-8'))

        except IOError as msg:
            msg = "'{0}' cannot be read :{1}".format(self.path,msg)
            self.handle_error(msg)

    def _resolve_path(self):
        '''Return the file-system path for the requested URL path.

        Raises ServerException when the normalised path would leave the
        working directory (for example through ".." components), so a client
        cannot read files outside the served directory.
        '''
        root = os.path.abspath(os.getcwd())
        full_path = os.getcwd() + self.path
        if os.path.commonpath([root,os.path.abspath(full_path)]) != root:
            raise ServerException("'{0}' not found".format(self.path))
        return full_path

    # Handle a GET request.
    def do_GET(self):
        '''Serve the requested file, or an error page if it cannot be served.'''
        try:
            full_path = self._resolve_path()
            # The path does not exist.
            if not os.path.exists(full_path):
                raise ServerException("'{0}' not found".format(self.path))
            # The path is a file.
            elif os.path.isfile(full_path):
                self.handle_file(full_path)
            # The path exists but is not a file.
            else:
                raise ServerException("Unknown object '{0}'".format(self.path))
        except Exception as msg:
            # Every failure is reported to the client through the error page.
            self.handle_error(msg)

    # Build the page.
    def create_page(self,content):
        '''Return ``content`` with the request-detail placeholders filled in.'''
        values = {
            'date_time':self.date_time_string(),
            'client_host':self.client_address[0],
            'client_port':self.client_address[1],
            'command':self.command,
            'path':self.path
        }
        content = content.format(**values)
        return content

    # Send the response.
    def send_content(self,content,status=200):
        '''Send ``content`` (bytes) as an HTML response with ``status``.'''
        self.send_response(status)
        self.send_header('Content-Type','text/html')
        self.send_header('Content-Length',str(len(content)))
        self.end_headers()
        self.wfile.write(content)

if __name__ == '__main__':
    # Address to bind: empty host string, port 8080.
    serverAddress = ('', 8080)
    server = HTTPServer(
        server_address=serverAddress,
        RequestHandlerClass=RequestHandler,
    )
    # Handle requests until the process is stopped.
    server.serve_forever()

大部分时候我们都希望能够直接在http://127.0.0.1:8080/ 显示主页内容。要怎么做呢，也许我们可以在do_GET那冗长的if-elif-else判断里再加一个判断请求地址是不是根地址的分支，也许我们可以找到一个更加聪明的方法。

比如说把每一种情况都单独写成一个条件类，条件类中的test方法用来判断是否符合该类指定的条件，act则是符合条件时的处理函数。其中的handler是对RequestHandler实例的引用，通过它，我们就能调用handle_file进行响应。

class cass_is_file:
    '''
    Case: the requested path is a file.
    '''
    def test(self,handler):
        '''Return True when ``handler.full_path`` is an existing file.'''
        target = handler.full_path
        return os.path.isfile(target)

    def act(self,handler):
        '''Let the handler send the file at ``handler.full_path``.'''
        target = handler.full_path
        handler.handle_file(target)

class cass_no_path:
    '''
    Case: the requested path does not exist.
    '''
    def test(self,handler):
        '''Return True when nothing exists at ``handler.full_path``.'''
        found = os.path.exists(handler.full_path)
        return not found

    def act(self,handler):
        '''Raise ServerException naming the missing request path.'''
        message = "'{0}' not found".format(handler.path)
        raise ServerException(message)

class cass_other:
    '''
    Case: anything not matched by another case.
    '''
    def test(self,handler):
        '''Always match.'''
        return True

    def act(self,handler):
        '''Raise ServerException naming the request path as an unknown object.'''
        message = "Unknown object '{0}'".format(handler.path)
        raise ServerException(message)

class RequestHandler(BaseHTTPRequestHandler):
    ...

    # Cases are tried in order and the first whose test() passes is used, so
    # the catch-all cass_other has to be last; placed earlier it would hide
    # every case listed after it.
    CASES = [cass_no_path(),cass_is_file(),cass_other()]
    ...
    ...

    # Handle a GET request.
    def do_GET(self):
        '''Run the first matching case; report any exception as an error page.'''
        try:
            # Full path of the requested file. It is stored on the handler
            # because the case classes read it back as handler.full_path.
            self.full_path = os.getcwd() + self.path

            # Walk through the case objects.
            for case in self.CASES:
                if case.test(self):
                    case.act(self)
                    break

        except Exception as msg:
            self.handle_error(msg)

    ...
    ...

这样每当我们需要考虑一个新的情况时，只要新写一个条件处理类然后加到 Cases 中去就行了，是不是比原先在if-elif-else中添加条件的做法看起来更加干净更加清楚呢，毕竟修改原有的代码是一件很有风险的事情，调试起来也非常麻烦。在做功能扩展的同时尽量不要修改原代码是软件开发过程中需要牢记的一点。

我们希望浏览器访问根url的时候能返回工作目录下index.html的内容，那就需要再多加一个条件判断啦。

写一个新的条件处理类:

写一个新的条件处理类:

class case_directory_index_file:
    '''Case: the requested path is a directory that contains index.html.'''

    def index_path(self, handler):
        '''Return the path of index.html inside ``handler.full_path``.'''
        directory = handler.full_path
        return os.path.join(directory, 'index.html')

    # The target must be a directory and that directory must hold index.html.
    def test(self, handler):
        '''Return True when the target is a directory with an index.html file.'''
        if not os.path.isdir(handler.full_path):
            return False
        return os.path.isfile(self.index_path(handler))

    # Respond with the content of index.html.
    def act(self, handler):
        '''Let the handler send the directory's index.html.'''
        index_file = self.index_path(handler)
        handler.handle_file(index_file)


加到Cases中:

# Class attribute of RequestHandler. The entries use the case classes defined
# in this article, and the list is also bound to CASES because do_GET iterates
# over self.CASES. The catch-all cass_other stays last.
Cases = CASES = [cass_no_path(),
                 cass_is_file(),
                 case_directory_index_file(),
                 cass_other()]

迄今为止，完整代码：

import os,sys
from http.server import BaseHTTPRequestHandler,HTTPServer


class cass_is_file:
    '''
    Case: the requested path is a file.
    '''
    def test(self,handler):
        '''Return True when ``handler.full_path`` is an existing file.'''
        path = handler.full_path
        return os.path.isfile(path)

    def act(self,handler):
        '''Pass ``handler.full_path`` to the handler's handle_file().'''
        path = handler.full_path
        handler.handle_file(path)

class cass_no_path:
    '''
    Case: the requested path does not exist.
    '''
    def test(self,handler):
        '''Return True when nothing exists at ``handler.full_path``.'''
        exists = os.path.exists(handler.full_path)
        return not exists

    def act(self,handler):
        '''Raise ServerException naming the missing request path.'''
        reason = "'{0}' not found".format(handler.path)
        raise ServerException(reason)

class cass_other:
    '''
    Case: anything not matched by another case.
    '''
    def test(self,handler):
        '''Always match.'''
        return True

    def act(self,handler):
        '''Raise ServerException naming the request path as an unknown object.'''
        reason = "Unknown object '{0}'".format(handler.path)
        raise ServerException(reason)

class case_directory_index_file:
    '''Case: the requested path is a directory that contains index.html.'''

    # Build the path of the index file.
    def index_path(self,handler):
        '''Return the path of index.html inside ``handler.full_path``.'''
        folder = handler.full_path
        return os.path.join(folder,'index.html')

    # The target must be a directory and that directory must hold index.html.
    def test(self,handler):
        '''Return True when the target is a directory with an index.html file.'''
        if not os.path.isdir(handler.full_path):
            return False
        return os.path.isfile(self.index_path(handler))

    def act(self,handler):
        '''Let the handler send the directory's index.html.'''
        index_file = self.index_path(handler)
        handler.handle_file(index_file)

class ServerException(Exception):
    '''Internal server error.'''

class RequestHandler(BaseHTTPRequestHandler):
    '''
    Handle GET requests by delegating to the first matching case object.
    '''
    # Tried in order; the first case whose test() passes handles the request,
    # so the catch-all cass_other is last.
    CASES = [cass_no_path(),cass_is_file(),case_directory_index_file(),cass_other()]
    # Error page template; {path} and {msg} are filled in by handle_error().
    Error_Page = '''\
        <html>
        <body>
        <h1>Error accessing {path}</h1>
        <p>{msg}</p>
        </body>
        </html>

    '''

    # Error response.
    def handle_error(self,msg):
        '''Send the error page containing ``msg`` with status 404.'''
        content = self.Error_Page.format(path=self.path,msg=msg)
        self.send_content(content.encode('utf-8'),404)

    # File handling.
    def handle_file(self,full_path):
        '''Read the file at ``full_path``, fill in its placeholders and send it.

        An IOError while doing so is reported to the client via handle_error().
        '''
        try:
            # Read as UTF-8 explicitly: the page is sent as UTF-8 below, so the
            # platform's default encoding must not be used for decoding.
            with open(full_path,'r',encoding='utf-8') as r:
                content = r.read()
            content = self.create_page(content)
            self.send_content(content.encode('utf-8'))

        except IOError as msg:
            msg = "'{0}' cannot be read :{1}".format(self.path,msg)
            self.handle_error(msg)

    def _resolve_path(self):
        '''Return the file-system path for the requested URL path.

        Raises ServerException when the normalised path would leave the
        working directory (for example through ".." components), so a client
        cannot read files outside the served directory.
        '''
        root = os.path.abspath(os.getcwd())
        full_path = os.getcwd() + self.path
        if os.path.commonpath([root,os.path.abspath(full_path)]) != root:
            raise ServerException("'{0}' not found".format(self.path))
        return full_path

    # Handle a GET request.
    def do_GET(self):
        '''Run the first matching case; report any exception as an error page.'''
        try:
            # Full path of the requested file; the cases read it back as
            # handler.full_path.
            self.full_path = self._resolve_path()

            # Walk through the case objects.
            for case in self.CASES:
                if case.test(self):
                    case.act(self)
                    break

        except Exception as msg:
            self.handle_error(msg)

    # Build the page.
    def create_page(self,content):
        '''Return ``content`` with the request-detail placeholders filled in.'''
        values = {
            'date_time':self.date_time_string(),
            'client_host':self.client_address[0],
            'client_port':self.client_address[1],
            'command':self.command,
            'path':self.path
        }
        content = content.format(**values)
        return content

    # Send the response.
    def send_content(self,content,status=200):
        '''Send ``content`` (bytes) as an HTML response with ``status``.'''
        self.send_response(status)
        self.send_header('Content-Type','text/html')
        self.send_header('Content-Length',str(len(content)))
        self.end_headers()
        self.wfile.write(content)

if __name__ == '__main__':
    # Address to bind: empty host string, port 8080.
    serverAddress = ('', 8080)
    server = HTTPServer(
        server_address=serverAddress,
        RequestHandlerClass=RequestHandler,
    )
    # Handle requests until the process is stopped.
    server.serve_forever()

当然，大部分人都不希望每次给服务器加新功能都要到服务器的源代码里进行修改。

如果程序能独立在另一个脚本文件里运行那就再好不过了。本小节会实现CGI的效果。

CGI？

CGI即通用网关接口(Common Gateway Interface)，是外部应用程序（CGI程序）与Web服务器之间的接口标准，是在CGI程序和Web服务器之间传递信息的规程。CGI规范允许Web服务器执行外部程序，并将它们的输出发送给Web浏览器，CGI将Web的一组简单的静态超媒体文档变成一个完整的新的交互式媒体。通俗的讲CGI就像是一座桥，把网页和WEB服务器中的执行程序连接起来，它把HTML接收的指令传递给服务器的执行程序，再把服务器执行程序的结果返还给HTML页。CGI 的跨平台性能极佳，几乎可以在任何操作系统上实现

CGI脚本工作流程：

浏览器通过HTML表单或超链接请求指向一个CGI应用程序的URL。
服务器接收到请求。
服务器执行所指定的CGI应用程序。
CGI应用程序执行所需要的操作，通常是基于浏览者输入的内容。
CGI应用程序把结果格式化为网络服务器和浏览器能够理解的文档（通常是HTML网页）。
网络服务器把结果返回到浏览器中。

先在工作目录下新建一个用于测试的CGI脚本文件（例如 time.py），它会输出一段带有当前时间的HTML：

from datetime import datetime

# HTML printed to standard output; {0} is replaced by the current time.
PAGE = '''\
<html>
<body>
<p>Generated {0}</p>
</body>
</html>'''

print(PAGE.format(datetime.now()))


#在server.py中新建一个处理脚本文件的条件类：

class case_cgi_file:
    '''Case: the requested path is a script file to run.'''

    def test(self, handler):
        '''Return True when the target is an existing file ending in ".py".'''
        target = handler.full_path
        if not os.path.isfile(target):
            return False
        return target.endswith('.py')

    def act(self, handler):
        '''Run the script through the handler's run_cgi().'''
        script = handler.full_path
        handler.run_cgi(script)


#在server.py中实现运行脚本文件的函数:

import subprocess

def run_cgi(self, full_path):
    '''Run the script at ``full_path`` and send its standard output as the response.

    A non-zero exit status of the script raises subprocess.CalledProcessError.
    '''
    # sys.executable is the interpreter running this server, so the script
    # still runs where no "python3" command can be found on PATH.
    data = subprocess.check_output([sys.executable, full_path],shell=False)
    self.send_content(data)


# Do not forget to register the new case.
# Order matters: the script case must be checked before the plain-file case,
# because a script is also a file.
CASES = [
    cass_no_path(),
    case_cgi_file(),
    cass_is_file(),
    case_directory_index_file(),
    cass_other(),
]
Cases = CASES

回头看看我们的代码，注意到一个新的问题了吗？虽然条件判断已经被我们整理到几个类中去了，但是像run_cgi只有在路径为py文件的条件下才使用的函数是放在 RequestHandler下的，那以后再加几个新功能，但是这类函数都放到 RequestHandler下的话可想而知RequestHandler会变的臃肿不堪。当然你会想这算什么问题嘛，把它放到各自的条件类下不就好了噢。

各自的代码归各自是个好办法，但有时候不同的条件类内可能会有功能相同的函数，这时候我们都知道重复相同的代码是软件开发里很忌讳的一件事情，那么怎么处理重复的代码呢？

可以抽象出一个基类嘛，遇到重复的内容就放在基类的下面，所有的条件类都继承这个基类。

import os,sys
import subprocess
from http.server import BaseHTTPRequestHandler,HTTPServer

class base_case(object):
    '''
    Base class for the condition cases; holds the helpers they share.
    '''
    # File handling.
    def handle_file(self,handler,full_path):
        '''Read the file at ``full_path``, fill in its placeholders and send it.

        The page is built and sent through ``handler``; an IOError while doing
        so is reported through ``handler.handle_error``.
        '''
        try:
            # Read as UTF-8 explicitly: the page is sent as UTF-8 below, so the
            # platform's default encoding must not be used for decoding.
            with open(full_path,'r',encoding='utf-8') as r:
                content = r.read()
            content = handler.create_page(content)
            handler.send_content(content.encode('utf-8'))

        except IOError as msg:
            msg = "'{0}' cannot be read :{1}".format(handler.path,msg)
            handler.handle_error(msg)

    def index_path(self,handler):
        '''Return the path of index.html inside ``handler.full_path``.'''
        return os.path.join(handler.full_path,'index.html')

    # Placeholders that subclasses must override. They raise instead of using
    # assert, because assert statements are removed when Python runs with -O
    # and the methods would then silently return None.
    def test(self,handler):
        '''Return whether this case applies to ``handler``; must be overridden.'''
        raise NotImplementedError('Not implemented')

    def act(self,handler):
        '''Handle the request for ``handler``; must be overridden.'''
        raise NotImplementedError('Not implemented')

class cass_no_path(base_case):
    '''
    Case: the requested path does not exist.
    '''
    def test(self,handler):
        '''Return True when nothing exists at ``handler.full_path``.'''
        exists = os.path.exists(handler.full_path)
        return not exists

    def act(self,handler):
        '''Raise ServerException naming the missing request path.'''
        reason = "'{0}' not found".format(handler.path)
        raise ServerException(reason)


class case_cgi_file(object):
    '''
    Case: the requested path is an executable script.
    '''
    def run_cgi(self,handler):
        '''Run the script at ``handler.full_path`` and send its output.

        A non-zero exit status of the script raises
        subprocess.CalledProcessError.
        '''
        # Capture the script's output. sys.executable is the interpreter
        # running this server, so the script still runs where no "python"
        # command can be found on PATH.
        content = subprocess.check_output([sys.executable,handler.full_path],shell=False)
        handler.send_content(content)

    # Check whether the target path is a .py file.
    def test(self,handler):
        '''Return True when the target is an existing file ending in ".py".'''
        return os.path.isfile(handler.full_path) and handler.full_path.endswith('.py')

    def act(self,handler):
        '''Run the script and send what it printed.'''
        self.run_cgi(handler)


class cass_is_file(base_case):
    '''
    Case: the requested path is a file.
    '''
    def test(self,handler):
        '''Return True when ``handler.full_path`` is an existing file.'''
        target = handler.full_path
        return os.path.isfile(target)

    def act(self,handler):
        '''Send the file at ``handler.full_path`` using the shared handle_file().'''
        target = handler.full_path
        self.handle_file(handler,target)


class case_directory_index_file(base_case):
    '''
    Case: the requested path is a directory that contains index.html.
    '''
    # Inherits from base_case because index_path() and handle_file() are
    # defined there; with object as the base both calls below would raise
    # AttributeError.

    # The target must be a directory and that directory must hold index.html.
    def test(self,handler):
        '''Return True when the target is a directory with an index.html file.'''
        return os.path.isdir(handler.full_path) and os.path.isfile(self.index_path(handler))
    
    def act(self,handler):
        '''Send the directory's index.html.'''
        self.handle_file(handler,self.index_path(handler))


class cass_other(base_case):
    '''
    Case: anything not matched by another case.
    '''
    def test(self,handler):
        '''Always match.'''
        return True

    def act(self,handler):
        '''Raise ServerException naming the request path as an unknown object.'''
        reason = "Unknown object '{0}'".format(handler.path)
        raise ServerException(reason)


class ServerException(Exception):
    '''Internal server error.'''


class RequestHandler(BaseHTTPRequestHandler):
    '''
    Handle GET requests by delegating to the first matching case object.
    '''
    # Order matters: the script case must be checked before the plain-file
    # case, and the catch-all cass_other is last.
    CASES = [cass_no_path(),case_cgi_file(),cass_is_file(),case_directory_index_file(),cass_other()]
    
    # Error page template; {path} and {msg} are filled in by handle_error().
    Error_Page = '''\
        <html>
        <body>
        <h1>Error accessing {path}</h1>
        <p>{msg}</p>
        </body>
        </html>

'''

    # Error response.
    def handle_error(self,msg):
        '''Send the error page containing ``msg`` with status 404.'''
        content = self.Error_Page.format(path=self.path,msg=msg)
        self.send_content(content.encode('utf-8'),404)

    def _resolve_path(self):
        '''Return the file-system path for the requested URL path.

        Raises ServerException when the normalised path would leave the
        working directory (for example through ".." components), so a client
        can neither read nor execute files outside the served directory.
        '''
        root = os.path.abspath(os.getcwd())
        full_path = os.getcwd() + self.path
        if os.path.commonpath([root,os.path.abspath(full_path)]) != root:
            raise ServerException("'{0}' not found".format(self.path))
        return full_path

    # Handle a GET request.
    def do_GET(self):
        '''Run the first matching case; report any exception as an error page.'''
        try:
            # Full path of the requested file; the cases read it back as
            # handler.full_path.
            self.full_path = self._resolve_path()

            # Walk through the case objects.
            for case in self.CASES:
                if case.test(self):
                    case.act(self)
                    break

        except Exception as msg:
            self.handle_error(msg)

    # Page construction for index.html.
    def create_page(self,content):
        '''Return ``content`` with the request-detail placeholders filled in.'''
        values = {
            'date_time':self.date_time_string(),
            'client_host':self.client_address[0],
            'client_port':self.client_address[1],
            'command':self.command,
            'path':self.path
        }
        content = content.format(**values)
        return content

    # Send the response content to the client.
    def send_content(self,content,status=200):
        '''Send ``content`` (bytes) as an HTML response with ``status``.'''
        self.send_response(status)
        self.send_header('Content-Type','text/html')
        self.send_header('Content-Length',str(len(content)))
        self.end_headers()
        self.wfile.write(content)

if __name__ == '__main__':
    # Address to bind: empty host string, port 8080.
    serverAddress = ('', 8080)
    server = HTTPServer(
        server_address=serverAddress,
        RequestHandlerClass=RequestHandler,
    )
    # Handle requests until the process is stopped.
    server.serve_forever()

通过重构我们发现，真正实施行为(Action)的代码逻辑可以抽出来进行封装(封装成各种条件处理类)，而 BaseHTTPRequestHandler 类和 base_case 类提供了供条件处理类使用的接口，它们可以看作是一系列服务(Service)。在软件设计中我们常常会把业务代码进行分层，将行为与服务分开，降低耦合，更有利于我们开发和维护代码。

通过统一接口，以及cgi程序，我们的代码功能扩展变的更加容易，可以专心于编写功能代码，而不用去关心其他部分。case 的添加虽然仍在server代码中，但我们也可以把它放到配置文件中，由server读取配置文件。