首先导入所需的库:
import re
import requests
from multiprocessing import Queue
import threading
编写页码处理类的逻辑:
```python
class Crawl_page(threading.Thread):
"""处理页码发送请求"""
# 请求的url https://search.51job.com/list/000000,000000,0000,00,9,99,python,2,2.html
# 定义__init__方法并传入线程名称,页码队列,数据队列
def __init__(self, thread_name, page_queue, data_queue):
# 重构父类方法
super(Crawl_page, self).__init__()
# 线程名称
self.thread_name = thread_name
# 页码队列
self.page_queue = page_queue
# 数据队列
self.data_queue = data_queue
# 定义请求头
self.header = {
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 "
"Safari/537.36 OPR/26.0.1656.60",
"User-Agent": "Opera/8.0 (Windows NT 5.1; U; en)",
"User-Agent": "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
"User-Agent": "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
}
# 定义线程启用处理方法
def run(self):
"""多线程启动方法"""
# 输出那个线程正在工作
print("当前启动的线程为{}".format(self.thread_name))
# 定义了page_flag的全局变量为False,while not page_flag 为True
while