生产者与消费者模式

生产者与消费者模式:

1.导入模块:

import time
from queue import Empty, Queue
from threading import Thread

import requests
from lxml import etree

2.生产者:

def get_page(url, page_list):
    """Fetch listing pages and pass their HTML on to the consumer queue.

    Producer worker. Repeatedly takes a page number from ``page_list``,
    requests the matching listing page and puts the response body on the
    module-level ``url_list`` queue. Returns once ``page_list`` is empty.

    Args:
        url: Base URL ending in the ``start=`` query parameter; the record
            offset is appended to it.
        page_list: Queue of 1-based page numbers shared by all producer
            threads.
    """
    headers = {
        'User-agent': 'ios/xxxx.3.3.3.10'
    }
    while True:
        # A single non-blocking get avoids the empty()/get() race: with
        # several producers, another thread could take the last item between
        # the two calls and leave this one blocked forever.
        try:
            page = page_list.get_nowait()
        except Empty:
            break

        # The offset is numeric: page 1 -> 0, page 2 -> 10, ...  Multiply
        # before converting to text (str(n) * 10 would repeat the digits).
        full_url = url + str((page - 1) * 10)
        try:
            response = requests.get(full_url, headers=headers, timeout=10)
        except requests.RequestException as exc:
            # One failed page must not kill the worker; report and move on.
            print('failed to fetch %s: %s' % (full_url, exc))
            continue
        url_list.put(response.text)

3.消费者(解析页面):

def parse_page():
    """Parse fetched listing pages and append the extracted rows to a file.

    Consumer worker. Takes page HTML from the module-level ``url_list``
    queue, selects the ``<tr class="even">`` / ``<tr class="odd">`` rows and
    appends one ``name,people_number`` line per row to ``position.txt``.
    Runs until the module-level ``exit_flag`` becomes true.
    """
    while not exit_flag:
        # Wait with a timeout so exit_flag is re-checked regularly; an
        # unbounded get() would block forever once the queue runs dry.
        try:
            response = url_list.get(timeout=1)
        except Empty:
            continue

        try:
            tree = etree.HTML(response)
        except (etree.LxmlError, ValueError) as exc:
            # Report unparsable pages instead of swallowing the error.
            print('failed to parse page: %s' % exc)
            continue
        if tree is None:
            # No document could be built from the text (e.g. empty body).
            continue

        position_info_list = tree.xpath(
            '//tr[@class="even"] | //tr[@class="odd"]')

        jobs = []
        for position_info in position_info_list:
            # Column 1 holds the linked title; column 3 is presumably the
            # head count (named "people number" by the author).
            names = position_info.xpath('.//td[1]/a/text()')
            people_numbers = position_info.xpath('.//td[3]/text()')
            if not names or not people_numbers:
                # Skip only the incomplete row, not the rest of the page.
                continue
            jobs.append(names[0] + ',' + people_numbers[0] + '\n')

        # Open the output file once per page rather than once per row.
        if jobs:
            with open('position.txt', 'a', encoding='utf-8') as f:
                f.writelines(jobs)

存放已获取页面内容的队列与退出标志:

# Hand-off queue between the workers: get_page() puts the text of every
# fetched page here and parse_page() takes it out again.
url_list = Queue()

# Loop condition of parse_page(): it keeps running while this is False.
exit_flag = False

调用函数:

if __name__ == '__main__':
    # Script entry point: queue the page numbers, run one consumer and three
    # producers, then shut down once every fetched page has been handed over.
    url = 'https://hr.tencent.com/position.php?start='

    # Page numbers 1..279 to be fetched by the producers.
    page_list = Queue()
    for page in range(1, 280):
        page_list.put(page)

    # Start the consumer first so pages are parsed while others download.
    # It is a daemon so a consumer still waiting on the queue cannot keep
    # the process alive after the work is done.
    consumer = Thread(target=parse_page, daemon=True)
    consumer.start()

    producers = [
        Thread(target=get_page, args=(url, page_list)) for _ in range(3)
    ]
    for t in producers:
        t.start()
    for t in producers:
        t.join()

    # All pages are fetched; wait until the consumer has taken the last one.
    while not url_list.empty():
        time.sleep(0.1)

    # Tell the consumer to stop and give it a bounded time to finish the page
    # it is working on; the bound keeps shutdown from hanging if the consumer
    # is blocked waiting on the now-empty queue.
    exit_flag = True
    consumer.join(timeout=5)

©️2020 CSDN 皮肤主题: 终极编程指南 设计师:CSDN官方博客 返回首页