线程池优点
减少在创建和销毁线程上所花的时间以及系统资源的开销
如不使用线程池,系统创建大量线程可能导致内存耗尽,以及线程间频繁的上下文切换(即"过度切换")。
应用场景
单个任务处理的时间比较短
需要处理大量这样的任务
案例
'''
http://c.biancheng.net/view/2627.html
'''
import requests
import pymysql
from pprint import pprint
from concurrent.futures import ThreadPoolExecutor
class BaiduZhaoping:
    """Fetch Baidu graduate job postings page by page and store them in MySQL.

    Pages are downloaded concurrently through a thread pool, while every
    database operation runs on the thread that called ``main`` because a
    single connection and cursor are shared by the whole instance.
    """

    # Seconds to wait on the job-list endpoint before giving up on a page.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Open the MySQL connection and prepare the endpoint URL and headers."""
        # NOTE(review): credentials are hard-coded here; consider moving them
        # to configuration or environment variables.
        self.db = pymysql.connect(
            user='root',
            password='12345',
            host='localhost',
            database='python',
            port=3306,
            charset='utf8',
        )
        self.cursor = self.db.cursor()
        self.base_url = 'https://talent.baidu.com/httservice/getPostListNew'
        self.headers = {
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
            "Origin": "https://talent.baidu.com",
            "Referer": "https://talent.baidu.com/jobs/list",
            "User-Agent": "xxx",
        }

    def start_requests(self, page):
        """POST the form for one result page and return the decoded JSON body.

        Args:
            page: Page number, sent as the ``curPage`` form field.

        Returns:
            The parsed JSON response.

        Raises:
            requests.RequestException: On timeout, connection failure or an
                HTTP error status.
        """
        data = {
            "recruitType": "GRADUATE",
            "pageSize": "10",
            "keyWord": "",
            "curPage": f"{page}",
            "projectType": ""
        }
        # Without a timeout a stalled connection would block a pool worker
        # (and therefore main) indefinitely.
        response = requests.post(
            self.base_url,
            data=data,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail here on 4xx/5xx instead of later on an unexpected JSON shape.
        response.raise_for_status()
        return response.json()

    def parse(self, response):
        """Extract the stored fields from one page of results and save them.

        Args:
            response: Decoded JSON for one page; postings are read from
                ``response['data']['list']``.

        Every non-empty page is saved, including pages holding fewer rows
        than the requested page size (for example the final page).
        """
        # Treat a null/empty list as "no postings on this page".
        postings = response['data']['list'] or []
        # The leading 0 fills the id column of the insert statement;
        # presumably MySQL replaces it with the next auto-increment value --
        # TODO confirm NO_AUTO_VALUE_ON_ZERO is not enabled on the server.
        rows = [
            (0, item['education'], item['name'], item['serviceCondition'])
            for item in postings
        ]
        if rows:
            self.save_data(rows)

    def save_data(self, lis):
        """Insert rows into ``bdzp`` in one batch; roll back if it fails.

        Args:
            lis: Sequence of ``(id, education, name, serviceCondition)``
                tuples.
        """
        sql = """
            insert into bdzp(id, education, name, serviceCondition)
            values(%s, %s, %s, %s)
        """
        try:
            self.cursor.executemany(sql, lis)
            self.db.commit()
            print('数据保存成功')
        except Exception as e:
            # Best effort: report the failure and undo the partial batch so
            # the remaining pages can still be processed.
            print('数据保存失败', e)
            self.db.rollback()

    def create_table(self):
        """Create the ``bdzp`` table if it does not exist yet."""
        sql = """
            create table if not exists bdzp(
                id int unsigned primary key auto_increment,
                education varchar(100),
                name varchar(100),
                serviceCondition text
            )
        """
        try:
            self.cursor.execute(sql)
            print('表创建成功')
        except Exception as e:
            print('表创建失败', e)

    def main(self, pages=range(1, 6), max_workers=5):
        """Create the table, download the pages concurrently and store them.

        Args:
            pages: Iterable of page numbers to fetch (default: pages 1-5).
            max_workers: Size of the download thread pool (default: 5).
        """
        try:
            self.create_table()
            with ThreadPoolExecutor(max_workers=max_workers) as pool:
                # All pages are submitted to the pool's task queue up front so
                # idle workers can pick them up in parallel; a worker that
                # finishes a task returns to the pool and waits for the next.
                # Results are consumed in page order on this thread, so the
                # shared connection and cursor are never used concurrently.
                for payload in pool.map(self.start_requests, pages):
                    self.parse(payload)
        finally:
            # Release the connection even when a download or parse step fails.
            self.db.close()
if __name__ == '__main__':
    # Script entry point: build the scraper and run the full pipeline.
    BaiduZhaoping().main()