import requests
from lxml import etree
import threading
# Single-/multi-threaded scraper for Dangdang book titles; results are only
# printed, never saved.

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    ),
}

# Search-result pages 1 through 10 for the fixed query string below.
urls = [
    'http://search.dangdang.com/?key=python+%C5%C0%B3%E6&act=input&page_index={}'.format(page_no)
    for page_no in range(1, 11)
]

# Running number printed in front of each title; starts at 1.
i = 1
# Serializes access to the shared counter ``i`` (and the accompanying print)
# so that concurrent calls cannot duplicate or skip numbers.
_counter_lock = threading.Lock()


def get_mess(url):
    """Fetch one search-result page and print each book title found on it.

    Every title is printed as ``<running number> <title>``; the running
    number is the module-level counter ``i``, which is shared by all calls.

    Args:
        url: Address of the page to download.

    Raises:
        requests.exceptions.RequestException: If the download fails or does
            not complete within the timeout.
    """
    global i
    # Without a timeout a stalled connection would block this call (and any
    # thread join waiting on it) indefinitely.
    resp = requests.get(url, headers=headers, timeout=10)
    # NOTE(review): resp.text relies on requests' charset detection; the
    # query string looks GBK-encoded, so confirm titles decode correctly.
    html = etree.HTML(resp.text)
    names = html.xpath('//p[@class="name"]/a/@title')
    # Hold the lock for the whole page so the read-print-increment sequence
    # on ``i`` is not interleaved with another thread's.
    with _counter_lock:
        for name in names:
            print(i, name)
            i += 1
# Single-threaded variant (kept for reference):
# def main():
#     for url in urls:
#         get_mess(url)
# Multi-threaded variant:
def main():
    """Run ``get_mess`` once per entry in ``urls``, each in its own thread.

    All threads are created first, then started, then joined, so this
    function returns only after every worker has finished.
    """
    # One worker thread per URL.
    workers = [
        threading.Thread(target=get_mess, args=(url,)) for url in urls
    ]
    # Start every worker.
    for worker in workers:
        worker.start()
    # Wait for all workers to finish.
    for worker in workers:
        worker.join()
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# Threading practice 1
# (Web-page residue: latest recommended article published 2024-09-02 00:03:51)