import requests
from lxml import etree
from threading import Thread
def get_one_page(page: int):
    """Fetch one listing page and hand its image URLs to a download thread.

    Args:
        page: Page number. Page 1 is served from ``index.html``; any other
            page ``n`` is served from ``index_n.html``.
    """
    print(f'获取第{page}页数据')
    if page == 1:
        url = 'https://pic.netbian.com/index.html'
    else:
        url = f'https://pic.netbian.com/index_{page}.html'

    response = requests.get(url)
    # Decode the body as GBK explicitly before parsing it.
    response.encoding = 'gbk'
    html = etree.HTML(response.text)
    # Thumbnails appear either directly under <a> or wrapped in a <span>.
    result = html.xpath(
        '//div[@class="slist"]/ul/li/a/img/@src'
        '|//div[@class="slist"]/ul/li/a/span/img/@src'
    )
    # The extracted src values are site-relative, so prefix the host.
    image_urls = ['https://pic.netbian.com' + x for x in result]

    # One thread downloads all images of this page.
    # (Alternative design: start one thread per image, each running
    # download_image on a single URL.)
    t = Thread(target=download_page_image, args=(image_urls,))
    t.start()


def get_all_data():
    """Start one thread per listing page for pages 1 through 10."""
    for page in range(1, 11):
        t = Thread(target=get_one_page, args=(page,))
        t.start()


def download_page_image(urls: list):
    """Download every image in ``urls`` sequentially.

    Args:
        urls: Absolute image URLs collected from one listing page.
    """
    print(f'下载一页数据:{len(urls)}')
    for url in urls:
        download_image(url)


def download_image(url: str):
    """Download one image and save it under ``files/``.

    The file name is the last path segment of ``url``.

    Args:
        url: Absolute URL of the image.
    """
    import os  # local import: keeps this block self-contained

    response = requests.get(url)
    # Several threads may reach this point at once; exist_ok makes the
    # directory creation safe to repeat.
    os.makedirs('files', exist_ok=True)
    # The context manager closes the handle even if the write fails.
    with open(f'files/{url.split("/")[-1]}', 'wb') as f:
        f.write(response.content)


get_all_data()
# 3. 线程类的子类对象 (Objects of a Thread subclass)
from threading import Thread, current_thread
import time
from datetime import datetime
class DownloadThread(Thread):
    """Thread subclass that simulates downloading a named item."""

    def __init__(self, name):
        """Create the thread and use ``name`` as its thread name.

        Args:
            name: Label of the item to "download"; stored as the thread name.
        """
        super().__init__()
        self.name = name

    def run(self) -> None:
        """Task executed in the child thread.

        Prints the start time and the executing thread, sleeps for two
        seconds to stand in for the download, then prints the end time.
        """
        print(f'{self.name}开始下载:{datetime.now()}')
        print('当前线程:', current_thread())
        time.sleep(2)
        print(f'{self.name}下载结束:{datetime.now()}')
# Build the three worker threads, one per title, in declaration order.
t1, t2, t3 = (DownloadThread(title) for title in ('奥特曼', '葫芦娃', '金刚'))

# start() schedules run() on the corresponding child thread.
# Calling run() directly instead would execute it on the main thread.
for worker in (t1, t2, t3):
    worker.start()
# 4. join操作 (The join operation)
from random import randint
from threading import Thread, current_thread
import time
from datetime import datetime
class DownloadThread(Thread):
    """Thread subclass that simulates a download of random duration."""

    def __init__(self, name):
        """Create the thread and use ``name`` as its thread name.

        Args:
            name: Label of the item to "download"; stored as the thread name.
        """
        super().__init__()
        self.name = name

    def run(self) -> None:
        """Task executed in the child thread.

        Prints the start time and the executing thread, sleeps for a random
        3 to 7 seconds (inclusive) to stand in for the download, then prints
        the end time.
        """
        print(f'{self.name}开始下载:{datetime.now()}')
        print('当前线程:', current_thread())
        time.sleep(randint(3, 7))
        print(f'{self.name}下载结束:{datetime.now()}')
t1 = DownloadThread('奥特曼')
t2 = DownloadThread('葫芦娃')
t3 = DownloadThread('金刚')

# start() invokes run() on the corresponding child thread.
t1.start()
t2.start()
t3.start()

# join usage:
#   thread_object.join() - wait until that thread's task has finished
#   before executing the code that follows.
t1.join()
t2.join()
t3.join()
# Reached only after all three threads have finished.
print("电影全部下载结束")
# 5. 多进程 (Multiprocessing)
from multiprocessing import Process, current_process
from datetime import datetime
import time
from random import randint
def download(name):
    """Simulate downloading ``name`` inside the current process.

    Prints the start time and the executing process, sleeps for a random
    3 to 7 seconds (inclusive), then prints the end time.

    Args:
        name: Label of the item to "download".
    """
    print(f'{name}开始下载:{datetime.now()}')
    # NOTE(review): the label reads "current thread" but the value printed
    # is the current process; confirm whether the wording is intended.
    print('当前线程:', current_process())
    time.sleep(randint(3, 7))
    print(f'{name}下载结束:{datetime.now()}')


if __name__ == '__main__':
    # 1. Create the process objects.
    p1 = Process(target=download, args=('肖申克的救赎',))
    p2 = Process(target=download, args=('触不可及',))
    p3 = Process(target=download, args=('肖申克的救赎',))

    # 2. Start the processes.
    p1.start()
    p2.start()
    p3.start()

    # Wait for every child process before reporting completion.
    p1.join()
    p2.join()
    p3.join()
    print('电影下载完成!')