import requests
from lxml import etree
from threading import Thread
# Threads that download the images of one listing page each (started by get_one_page).
ts1 = []
# Threads that fetch and parse one listing page each (started by get_all_data).
ts2 = []


def get_one_page(page: int):
    """Fetch listing page *page*, extract its image URLs and start a download thread.

    The first page lives at ``index.html``; every other page at ``index_{page}.html``.
    The started download thread is appended to ``ts1`` so that ``get_all_data``
    can wait for it.
    """
    print(f'获取第{page}页数据')
    if page == 1:
        url = 'https://pic.netbian.com/index.html'
    else:
        url = f'https://pic.netbian.com/index_{page}.html'
    # NOTE(review): no timeout is passed, so a stalled server would block the joins
    # in get_all_data indefinitely — consider adding one.
    response = requests.get(url)
    # The response is decoded as GBK before parsing.
    response.encoding = 'gbk'
    html = etree.HTML(response.text)
    result = html.xpath('//div[@class="slist"]/ul/li/a/img/@src|//div[@class="slist"]/ul/li/a/span/img/@src')
    # The extracted src values are site-relative paths; prefix them with the site root.
    image_urls = ['https://pic.netbian.com' + x for x in result]
    # One thread downloads all images of this page (rather than one thread per image).
    t = Thread(target=download_page_image, args=(image_urls,))
    t.start()
    ts1.append(t)


def get_all_data():
    """Fetch pages 1-10 concurrently and wait until every image has been downloaded."""
    for page in range(1, 11):
        t = Thread(target=get_one_page, args=(page,))
        t.start()
        ts2.append(t)
    # Wait for the page threads first: they are the ones that populate ts1.
    for t in ts2:
        t.join()
    for t in ts1:
        t.join()
    print('图片全部下载完成')


def download_page_image(urls: list):
    """Download every image URL in *urls* sequentially."""
    print(f'下载一页数据:{len(urls)}')
    for url in urls:
        download_image(url)


def download_image(url: str):
    """Download *url* and save it under ``files/`` using the last path segment as name.

    The target directory is created when missing, and the output file is closed
    deterministically (the previous version leaked the file handle).
    """
    import os  # local import: keeps this block self-contained

    response = requests.get(url)
    # exist_ok=True makes this safe when several download threads race here.
    os.makedirs('files', exist_ok=True)
    with open(f'files/{url.split("/")[-1]}', 'wb') as f:
        f.write(response.content)


get_all_data()
# Section: objects of a Thread subclass (线程类的子类对象)
from threading import Thread, current_thread
import time
from datetime import datetime
class DownloadThread(Thread):
    """Thread subclass that simulates downloading a named item.

    The task executed in the child thread is defined by overriding ``run``.
    """

    def __init__(self, name):
        """Initialise the base ``Thread`` and use *name* as the thread's name."""
        super().__init__()
        self.name = name

    def run(self) -> None:
        """Print a start line and the current thread, sleep 2 seconds, print an end line."""
        started_at = datetime.now()
        print(f'{self.name}开始下载:{started_at}')
        print('当前线程:', current_thread())
        time.sleep(2)
        finished_at = datetime.now()
        print(f'{self.name}下载结束:{finished_at}')
# Build the three demo threads in order; the module-level names t1/t2/t3 are kept.
t1, t2, t3 = (DownloadThread(title) for title in ('奥特曼', '葫芦娃', '金刚'))

# start() invokes run() in the corresponding child thread.
for worker in (t1, t2, t3):
    worker.start()

# Calling run() directly executes it in the main thread instead.
for worker in (t1, t2, t3):
    worker.run()
# Section: the join operation (join操作)
import time
from datetime import datetime
from threading import Thread, current_thread
from random import randint
# Usage of join:
#   thread_object.join() waits until that thread's task has finished before
#   the code after the call is executed.
def download(name):
    """Simulate downloading *name*: print a start line, sleep 3-7 s, print an end line."""
    print(f'{name}开始下载:{datetime.now()}')
    pause_seconds = randint(3, 7)
    time.sleep(pause_seconds)
    print(f'{name}下载结束:{datetime.now()}')
# Create one thread per movie; the module-level names t1/t2/t3 are kept.
t1, t2, t3 = (
    Thread(target=download, args=(title,))
    for title in ('大话西游', '忍者神龟', '黑寡妇')
)

# Start all three so the simulated downloads run concurrently.
for worker in (t1, t2, t3):
    worker.start()

# Wait for every thread before printing the final message.
for worker in (t1, t2, t3):
    worker.join()
print('---电影下载结束---')

# The string literal below is a no-op kept for reference: it shows an
# alternative ordering in which the second and third threads are started
# only after the first thread has finished.
"""
# 第一个线程结束后才开始第二个和第三个线程
t1.start()
t1.join()
t2.start()
t3.start()
"""
# Section: multiprocessing (多进程)
from multiprocessing import Process, current_process
from datetime import datetime
import time
from random import randint
def download(name):
    """Simulate downloading *name* inside a worker process.

    Prints a start line and the current process object, sleeps a random
    3-7 seconds, then prints an end line.
    """
    print(f'{name}开始下载:{datetime.now()}')
    # Label corrected from "current thread" to "current process": the value
    # printed here is current_process(), not a thread.
    print('当前进程:', current_process())
    time.sleep(randint(3, 7))
    print(f'{name}下载结束:{datetime.now()}')


if __name__ == '__main__':
    # 1. Create the process objects.
    p1 = Process(target=download, args=('肖申克的救赎',))
    p2 = Process(target=download, args=('触不可及',))
    # NOTE(review): p3 reuses the same title as p1 — confirm this is intended.
    p3 = Process(target=download, args=('肖申克的救赎',))

    # 2. Start the processes.
    p1.start()
    p2.start()
    p3.start()

    # Wait for all processes to finish before reporting completion.
    p1.join()
    p2.join()
    p3.join()
    print('电影下载完成!')