线程
多线程 一个进程可以包含多个线程
创建 3 个线程,并发执行同一个函数 func
import json
import time
from threading import Thread
def func(name):
    """Print ``name`` together with each counter value from 0 to 9."""
    for counter in range(10):
        print(name, counter)
def Multithred():
    """Start three threads that each run ``func`` and then print a closing message.

    The threads are started but never joined, so the closing message may be
    printed while the worker threads are still producing output.
    """
    workers = [
        Thread(target=func, args=(singer,))  # one thread per name
        for singer in ("周杰伦", "王力宏", "陈小春")
    ]
    for worker in workers:
        worker.start()
    print('主线程结束')
# Run the multithreading demo; this call sits at module level, so it executes
# whenever the module body runs.
Multithred()
线程池
一共提交 100 个任务,线程池最多同时执行其中 10 个
from concurrent.futures import ThreadPoolExecutor
def Threadpoll():
    """Queue 100 ``func`` calls on a thread pool limited to 10 worker threads."""
    # Leaving the ``with`` block waits until every submitted task has finished.
    with ThreadPoolExecutor(10) as pool:
        for task_no in range(100):
            label = f"周杰伦{task_no}"
            pool.submit(func, label)
接受返回值
func2 函数返回 name;通过 t.submit(...).add_done_callback(fn) 注册回调,任务一完成就会调用回调函数 fn,fn 再通过 res.result() 打印 func2 的返回值 name。
import time
def func2(name, t):
    """Sleep for ``t`` seconds, announce ``name`` on stdout, then return ``name``."""
    time.sleep(t)
    print("我是", name)
    return name
def fn(res):
    """Done-callback: print the result stored in the finished future ``res``."""
    outcome = res.result()
    print(outcome)
def ReturnThreadpoll():
    """Run ``func2`` three times on a 3-thread pool, printing each result via ``fn``.

    Each submitted task gets ``fn`` attached as a done-callback, so a result
    is printed as soon as its own task completes.
    """
    jobs = (("周杰伦", 2), ("王力宏", 1), ("刘姥姥", 3))
    with ThreadPoolExecutor(3) as pool:
        for singer, delay in jobs:
            future = pool.submit(func2, singer, delay)
            future.add_done_callback(fn)
# add_done_callback() fires the callback the moment a task finishes, so the
# order in which callbacks run (and results appear) is not deterministic.
# To get results back in submission order, use map() instead.
def ReturnThreadpoll2():
    """Run ``func2`` three times through ``map`` and print the results in order."""
    singers = ["周杰伦", "王力宏", "刘姥姥"]
    delays = [2, 1, 3]
    with ThreadPoolExecutor(3) as pool:
        # map() hands back a lazy iterator; its results come out in the same
        # order in which the tasks were dispatched.
        ordered_results = pool.map(func2, singers, delays)
        for returned in ordered_results:
            print(returned)
多线程练习
import requests
import pandas as pd
# Fetch one page of data and store it as a spreadsheet
def Gethtml(page):
    """Fetch one page of price data and save it as ``Names<current>.xlsx``.

    The form data in ``page`` is POSTed to the price endpoint, the JSON
    response is parsed, and the selected fields of every record under the
    ``"list"`` key are written to an Excel file in the working directory.

    Args:
        page: Mapping sent as the POST form data. It must contain the key
            ``"current"``, which is also used in the output file name.

    Raises:
        KeyError: If ``page`` has no ``"current"`` key, the response has no
            ``"list"`` key, or a record lacks one of the selected fields.
    """
    page_no = page["current"]
    url = 'http://www.xinfadi.com.cn/getPriceData.html'
    response = requests.post(url=url, data=page)
    content = json.loads(response.text)
    # The original key list named "prodCat" twice; the duplicate collapsed
    # into a single column, so it is listed once here.
    # NOTE(review): the duplicate may have been meant to be a different
    # field - confirm against the actual response.
    keys = ["prodCat", "prodName", "lowPrice", "highPrice", "avgPrice",
            "place", "specInfo", "unitInfo", "pubDate"]
    rows = [{key: record[key] for key in keys} for record in content["list"]]
    # Build the frame in one step: DataFrame.append was removed in pandas 2.0
    # and copied the whole frame on every call. Passing ``columns`` keeps the
    # header row even when the page contains no records.
    table = pd.DataFrame(rows, columns=keys)
    table.to_excel(f'Names{page_no}.xlsx', index=False)
def Getall():
    """Submit ``Gethtml`` for pages 1 through 10 to a pool of 5 threads.

    Every request carries ``'limit': 20`` alongside the page number.
    """
    with ThreadPoolExecutor(5) as pool:
        for page_no in range(1, 11):
            form = {'current': page_no, 'limit': 20}
            pool.submit(Gethtml, form)
进程
from multiprocessing import Process
def func(name):
    """Print ``name`` together with each counter value from 0 to 9999."""
    for counter in range(10000):
        print(name, counter)
if __name__ == '__main__':
    # Create both worker processes before starting either, then launch them
    # in order. Each process runs ``func`` with its own label.
    p1, p2 = (
        Process(target=func, args=(label,))
        for label in ("周杰伦", "王谷歌")
    )
    p1.start()
    p2.start()
多进程与多线程练习,爬取某网站图片并下载到本地
'''
1、从网站获取图片详情页的 url,再从详情页拿到图片的 url 地址
2、下载图片
队列(Queue):进程之间的通信
'''
from multiprocessing import Process,Queue
import requests
from lxml import etree
from concurrent.futures import ThreadPoolExecutor
def get_img_src(q):
    """Producer: scrape image URLs from the listing page and put them on ``q``.

    The listing page is fetched, every detail-page link found on it is
    visited, and the list of image ``src`` values matched on that detail page
    is put on the queue as one item. The string ``"没了"`` is always put last
    as the end-of-data marker, even if scraping fails part-way.

    Args:
        q: Queue-like object providing ``put`` (a ``multiprocessing.Queue``
            in this file).
    """
    url = "https://www.yeitu.com/meinv/"
    try:
        response = requests.get(url=url)
        response.encoding = "utf-8"
        tree = etree.HTML(response.text)
        # URLs of the detail pages linked from the listing page
        href_list = tree.xpath("//div[@class='list-box-p']/ul/li/a/@href")
        for href in href_list:
            child_res = requests.get(url=href)
            child_res.encoding = "utf-8"
            child_tree = etree.HTML(child_res.text)
            # xpath() yields a list of matching image URLs (possibly empty)
            src = child_tree.xpath("//div[@class='img_box']/a/img/@src")
            if not src:
                # Nothing matched on this page; the consumer indexes the
                # first element, so an empty list would only fail later.
                continue
            q.put(src)  # hand the URL list to the consumer
            print(f"{src},被塞进队列")
    finally:
        # Always send the sentinel: without it the consumer would block
        # forever on q.get() if an error interrupted the loop above.
        q.put("没了")
def download(url):
    """Download one image and save it under ``./img/`` with a ``.webp`` suffix.

    The file name is the last path segment of the URL without its extension;
    the ``.webp`` suffix is applied regardless of the source extension.

    Args:
        url: Either the image URL as a string, or a non-empty sequence whose
            first element is the image URL (the form queued by
            ``get_img_src``).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    import os  # local import so the block does not depend on file-level imports

    print("开始下载", url)
    src = url if isinstance(url, str) else url[0]
    # splitext drops the extension whatever its length (the original sliced
    # off exactly four characters, which only suits three-letter extensions).
    name = os.path.splitext(src.split("/")[-1])[0]
    # NOTE(review): Host is pinned to file.jiutuvip.com; a URL on another
    # host would be requested with a mismatched Host header - confirm.
    head = {
        'Host': 'file.jiutuvip.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'TE': 'trailers'
    }
    # Fetch first and open the file afterwards, so a failed request does not
    # leave an empty file behind.
    response = requests.get(url=src, headers=head)
    print(response)
    response.raise_for_status()  # do not save an error page as an image
    os.makedirs('./img', exist_ok=True)  # the target directory may not exist yet
    with open('./img/' + name + '.webp', "wb") as f:
        f.write(response.content)
    print("下载完毕")
def download_img(q):
    """Consumer: take items from ``q`` and download each on a 2-thread pool.

    Items are read until the sentinel string ``"没了"`` arrives; every other
    item is passed to ``download`` on the pool.
    """
    with ThreadPoolExecutor(2) as pool:
        # q.get() blocks while the queue is empty; iteration ends as soon as
        # the sentinel is received.
        for src in iter(q.get, "没了"):
            pool.submit(download, src)
if __name__ == '__main__':
    # One shared queue links the two processes: the first process fills it
    # with image URLs, the second drains it and downloads them.
    q = Queue()
    p1, p2 = (
        Process(target=worker, args=(q,))
        for worker in (get_img_src, download_img)
    )
    p1.start()
    p2.start()
协程
一个线程多个协程
import asyncio
import time
async def func1():
    """Print a start message, await a 2-second async sleep, print an end message."""
    print("我是func1")
    pause_seconds = 2
    await asyncio.sleep(pause_seconds)
    print("func1结束")
async def func2():
    """Print a start message, await a 1-second async sleep, print an end message."""
    print("我是func2")
    pause_seconds = 1
    await asyncio.sleep(pause_seconds)
    print("func2结束")
async def func3():
    """Print a start message, await a 3-second async sleep, print an end message."""
    print("我是func3")
    pause_seconds = 3
    await asyncio.sleep(pause_seconds)
    print("func3结束")
async def _run_all():
    """Run func1, func2 and func3 concurrently and wait until all three finish."""
    # asyncio.wait() no longer accepts bare coroutine objects (forbidden since
    # Python 3.11), so each coroutine is wrapped in a Task first.
    tasks = [asyncio.create_task(coro) for coro in (func1(), func2(), func3())]
    await asyncio.wait(tasks)


if __name__ == '__main__':
    start = time.time()
    # A coroutine only runs when an event loop drives it. The loop is created
    # and registered explicitly because get_event_loop() without a current
    # loop is deprecated. It is left open and registered so that any later
    # get_event_loop() call in the same run still finds a usable loop.
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    event_loop.run_until_complete(_run_all())
    print(time.time() - start)
有返回值
async def func1():
    """Print start/end messages around a 2-second async sleep; return ``"func1"``."""
    print("我是func1")
    pause_seconds = 2
    await asyncio.sleep(pause_seconds)
    print("func1结束")
    return "func1"
async def func2():
    """Print start/end messages around a 1-second async sleep; return ``"func2"``."""
    print("我是func2")
    pause_seconds = 1
    await asyncio.sleep(pause_seconds)
    print("func2结束")
    return "func2"
async def func3():
    """Print start/end messages around a 3-second async sleep; return ``"func3"``."""
    print("我是func3")
    pause_seconds = 3
    await asyncio.sleep(pause_seconds)
    print("func3结束")
    return "func3"
async def main():
    """Run func2, func1 and func3 as concurrent tasks and print their results.

    The printed list follows the order in which the tasks were created
    (func2, func1, func3), not the order in which they finish.
    """
    # Wrap each coroutine in a Task so that all three are scheduled together.
    pending = [
        asyncio.create_task(worker())
        for worker in (func2, func1, func3)
    ]
    # return_exceptions=True: a failing task contributes its exception object
    # to the result list instead of aborting the wait for the other tasks.
    outcome = await asyncio.gather(*pending, return_exceptions=True)
    print(outcome)
if __name__ == '__main__':
    # Create and register the loop explicitly: get_event_loop() without a
    # current loop is deprecated. The loop is left open and registered so
    # that any later get_event_loop() call in the same run still finds it.
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    event_loop.run_until_complete(main())
协程的两个模块 aiohttp aiofiles 的使用案例
# aiohttp aiofiles 模块的使用
import aiohttp
import aiofiles
# 1 \ 准备一些链接
'''
'https://file.jiutuvip.com/2023/0713/20230713122610102.jpg',
'https://file.jiutuvip.com/2018/0415/20180415024238403.jpg',
'https://file.jiutuvip.com/2018/0415/20180415024238476.jpg',
'https://file.jiutuvip.com/2018/0415/20180415024238300.jpg',
'https://file.jiutuvip.com/2021/1215/20211215014314402.jpg',
'https://file.jiutuvip.com/2020/0305/20200305022340456.jpg'
'''
async def download(url):
    """Asynchronously download one image and save it under ``./img/`` as ``.webp``.

    The file name is the last path segment of ``url`` without its extension;
    the ``.webp`` suffix is applied regardless of the source extension.

    Args:
        url: Image URL as a string.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an error status.
    """
    import os  # local import so the block does not depend on file-level imports

    print("开始下载", url)
    # splitext drops the extension whatever its length (the original sliced
    # off exactly four characters, which only suits three-letter extensions).
    name = os.path.splitext(url.split("/")[-1])[0]
    # NOTE(review): Host is pinned to file.jiutuvip.com; a URL on another
    # host would be requested with a mismatched Host header - confirm.
    head = {
        'Host': 'file.jiutuvip.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'TE': 'trailers'
    }
    # Send the request (async counterpart of requests.get(url, headers=head))
    async with aiohttp.ClientSession() as session:
        async with session.get(url=url, headers=head) as resp:
            resp.raise_for_status()  # do not save an error page as an image
            # await resp.text() plays the role of requests' resp.text;
            # reading the content stream plays the role of resp.content.
            content = await resp.content.read()
    # Write the file only after the body has been fetched successfully
    os.makedirs('./img', exist_ok=True)  # the target directory may not exist yet
    async with aiofiles.open('./img/' + name + '.webp', "wb") as f:
        await f.write(content)
    print("下载完毕")
async def main():
    """Start one ``download`` task per URL and wait until all of them are done."""
    url_lists = [
        'https://file.jiutuvip.com/2023/0713/20230713122610102.jpg',
        'https://file.jiutuvip.com/2018/0415/20180415024238403.jpg',
        'https://file.jiutuvip.com/2018/0415/20180415024238476.jpg',
        'https://file.jiutuvip.com/2018/0415/20180415024238300.jpg',
        'https://file.jiutuvip.com/2021/1215/20211215014314402.jpg',
        'https://file.jiutuvip.com/2020/0305/20200305022340456.jpg'
    ]
    # Schedule every download as its own Task so they run concurrently.
    tasks = [asyncio.create_task(download(link)) for link in url_lists]
    await asyncio.wait(tasks)
if __name__ == '__main__':
    # Create and register the loop explicitly: get_event_loop() without a
    # current loop is deprecated. The loop is left open and registered so
    # that any later get_event_loop() call in the same run still finds it.
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    event_loop.run_until_complete(main())