//**************** Coroutines
import asyncio

"""
Calling this function does not run its body; it just produces a coroutine object.
"""
async def func():
    print("I am a function")

if __name__ == '__main__':  # program entry point
    # A coroutine object needs an event_loop to actually execute.
    # f = func()
    # print(f)  # <coroutine object func at 0x...>
    f = func()  # the coroutine object still needs an event_loop to run

    # Method 1: grab the event loop...
    event_loop = asyncio.get_event_loop()
    # ...and let it drive the coroutine object until everything inside it has finished.
    event_loop.run_until_complete(f)

    # Method 2 (pick one method or the other -- a coroutine object can only be
    # awaited once, so pass a fresh func() here rather than the used-up f):
    # asyncio.run(func())
    # If your machine raises "Event loop is closed", skip asyncio.run and use method 1.
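# A quick sanity check (a minimal sketch, standard library only; the name
# `caller` is just illustrative): calling func() builds a coroutine object
# but runs nothing -- the body executes only when something awaits it.
import asyncio

async def func():
    print("inside func")

async def caller():
    c = func()   # nothing printed yet -- c is just a coroutine object
    print(c)     # <coroutine object func at 0x...>
    await c      # now the body actually runs

asyncio.run(caller())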
//*************** Practical use: the core pattern
import asyncio
import time

async def func1():
    print("I am func1")
    await asyncio.sleep(1)
    print("func1 done")

async def func2():
    print("I am func2")
    await asyncio.sleep(2)
    print("func2 done")

async def func3():
    print("I am func3")
    await asyncio.sleep(3)
    print("func3 done")

if __name__ == '__main__':
    start = time.time()
    f1 = func1()
    f2 = func2()
    f3 = func3()
    # Put the three tasks together in one list.
    tasks = [
        f1,
        f2,
        f3,
    ]
    # Note: Python 3.8+ warns about passing bare coroutines to asyncio.wait(),
    # and 3.11+ rejects them outright -- wrap each one in asyncio.create_task()
    # instead (shown in the next section).
    asyncio.run(asyncio.wait(tasks))
    print(time.time() - start)  # ~3 seconds, not 1+2+3 = 6
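# For contrast, the same three waits done synchronously (a minimal sketch;
# `slow` is an illustrative stand-in): with blocking time.sleep the total
# is about 1 + 2 + 3 = 6 seconds instead of ~3.
import time

def slow(name, t):
    print("I am", name)
    time.sleep(t)   # blocking wait: the whole program stops here
    print(name, "done")

start = time.time()
for name, t in [("func1", 1), ("func2", 2), ("func3", 3)]:
    slow(name, t)
print(time.time() - start)  # ~6 seconds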
//************** Wrapping coroutines into tasks
import asyncio

async def download(url, t):
    print("Starting download")
    await asyncio.sleep(t)  # pretend the download takes t seconds
    print("Download finished")

async def main():
    # Suppose we already have a pile of download links.
    urls = [
        "http://www.baidu.com",
        "http://luoyonghao.com",
        "http://qiaofuhaoshuai.com"
    ]
    # Wrap them into a list of tasks.
    tasks = []
    for url in urls:
        # Create a task -- with create_task() the warning mentioned below goes away.
        task = asyncio.create_task(download(url, 3))
        # Collect the tasks in a list so they can be handled together.
        tasks.append(task)
        # tasks.append(download(url, 3))  # also works, but if it warns, use create_task as above
    # Wait for all the coroutine tasks to finish together.
    await asyncio.wait(tasks)  # with bare coroutines, Python 3.8+ warns right here

if __name__ == '__main__':
    asyncio.run(main())
    # event_loop = asyncio.get_event_loop()
    # event_loop.run_until_complete(main())

"""
The future crawler pattern:
1. Scan and collect a pile of URLs.
2. Loop over the URLs, creating one task per URL.
3. await them all in one place.
"""
//******* Coroutine return values
import asyncio

async def func1():
    print("I am func1")
    await asyncio.sleep(1)
    print("func1 done")
    return "func1's return value"

async def func2():
    print("I am func2")
    await asyncio.sleep(2)
    print("func2 done")
    return "func2's return value"

async def func3():
    print("I am func3")
    # print(1/0)  # deliberate exception for testing
    await asyncio.sleep(3)
    print("func3 done")
    return "func3's return value"

async def main():
    f1 = func1()
    f2 = func2()
    f3 = func3()
    tasks = [
        asyncio.create_task(f3),
        asyncio.create_task(f2),
        asyncio.create_task(f1),
    ]
    # asyncio.wait() hands back (done, pending) sets, and sets are
    # unordered -- so its results come back in no particular order.
    # done, pending = await asyncio.wait(tasks)
    # for t in done:
    #     print(t.result())

    # gather vs. wait: gather returns the results in order -- the same
    # order in which you passed in the tasks.
    # return_exceptions=True:  an exception becomes a value in the result
    #                          list; the other tasks still complete normally.
    # return_exceptions=False: the first exception is raised to the awaiter
    #                          immediately, and you lose the other results.
    result = await asyncio.gather(*tasks, return_exceptions=True)
    print(result)

if __name__ == '__main__':
    asyncio.run(main())
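# What return_exceptions=True actually produces -- a minimal sketch where
# one coroutine deliberately fails: the exception object simply occupies
# that task's slot in the (ordered) result list.
import asyncio

async def ok():
    await asyncio.sleep(0.1)
    return "fine"

async def boom():
    raise ValueError("deliberate error")

async def main():
    result = await asyncio.gather(ok(), boom(), return_exceptions=True)
    print(result)  # ['fine', ValueError('deliberate error')]

asyncio.run(main())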
//***** Async coroutines in practice: an image-download example
# pip install aiohttp aiofiles
import asyncio
import aiohttp
import aiofiles

async def download(url):
    print("Starting download:", url)
    file_name = url.split("/")[-1]
    # aiohttp plays the role of requests here.
    async with aiohttp.ClientSession() as session:
        # Send the network request.
        async with session.get(url) as resp:
            # await resp.text()                   # requests equivalent: resp.text
            content = await resp.content.read()   # requests equivalent: resp.content
            # Write the bytes to a file.
            async with aiofiles.open(file_name, mode="wb") as f:
                await f.write(content)
    print("Download finished:", url)

async def main():
    url_list = [
        "http://pic3.hn01.cn/wwl/upload/2021/05-30/lr53sysfkl5.jpg",
        "http://pic3.hn01.cn/wwl/upload/2021/05-30/hgeuzfs4jt2.jpg",
        "http://pic3.hn01.cn/wwl/upload/2021/05-30/kwpyey5xv2l.jpg",
        "http://pic3.hn01.cn/wwl/upload/2021/05-30/w2xjeyllq1k.jpg",
    ]
    tasks = []
    for url in url_list:
        t = asyncio.create_task(download(url))
        tasks.append(t)
    await asyncio.wait(tasks)

if __name__ == '__main__':
    asyncio.run(main())
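# Once the URL list grows, you usually want to cap how many downloads run
# at once. A minimal sketch using asyncio.Semaphore -- the limit of 5 is an
# arbitrary choice, and the rest mirrors the download() above.
import asyncio
import aiohttp
import aiofiles

sem = asyncio.Semaphore(5)  # at most 5 downloads in flight at a time

async def download(url):
    async with sem:  # each task waits here until a slot frees up
        file_name = url.split("/")[-1]
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                content = await resp.content.read()
        async with aiofiles.open(file_name, mode="wb") as f:
            await f.write(content)
        print("Download finished:", url)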
//********** Downloading a novel
import requests
from lxml import etree
import time
import asyncio
import aiohttp
import aiofiles

def get_every_chapter_url(url):
    while 1:
        try:
            headers = {
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
            }
            # verify=False works around certificate problems;
            # if it causes an error on your machine, just remove it.
            resp = requests.get(url, headers=headers, verify=False)
            resp.encoding = "GBK"
            tree = etree.HTML(resp.text)
            href_list = tree.xpath("//div[@class='booklist clearfix']/span/a/@href")
            print(href_list)
            return href_list
        except Exception:
            print("Retrying")
            time.sleep(3)

async def download_one(url):
    # A bare retry loop; to cap the attempts, add a counter
    # (a bounded-retry sketch follows this section).
    while 1:
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url) as resp:
                    # aiohttp: pass the encoding to .text() rather than setting resp.encoding
                    page_source = await resp.text(encoding="gbk")
                    # Parse the chapter page.
                    tree = etree.HTML(page_source)
                    title = tree.xpath("//div[@class='chaptertitle clearfix']/h1/text()")[0].strip()
                    content = "\n".join(tree.xpath("//div[@id='BookText']/text()")).replace("\u3000", "")
                    # The output directory must already exist.
                    async with aiofiles.open(f"./这个文章名字不允许/{title}.txt", mode="w", encoding='utf-8') as f:
                        await f.write(content)
                    break
        except Exception:
            print("Error, retrying:", url)
    print("Finished:", url)

async def download(href_list):
    tasks = []
    for href in href_list:
        t = asyncio.create_task(download_one(href))
        tasks.append(t)
    await asyncio.wait(tasks)

def main():
    url = "https://www.zanghaihua.org/mingchaonaxieshier/"
    # 1. Grab the URL of every chapter on the page.
    href_list = get_every_chapter_url(url)
    # 2. Fire up the coroutines and download chapter by chapter.
    asyncio.run(download(href_list))  # run the coroutine tasks

if __name__ == '__main__':
    main()
    # Detail: the site layout has changed, so adjust the XPaths above --
    # the data is still in the page source.
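# Following the note in download_one: the same retry loop with a bounded
# attempt count. A minimal sketch -- fetch_one is a hypothetical helper and
# the cap of 3 attempts is arbitrary.
import asyncio
import aiohttp

async def fetch_one(url, max_retries=3):
    for attempt in range(1, max_retries + 1):
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url) as resp:
                    return await resp.text(encoding="gbk")
        except Exception:
            print(f"attempt {attempt} failed:", url)
            await asyncio.sleep(1)  # brief pause before the next attempt
    raise RuntimeError(f"giving up on {url} after {max_retries} attempts")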