# 1. Multithreading
from threading import Thread

def func():
    for i in range(1000):
        print("Thread 1", i)

def func2():
    for i in range(1000):
        print("Thread 2", i)

if __name__ == '__main__':
    # Pass the function object itself; target=func() would call func in the
    # main thread and hand its return value (None) to Thread.
    t = Thread(target=func)
    t.start()  # start a new thread; actual scheduling is up to the OS/CPU
    t2 = Thread(target=func2)
    t2.start()  # start a second thread
    for i in range(1000):
        print("Main thread", i)
# Thread pool: spin up a batch of threads at once
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

def func(name):
    for i in range(1000):
        print(name, i)

if __name__ == '__main__':
    # create a pool of 50 worker threads and submit 100 jobs to it
    with ThreadPoolExecutor(50) as t:
        for i in range(100):
            t.submit(func, name=f"Thread {i}")
    # leaving the with-block waits for every submitted task to finish,
    # so the next line only runs after the whole pool is done
    print("ok")
Asynchronous coroutines
import asyncio
import time

# 4. Crawler application; this works as a reusable template
async def downLoad(url):
    print("start downloading")
    await asyncio.sleep(2)  # simulate a 2-second download
    print("download finished")

async def main():
    urls = [
        "https://www.baidu.com",
        "https://www.baida.com",
        "https://www.aidu.com"
    ]
    tasks = []
    for url in urls:
        # asyncio.wait() expects Task objects; since Python 3.11 it rejects
        # bare coroutines, so wrap each coroutine in create_task()
        tasks.append(asyncio.create_task(downLoad(url)))
    await asyncio.wait(tasks)

if __name__ == '__main__':
    t1 = time.time()
    asyncio.run(main())
    t2 = time.time()
    print(t2 - t1)  # elapsed time: about 2s, not 6s, because the waits overlap
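asyncio.gather() is a common alternative to asyncio.wait(): it accepts coroutines directly and returns their results in order. A minimal sketch of the same template using gather (the return value is added for illustration):
import asyncio

async def download(url):
    await asyncio.sleep(2)  # simulate a 2-second download
    return url

async def main():
    urls = ["https://www.baidu.com", "https://www.baida.com", "https://www.aidu.com"]
    # gather schedules all coroutines concurrently and collects their results
    results = await asyncio.gather(*(download(u) for u in urls))
    print(results)

if __name__ == '__main__':
    asyncio.run(main())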
import asyncio
import aiohttp

# 5. Example: downloading images with aiohttp
async def urlDownload(url):
    name = url.rsplit('/', 1)[1]  # file name = last URL segment, which already ends in .jpeg
    # aiohttp.ClientSession() plays the role of requests
    # inside a coroutine, "with" must be written as "async with",
    # and the with-block still closes the session automatically
    async with aiohttp.ClientSession() as session:
        # session.get / session.post are the async counterparts of requests.get / requests.post
        # resp.content.read() corresponds to resp.content in requests
        # resp.text() corresponds to resp.text
        # resp.json() corresponds to resp.json()
        async with session.get(url) as resp:
            with open(name, mode="wb") as f:
                f.write(await resp.content.read())  # reading the body is async, so it must be awaited

async def main():
    urls = [
        "https://image.sitapix.com/index-thumb/sitapix-photo-2290543-via-sitapix-com.jpeg",
        "https://image.sitapix.com/index-thumb/bloom-blooming-blossom-130168-via-sitapix-com.jpeg",
        "https://image.sitapix.com/index-thumb/albums-antique-audio-1181789-via-sitapix-com.jpeg"
    ]
    tasks = []
    for url in urls:
        tasks.append(asyncio.create_task(urlDownload(url)))
    await asyncio.wait(tasks)

if __name__ == '__main__':
    asyncio.run(main())
This is the asynchronous version of file I/O (using aiofiles):
async with aiofiles.open(f"./novel/{n}.txt", mode="w", encoding="utf-8") as f:
    await f.write(message)
# synchronous equivalent:
# with open(f"./novel/{n}.txt", mode="w", encoding="utf-8") as f:
#     f.write(message)
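Reading works the same way; a minimal sketch of the async read counterpart, assuming the file written above exists:
import asyncio
import aiofiles

async def read_novel(n):
    async with aiofiles.open(f"./novel/{n}.txt", mode="r", encoding="utf-8") as f:
        return await f.read()  # the read is awaited, so the event loop is not blocked

if __name__ == '__main__':
    print(asyncio.run(read_novel(1)))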
import asyncio
import aiohttp
import requests
from lxml import etree
import aiofiles
import time
import random

# suppress the InsecureRequestWarning ("Unverified HTTPS request is being made.
# Adding certificate verification is strongly advised.")
requests.packages.urllib3.disable_warnings()

# 1. synchronous part: getBookid collects the chapter URLs
# 2. asynchronous part: downLoad fetches and saves each chapter
def getBookid(url_1):
    url = "http://quanxiaoshuo.com"
    resp = requests.get(url_1, verify=False)  # verify=False skips SSL certificate verification
    et = etree.HTML(resp.text)
    list_1 = et.xpath("//div[@class='chapter']")
    data = []
    for item in list_1:
        getData = url + item.xpath("./a/@href")[0]  # build the absolute chapter URL
        data.append(getData)
    return data

async def downLoad(url, n):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            txt = await resp.text()
            et = etree.HTML(txt)
            message = ''.join(et.xpath('//*[@id="content"]/text()'))  # join the text fragments into one string
            message = ''.join(message.split())  # drop all whitespace
            async with aiofiles.open(f"./novel/{n}.txt", mode="w", encoding="utf-8") as f:
                await f.write(message)
            # synchronous equivalent:
            # with open(f"./novel/{n}.txt", mode="w", encoding="utf-8") as f:
            #     f.write(message)

async def main(urls):
    n = 1
    tasks = []
    for url in urls:
        tasks.append(asyncio.create_task(downLoad(url, n)))
        print("Chapter %d" % n)
        n += 1
    await asyncio.wait(tasks)

if __name__ == '__main__':
    a = 0
    b = 100
    url_1 = "http://quanxiaoshuo.com/179092/"
    data = getBookid(url_1)
    asyncio.run(main(data[:5]))
    # batched variant (a and b above delimit each batch; note that n restarts
    # at 1 on every main() call, so file names would collide across batches):
    # for i in range(10):
    #     # time.sleep(random.randint(1, 4))
    #     asyncio.run(main(data[a:b]))
    #     a += 5
    #     b += 5
    print("ok")