import aiofiles def get_every_chapter_url(url): resp = requests.get(url) resp.encoding = 'gbk' tree = etree.HTML(resp.text) href_list = tree.xpath('//div[@class="section-box"]/ul/li/a/@href') return href_list async def download_one(url): print(f"{url}:开始了") while 1: try: async with aiohttp.ClientSession() as session: async with session.get(url) as resp: text_code = resp.get_encoding() # 这里报错 d = (await resp.text())
Traceback (most recent call last):
File "", line 26, in download_one
d = (await resp.text())
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python39-32\lib\site-packages\aiohttp\client_reqrep.py", line 1087, in text
return self._body.decode( # type: ignore[no-any-return,union-attr]
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python39-32\lib\encodings\utf_8_sig.py", line 23, in decode
(output, consumed) = codecs.utf_8_decode(input, errors, True)
UnicodeDecodeError: 'utf-8' codec can't decode bytes in position 1017-1018: invalid continuation byte
根据上面的报错提示,查看 aiohttp 的 client_reqrep.py 中 text() 的源码如下:
return self._body.decode( # type: ignore[no-any-return,union-attr]
encoding, errors=errors
)
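注意:`# type: ignore[no-any-return,union-attr]` 只是写给类型检查器的注释,与这个报错无关;真正起作用的是 `decode(encoding, errors=errors)` 里的 `encoding` 和 `errors` 两个参数。给 `await resp.text()` 传入 "utf-8", "ignore"(即 encoding="utf-8"、errors="ignore")后报错消失,但无法解码的字节会被直接丢弃。如果页面实际是 GBK 编码(上面 requests 版本里设置了 `resp.encoding = 'gbk'`),应改用 `await resp.text(encoding="gbk")` 才能得到完整的中文内容。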
async def download_one(url):
print(f"{url}:开始了")
while 1:
try:
async with aiohttp.ClientSession() as session:
async with session.get(url) as resp:
text_code = resp.get_encoding()
d = (await resp.text("utf-8", "ignore"))