最近学了异步/协程,于是想模仿着写一个爬取小说的程序。
过程中遇到了好几个坑,在这里记录一下:
导入的包:
import asyncio
import aiofiles
import requests
from bs4 import BeautifulSoup
from lxml import etree
import aiohttp
import time
主函数:
url = 'http://www.purepen.com/sgyy/'
def main():
head = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36'
}
resp = requests.get(url, headers=head)
# 将响应解码
resp.encoding = 'gbk'
# Xpath解析响应,获取所有的a标签的href值
t_resp = etree.HTML(resp.text)
trs = t_resp.xpath('/html/body/center/table[2]//a/@href')