# 使用异步 aiohttp 请求,获取笔趣阁的小说。
# 笔趣阁的 url:https://biquge96.com/
# 先在里面找到你想爬取的小说;
# 我获取的是《万古第一神》这本小说,
# 这里先只获取前十章的内容。
import asyncio
import aiohttp
import logging
from aiohttp import TCPConnector
from parsel import Selector
import requests
# Root logger setup: emit INFO and above, each record rendered as
# "<timestamp> - <LEVEL>: <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s: %(message)s',
    level=logging.INFO,
)

# URL template for the target site. The "{id}" placeholder sits directly after
# the host so that a path fragment can be spliced in later; the substituted
# value presumably carries its own leading "/" -- verify against the caller.
INDEX_URL = "https://biquge96.com{id}"

# Browser-like User-Agent header; presumably attached to outgoing HTTP requests.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36 Edg/99.0.1150.52",
}

# Module-level session slot, initialised empty here.
# NOTE(review): presumably rebound to an HTTP client session elsewhere in the
# file -- not visible from this section.
session = None

# MongoDB settings: server URI, database name, and collection name.
MONGO_CONNECTION_STRING = 'mongodb://localhost:27017'
MONGO_DB_NAME = 'xiaoshuo'
MONGO_COLLECTION_NAME = 'wgdy'
from motor.motor_asyncio import AsyncIOMotorClient
# ↑ 异步存储库需要用到这个
# Module-level Motor handles, created once at import time:
#   client     - AsyncIOMotorClient built from MONGO_CONNECTION_STRING
#   db         - the database named by MONGO_DB_NAME, looked up on the client
#   collection - the collection named by MONGO_COLLECTION_NAME, looked up on db
client = AsyncIOMotorClient(MONGO_CONNECTION_STRING)
db = client[MONGO_DB_NAME]
collection = db[MONGO_COLLECTION_NAME]
def get_url():
"""
先用requests.get()请求获取这本小说每个章节对应的id
在合并成每个章节的url,将它们添加到列表中
最后返回所以url的列表
:return: