1.相关网址和库
参考文章:漫画下载,动态加载、反爬虫这都不叫事!
网址
https://www.dmzj.com/
需要用到的库
requests、beautifulsoup4(导入名为 bs4)、tqdm
2.代码实现
import requests
import re
from bs4 import BeautifulSoup
from contextlib import closing
from tqdm import tqdm
import os
import time
class MangaSpider(object):
def __init__(self, save_dir='妖神记',
             target_url='https://www.dmzj.com/info/yaoshenji.html'):
    """Create the output directory and record the crawl target.

    Args:
        save_dir: Directory to create (including missing parents) and
            store as ``self.save_dir``. Defaults to the folder name that
            was previously hard-coded.
        target_url: URL stored as ``self.target_url``. Defaults to the
            info page that was previously hard-coded.

    Raises:
        FileExistsError: If ``save_dir`` exists but is not a directory.
    """
    self.save_dir = save_dir
    # exist_ok=True replaces the exists()/mkdir() pair: no race between
    # the check and the creation, and nested paths are created as well.
    os.makedirs(self.save_dir, exist_ok=True)
    self.target_url = target_url
    # Starts empty; presumably filled by the parsing step -- the code
    # that populates it is not visible in this block.
    self.chapter_list = []
# 1.发送请求
def send_request(self, url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the body as text.

    Args:
        url: Address to request.
        timeout: Seconds passed to ``requests.get`` as its timeout.
            Without one, requests waits indefinitely on a stalled
            server, which would hang the whole crawl.

    Returns:
        The response body, decoded from its raw bytes as UTF-8.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            when the timeout elapses.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    response = requests.get(url, timeout=timeout)
    # NOTE(review): the HTTP status is not checked, so an error page is
    # returned like any other body -- confirm callers expect that.
    # Decode the raw bytes explicitly instead of using response.text, so
    # the result does not depend on requests' charset detection.
    return response.content.decode('utf-8')
# 2.解析数据
def parse_list_data(self, data):
bs = BeautifulSoup(data