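# mangabz manga crawler
# Known issues (unresolved):
# 1: Multi-threading opens multiple Chrome instances, which exhausts memory or crashes Chrome.
# 2: Downloads are incomplete; the script has to be run several times to fill in the missing pages.
# 3: Log output is jumbled.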
import requests
import fake_useragent
import time
from lxml import etree
import os
from selenium import webdriver
from selenium.webdriver.support import wait
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.common.by import By
from multiprocessing import Pool,Process
from selenium.common.exceptions import TimeoutException
# One random browser User-Agent string, chosen once at import time.
_user_agent = fake_useragent.UserAgent().random

# Header dict carrying the User-Agent string (presumably passed as HTTP request
# headers; the call sites are outside this view -- confirm).
header = {
    # Canonical HTTP header name: servers read the client identity from
    # 'User-Agent', not from 'useragent'.
    'User-Agent': _user_agent,
    # Original key, kept so any existing header['useragent'] lookup still works.
    'useragent': _user_agent,
}
def get_pic(src,ipg,dir_name):
# response = requests.get(url=src[0])
# print(response)
if not os.path.exists('./pic/%s' %dir_name):
os.mkdir('./pic/%s' %dir_name)
pic_name = src[0].split('?')[0].split('/')[-1]
if not os.path.exists(os.getcwd()+'/pic/%s' %dir_name+'/'+pic_name):
print('开始