# Download manga with a web crawler (用爬虫下载漫画)
import requests
from lxml import etree
from selenium import webdriver
from pyquery import PyQuery as pq
import time,os
daihao=9845  # numeric comic id used in the site's URL path
name='全职法师'  # comic title; also used as the local download folder name
# Browser-like User-Agent for HTTP requests.
# NOTE(review): defined here but never passed to requests.get below — confirm intent.
head={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36'}
# Drive a real Chrome instance so JS-rendered content (chapter list) is available.
browser = webdriver.Chrome()
browser.get(f'http://www.qiman6.com/{daihao}/')
# Click the element inside #chapterlistload — presumably expands the full
# chapter list (TODO confirm against the live page).
browser.find_element_by_xpath('//*[@id="chapterlistload"]/div[2]/span').click()
time.sleep(1)
# browser.find_element_by_xpath('//*[@id="chapterList"]/div[2]/a').click()
time.sleep(1)
# Grab the rendered page source; the pq() round-trip re-serializes the HTML
# before handing it to lxml for XPath queries.
_html=browser.page_source
html=etree.HTML(str(pq(_html)))
# Chapter URLs (relative hrefs) and the matching chapter titles, index-aligned.
links=html.xpath('//*[@id="chapter-list1"]/a/@href')
text=html.xpath('//*[@id="chapter-list1"]//a/text()')
print(text)
print(len(text))
print(links)
def returnfiles(folder=None):
    """Return the list of filenames already present in the download folder.

    Args:
        folder: directory to scan; defaults to the hard-coded download path
            for the current comic (kept for backward compatibility).

    Returns:
        A list of filenames (top level only). Returns [] when the folder does
        not exist — the original returned None in that case, which made the
        later ``... in returnfiles()`` membership test raise TypeError.
    """
    if folder is None:
        folder = f'D:/1我的文件夹/python___study/python/爬虫/爬漫画/{name}'
    if not os.path.isdir(folder):
        return []
    # os.walk yields (root, dirs, files); we only want the top-level files.
    for _root, _dirs, files in os.walk(folder):
        return files
    return []
# Create the output folder up front so the file writes below cannot fail
# merely because the directory is missing (the old bare except hid that).
os.makedirs(name, exist_ok=True)

# Scan the already-downloaded files ONCE instead of re-walking the
# directory on every loop iteration as the original did.
downloaded = returnfiles() or []
print(downloaded)

for index, i in enumerate(links):
    # Skip a chapter when its first page image is already on disk.
    if f'{text[index]}_{0}.jpg' in downloaded:
        print(f'本章节 {text[index]} 已下载 pass')
        continue
    try:
        browser.get(f'http://www.qiman6.com{i}')
        time.sleep(2)  # give the lazy-loaded image list time to render
        src = browser.find_elements_by_xpath('//*[@id="mainView"]/ul//img')
        # Real image URLs live in the data-src attribute (lazy loading).
        new_links = [x.get_attribute('data-src') for x in src]
        print(f'正在下载{text[index]}共{len(new_links)}幅')
        for new_index, link in enumerate(new_links):
            try:
                time.sleep(0.5)  # throttle to be polite to the server
                # Fetch exactly once (the original fetched every image twice:
                # once into an unused variable, once into the write call) and
                # only open the file AFTER the request succeeds — previously a
                # failed request left a zero-byte file that made the chapter
                # look "already downloaded" on the next run.
                data = requests.get(link, headers=head, timeout=30).content
                with open(f'{name}/{text[index]}_{new_index}.jpg', 'wb') as file:
                    file.write(data)
                print(f'已下载{text[index]}_{new_index}.jpg {link}')
            except Exception as exc:
                # Best-effort per image, but report instead of silently passing.
                print(f'下载失败 {link}: {exc}')
    except Exception as exc:
        print(f'章节下载失败 {text[index]}: {exc}')
# Final run output (最后运行的结果) followed below in the original post.