Python 爬虫十四：Selenium 模拟浏览器 + Chrome 批量下载漫画

最新推荐文章于 2024-08-19 18:28:14 发布

照片怎么加不上

最新推荐文章于 2024-08-19 18:28:14 发布

阅读量1.5k

点赞数

分类专栏： python爬虫基础

python爬虫基础专栏收录该内容

18 篇文章 3 订阅

订阅专栏

转载自：https://zhuanlan.zhihu.com/p/26701898

# -*- coding: utf-8 -*-

from selenium import webdriver
from time import sleep
import os
import requests

# Batch-download comics from:
# https://manhua.sfacg.com/


# Create a directory
def creatDir(path):
    """Create the directory ``path`` if it does not already exist.

    Missing parent directories are created as well, and an already existing
    directory is left untouched.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it also
    # creates missing parents and avoids the check-then-create race.
    os.makedirs(path, exist_ok=True)



# Save an image to disk
def savePic(path,page,url):
    """Download the image at ``url`` and store it as ``<path>//<page>.png``.

    The file name always ends in ``.png``, whatever format the server
    actually returns.

    Args:
        path: Existing directory the image is written into.
        page: Page label (a string) used as the file name without extension.
        url: Address of the image to fetch.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of writing an HTTP error page to disk as an image.
    response.raise_for_status()

    target = path + '//' + page + '.png'
    with open(target, 'wb') as f:
        f.write(response.content)



def get_info():
    """Collect the comic title and chapter links from the comic's index page.

    Opens ``https://manhua.sfacg.com/mh/wqds/`` in Chrome, reads the text of
    the first ``<h1>`` as the comic title, and gathers the ``href`` of every
    ``<a>`` found inside elements with class ``comic_Serial_list``.

    Returns:
        dict: ``{'name': <title text>, 'urls': [<chapter href>, ...]}``.
    """
    # Function-scope import keeps this block self-contained. `By` locators
    # replace the find_element(s)_by_* helpers that Selenium 4 removed.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get('https://manhua.sfacg.com/mh/wqds/')

        # The page's <h1> holds the comic title.
        title = driver.find_element(By.TAG_NAME, 'h1').text
        print(title)

        # Each matching container holds a list of chapter links.
        serial_lists = driver.find_elements(By.CLASS_NAME, 'comic_Serial_list')
        print(len(serial_lists))

        # Read the hrefs into plain strings while the browser is still open;
        # the WebElements become unusable once the driver has quit.
        url_list = [
            anchor.get_attribute('href')
            for serial in serial_lists
            for anchor in serial.find_elements(By.TAG_NAME, 'a')
        ]
    finally:
        # Always close the browser so no Chrome process is left behind.
        driver.quit()

    return dict(name=title, urls=url_list)


def get_pic(Comics, base_dir='C://Users//Administrator//Desktop//img//'):
    """Download every page image of every chapter to the local disk.

    Each chapter URL is opened in Chrome. The number of ``<option>`` elements
    on the page is taken as the page count, and for each page the ``src`` of
    the ``curPic`` image is saved before moving on via the "next page" link.
    Images are written to ``<base_dir><name>//第<n>章//<page>.png``, where
    ``n`` counts chapters from 1 and ``page`` counts pages from 0.

    Args:
        Comics: Dict with key ``'name'`` (comic title) and key ``'urls'``
            (iterable of chapter URLs), as returned by ``get_info``.
        base_dir: Directory prefix under which the comic folder is created.
            It is concatenated directly with the comic name, so it should
            end with a path separator.
    """
    # Function-scope import keeps this block self-contained. `By` locators
    # replace the find_element(s)_by_* helpers that Selenium 4 removed.
    from selenium.webdriver.common.by import By

    urls = Comics['urls']
    name = Comics['name']
    comic_dir = base_dir + name

    driver = webdriver.Chrome()
    try:
        # The implicit wait is a session-wide setting; set it once.
        driver.implicitly_wait(3)
        creatDir(comic_dir)

        for chapter_no, url in enumerate(urls, start=1):
            # Opens the first page of this chapter.
            driver.get(url)

            chapter_dir = comic_dir + "//" + '第' + str(chapter_no) + '章'
            creatDir(chapter_dir)

            # The page selector looks like
            #   <select id="pageSel" onchange="GoSelPage()">
            #     <option value="1">第1页</option> ... </select>
            # so the number of <option> elements is used as the page count.
            page_count = len(driver.find_elements(By.TAG_NAME, 'option'))
            print(page_count)

            # "Next page" link:
            #   <a href="javascript:NextPage();" class="redfont_input">下一页</a>
            next_button = driver.find_element(
                By.XPATH, '//*[@id="AD_j1"]/div/a[4]')

            for page in range(page_count):
                # Current page image: <img id="curPic" src="...">
                pic_url = driver.find_element(
                    By.ID, 'curPic').get_attribute('src')
                savePic(chapter_dir, str(page), pic_url)
                # Nothing left to fetch after the last page, so skip the
                # click there; the next chapter is opened by URL anyway.
                if page < page_count - 1:
                    next_button.click()
    finally:
        # Always close the browser so no Chrome process is left behind.
        driver.quit()



# Run the crawl only when executed as a script, so that importing this module
# does not launch a browser and start downloading.
if __name__ == "__main__":
    com = get_info()
    get_pic(com)