python网络爬虫实战之下载笔趣看小说网小说

更多爬虫内容请关注博主专栏python3网络爬虫

笔趣看小说网:https://www.biqukan.com/

就拿最近热播的猫腻小说《将夜》来实验吧

代码

import requests,sys,os
from bs4 import BeautifulSoup

'''

    @ author: Face_to_sun
    @ modify: 2018-12-1

'''

def getListHTML(url):
    """Fetch the chapter-index page and return its decoded HTML text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as ``str``. If the request fails (connection
        problem, timeout, invalid URL, or a non-success HTTP status) the
        sentinel string "产生异常" is returned instead, so the caller always
        receives a string.
    """
    headers = {
        'Referer': 'https://www.biqukan.com/s.php?ie=gbk&s=2758772450457967865&q=%BD%AB%D2%B9',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36'
    }
    try:
        # Bound the wait (same limit downLoadPage uses); without a timeout a
        # stalled server would block the script indefinitely.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        # Decode with the encoding detected from the body instead of the
        # default derived from the HTTP headers.
        response.encoding = response.apparent_encoding
        return response.text
    except requests.RequestException:
        # Only network/HTTP failures map to the sentinel; programming errors
        # and Ctrl-C are no longer silently swallowed.
        return "产生异常"

def downLoadPage(url, filename):
    """Download one chapter page and save its text to ``<filename>.txt``.

    The chapter body is taken from the first ``<div class="showtxt">`` on the
    page. Runs of eight non-breaking spaces are turned into blank lines. The
    text is written to disk as UTF-8 and then echoed to stdout.

    Args:
        url: Address of the chapter page.
        filename: Output file name without the ``.txt`` extension.

    Returns:
        None. On any expected failure (network/HTTP error, missing content
        div, unusable file name, file-system or console-encoding error) the
        function prints "失败" and returns, so one bad chapter does not abort
        a batch download.
    """
    headers = {
        'Referer': 'https://www.biqukan.com/2_2758/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36'
    }
    try:
        # The request lives inside the try block so that a timeout or
        # connection reset is reported as a failed chapter, not a crash.
        response = requests.get(url, headers=headers, timeout=30)
        # Do not parse an HTTP error page as if it were a chapter.
        response.raise_for_status()
        # Decode the same way getListHTML does; the header-derived default
        # can garble non-ASCII text.
        response.encoding = response.apparent_encoding
        soup = BeautifulSoup(response.text, 'lxml')
        content = soup.find_all('div', class_='showtxt')
        # IndexError here means the page has no chapter container.
        text = content[0].get_text().replace('\xa0'*8, '\n\n')
        # Save before echoing so a console that cannot encode the text does
        # not cost us the file.
        with open(filename+'.txt', 'w', encoding='utf8') as f:
            f.write(text)
        print(text)
    except (requests.RequestException, IndexError, OSError, TypeError, UnicodeError):
        # TypeError covers a non-string filename (e.g. None).
        print("失败")

def getUrl(soup):
    """Download every chapter linked from a parsed chapter-index page.

    Collects all ``<a>`` tags inside the first ``<div class="listmain">``,
    downloads each one with ``downLoadPage`` (using the link text as the file
    name), and rewrites a progress percentage on stdout after each link.

    Args:
        soup: Parsed index page; must provide a BeautifulSoup-style
            ``find_all``.

    Returns:
        None. If the page has no ``listmain`` container, a message is printed
        and nothing is downloaded.
    """
    from urllib.parse import urljoin

    server = "https://www.biqukan.com"
    div = soup.find_all('div', class_='listmain')
    if not div:
        # Happens e.g. when the index request failed and an error string was
        # parsed instead of the real page.
        print("未找到章节列表")
        return
    Tag_a = div[0].find_all('a')
    nums = len(Tag_a)
    for done, tag in enumerate(Tag_a, start=1):
        href = tag.get('href')
        if href:
            # urljoin handles root-relative, relative and absolute hrefs.
            page_url = urljoin(server, href)
            # get_text() always yields a str; .string is None when the
            # anchor contains nested markup.
            name = tag.get_text()
            downLoadPage(page_url, name)
        # Scale to a real percentage and count the link just processed, so
        # the display ends at 100%.
        sys.stdout.write("已下载:%.3f%%" % (100 * done / nums) + '\r')
        sys.stdout.flush()


if __name__ == "__main__":
    # Script entry point: fetch the novel's index page, parse it, and hand
    # the parsed tree to getUrl, which downloads each linked chapter.
    index_url = "https://www.biqukan.com/2_2758/"
    getUrl(BeautifulSoup(getListHTML(index_url), 'lxml'))


效果
在这里插入图片描述

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值