#!/usr/bin/env python
# -*- coding: utf-8 -*-
# __Author__: yunrui
# __Date__: 2019/8/16
# 从中国评书网下载评书《射雕英雄传》
import os
import re
import time
import urllib
import urllib.request

import requests

from getHeaders import getHeaders
# --- Configuration ---
# Folder the mp3 files are saved into.
novel_file_name = '射雕英雄传'
# Filename prefix for each episode's mp3.
novel_chinese_name = '射雕英雄传_刘少佐'
# The book's id, visible on its zgpingshu.com page.
novel_id = 5277
# Total number of episodes, also visible on the site.
novel_max_count = 200

# exist_ok=True makes this idempotent and avoids the check-then-create race
# of the original os.path.exists() / os.makedirs() pair.
os.makedirs(novel_file_name, exist_ok=True)
# The first episode's URL has no trailing episode number, so it is handled
# separately; the logic mirrors the loop below.
name = '%s_1' % novel_chinese_name
# os.path.join instead of a hard-coded '\\' separator: the backslash form
# only works on Windows — elsewhere it writes a file literally named
# 'dir\name.mp3' into the cwd instead of into the created directory.
target = os.path.join(novel_file_name, '%s.mp3' % name)
if not os.path.exists(target):
    url = "http://www.zgpingshu.com/down/%d/" % novel_id
    response = requests.get(url=url, headers=getHeaders())
    # The site serves GBK-encoded pages; decode accordingly.
    response.encoding = 'gb2312'
    html = response.text
    # The real mp3 link is the anchor with id='down'. Raises IndexError if
    # the page layout changes — better to fail loudly than save garbage.
    download_url = re.findall(r'<a href="(.*?)" id=\'down\'', html)[0]
    urllib.request.urlretrieve(download_url, target)
    # Close the response and pause, otherwise the site treats rapid
    # requests as an attack and drops the connection.
    response.close()
    time.sleep(1)
    print('%s下载完毕' % name)
# Episodes 2..novel_max_count follow the '<id>/<count>.html' URL pattern.
# A range-based for loop replaces the original while + manual counter; the
# existence check makes the script resumable after an interruption.
for count in range(2, novel_max_count + 1):
    name = '%s_%d' % (novel_chinese_name, count)
    # Portable path construction (see note on the first episode above the
    # original hard-coded '\\' only worked on Windows).
    target = os.path.join(novel_file_name, '%s.mp3' % name)
    if os.path.exists(target):
        continue  # already downloaded on a previous run
    url = "http://www.zgpingshu.com/down/%d/%d.html" % (novel_id, count)
    response = requests.get(url=url, headers=getHeaders())
    # The site serves GBK-encoded pages; decode accordingly.
    response.encoding = 'gb2312'
    html = response.text
    # The real mp3 link is the anchor with id='down'.
    download_url = re.findall(r'<a href="(.*?)" id=\'down\'', html)[0]
    urllib.request.urlretrieve(download_url, target)
    # Close the response and pause, otherwise the site treats rapid
    # requests as an attack and drops the connection.
    response.close()
    time.sleep(1)
    print('%s下载完毕' % name)
# 注: getHeaders 是我自己写的一个随机生成 header 的函数,很简单,在我另一篇博文中有代码;
# 如果有报 10061 的错误,大多数情况下是网络的问题,代码是没有问题的,可以再跑一遍试试