#!/usr/bin/env python
# -*- coding: utf-8 -*-
# __Author__: yunrui
# __Date__: 2019/8/16
# 从中国评书网下载评书《射雕英雄传》
import os
import re
import time
import urllib
import urllib.request

import requests

from getHeaders import getHeaders
# --- Configuration ---
# Folder the mp3 files are saved into.
novel_file_name = '射雕英雄传'
# Filename prefix for each episode's mp3.
novel_chinese_name = '射雕英雄传_刘少佐'
# The book's id, visible on its zgpingshu.com page.
novel_id = 5277
# Total number of episodes, also visible on the site.
novel_max_count = 200

# exist_ok=True makes this idempotent and avoids the check-then-create race
# of the original os.path.exists() / os.makedirs() pair.
os.makedirs(novel_file_name, exist_ok=True)
# The first episode's URL has no trailing episode number, so it is handled
# separately; the logic mirrors the loop below.
name = '%s_1' % novel_chinese_name
# os.path.join instead of a hard-coded '\\' separator: the backslash form
# only works on Windows — elsewhere it writes a file literally named
# 'dir\name.mp3' into the cwd instead of into the created directory.
target = os.path.join(novel_file_name, '%s.mp3' % name)
if not os.path.exists(target):
    url = "http://www.zgpingshu.com/down/%d/" % novel_id
    response = requests.get(url=url, headers=getHeaders())
    # The site serves GBK-encoded pages; decode accordingly.
    response.encoding = 'gb2312'
    html = response.text
    # The real mp3 link is the anchor with id='down'. Raises IndexError if
    # the page layout changes — better to fail loudly than save garbage.
    download_url = re.findall(r'<a href="(.*?)" id=\'down\'', html)[0]
    urllib.request.urlretrieve(download_url, target)
    # Close the response and pause, otherwise the site treats rapid
    # requests as an attack and drops the connection.
    response.close()
    time.sleep(1)
    print('%s下载完毕' % name)
# Episodes 2..novel_max_count follow the '<id>/<count>.html' URL pattern.
# A range-based for loop replaces the original while + manual counter; the
# existence check makes the script resumable after an interruption.
for count in range(2, novel_max_count + 1):
    name = '%s_%d' % (novel_chinese_name, count)
    # Portable path construction (see note on the first episode above the
    # original hard-coded '\\' only worked on Windows).
    target = os.path.join(novel_file_name, '%s.mp3' % name)
    if os.path.exists(target):
        continue  # already downloaded on a previous run
    url = "http://www.zgpingshu.com/down/%d/%d.html" % (novel_id, count)
    response = requests.get(url=url, headers=getHeaders())
    # The site serves GBK-encoded pages; decode accordingly.
    response.encoding = 'gb2312'
    html = response.text
    # The real mp3 link is the anchor with id='down'.
    download_url = re.findall(r'<a href="(.*?)" id=\'down\'', html)[0]
    urllib.request.urlretrieve(download_url, target)
    # Close the response and pause, otherwise the site treats rapid
    # requests as an attack and drops the connection.
    response.close()
    time.sleep(1)
    print('%s下载完毕' % name)
# 注: getHeaders 是我自己写的一个随机生成 header 的函数,很简单,在我另一篇博文中有代码;
# 如果有报 10061 的错误,大多数情况下是网络的问题,代码是没有问题的,可以再跑一遍试试