用 Python 爬到音频下载地址,再用 Internet Download Manager(IDM)批量自动下载
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 14 21:07:11 2018
@author: fuwen
"""
from subprocess import call
import os,requests, base64, json, time
import jsonpath
import html
from bs4 import BeautifulSoup
# Value placed in the first path segment of the playlist URL built by the main script.
# NOTE(review): this name shadows the built-in type(); it is kept because the main script reads it.
type=1
# Book id: the number at the end of the album URL, e.g. https://www.lrts.me/album/400485
BookID = 99144
# Second path segment of the chapter-list URL (ajax/album/<BookID>/<Sections>/<Chapters>).
# NOTE(review): its server-side meaning is not visible here -- presumably a start offset; confirm.
Sections=0
# Total number of chapters
Chapters=92
# Folder the downloads are saved into; delDir() appends the album title to it.
FilePath = 'd:\\mp3'
# Delete the album folder with everything in it, then recreate it empty.
def delDir(TitleName):
    """Point ``FilePath`` at a fresh, empty sub-folder named after the album.

    The module-level ``FilePath`` is extended in place with ``TitleName``; any
    existing folder (or stray file) at that location is removed and an empty
    folder is created, including missing parent folders.

    Args:
        TitleName: Album title used as the sub-folder name.

    Raises:
        OSError: If the old folder cannot be removed or the new one created.
    """
    global FilePath
    # Local import: the file's import header does not provide shutil.
    import shutil

    # NOTE(review): TitleName comes from a scraped web page and is not
    # sanitised here; a title containing path separators or ".." would make
    # the removal below act outside the intended folder.
    FilePath = os.path.join(FilePath, TitleName)
    if os.path.isdir(FilePath):
        # Removed through the file-system API rather than a shell "RD" command
        # line, so titles with spaces or shell metacharacters are handled.
        shutil.rmtree(FilePath)
    elif os.path.exists(FilePath):
        os.remove(FilePath)
    os.makedirs(FilePath)
# Path to the installed Internet Download Manager executable used for downloading
IdmPath = 'd:\\Internet Download Manager\\IDMan.exe'
# AutoIt-built helper program that detects the pop-up dialog and clicks "No";
# the main script starts it before downloading and kills it when finished.
exe_name="lrts.exe"
def kill_exe(exe_name):
    """Force-terminate a process by image name and report it on stdout.

    Runs ``taskkill /f /t /im <exe_name>`` through the system shell and then
    prints a confirmation line, whether or not the command succeeded.

    :param exe_name: image name of the process, e.g. ``MESMTPC.exe``
    :return: None
    """
    kill_command = 'taskkill /f /t /im ' + exe_name
    os.system(kill_command)
    message = "杀死进程{}".format(exe_name)
    print(message)
def get_json_value(json_data, key_name):
    """Collect the values stored under ``key_name`` one level below the root.

    The lookup uses the JSONPath expression ``$[*].<key_name>``, so only the
    direct children of ``json_data`` are inspected (a ``$..<key_name>``
    expression would search at any depth instead). Background on the
    expression syntax:
    https://blog.csdn.net/cling_snail/article/details/80980296

    The result of ``jsonpath.jsonpath`` is returned unchanged.
    NOTE(review): the jsonpath package reportedly returns ``False`` rather
    than an empty list when nothing matches -- confirm before iterating.
    """
    expression = '$[*].{0}'.format(key_name)
    return jsonpath.jsonpath(json_data, expression)
def getChapterTitle(html):
    """Extract the album title from the album page markup.

    Args:
        html: Markup of the album page as a string.

    Returns:
        The text of the first ``<h1 class="nowrap">`` element, or ``None``
        when the page contains no such element.
    """
    soup = BeautifulSoup(html, 'html.parser')
    heading = soup.find('h1', attrs={"class": "nowrap"})
    if heading is None:
        # find() yields None when nothing matches; pass that on so the caller's
        # "is None" check can report a missing title instead of crashing here.
        return None
    return heading.text
def IdmDownLoad(DownloadUrl, Mp3Name):
    """Hand one download to Internet Download Manager and wait for the call to return.

    Args:
        DownloadUrl: URL passed to IDM with ``/d``.
        Mp3Name: Target file name passed with ``/f``.

    The target folder is the module-level ``FilePath`` (``/p``). ``/n`` is
    IDM's silent-mode switch per its command-line documentation.
    """
    idm_command = [
        IdmPath,
        '/d', DownloadUrl,
        '/p', FilePath,
        '/f', Mp3Name,
        '/n',
    ]
    call(idm_command)
def IdmDownLoadChangeName(DownloadUrl, Mp3Name):
    """Rename a file in ``FilePath`` from its URL-derived name to ``Mp3Name``.

    The source name is the last path segment of ``DownloadUrl`` with any
    query string removed. The file is expected to exist in ``FilePath``
    already (presumably downloaded by IDM under its default name -- confirm).

    Args:
        DownloadUrl: URL the file was downloaded from.
        Mp3Name: New file name inside ``FilePath``.

    Raises:
        OSError: If the source file is missing or the rename fails.
    """
    # Cut the query string first: a URL without '?' must keep its full file
    # name, and a '/' inside the query must not count as a path separator.
    url_path = DownloadUrl.split('?', 1)[0]
    src_name = url_path.rsplit('/', 1)[-1]
    os.rename(os.path.join(FilePath, src_name), os.path.join(FilePath, Mp3Name))
def ChangeFileName(filename):
    """Turn a chapter name into the base name used for its mp3 file.

    Steps, in order:
      1. remove the characters ``\\ / : * “ ” < > |`` and the byte-order
         mark (U+FEFF);
      2. keep only the text before the first ``(``;
      3. decode HTML entities in what is left.

    ``?`` is left untouched. Because entities are decoded last, a character
    that arrives as an entity (e.g. ``&lt;``) is not removed by step 1.
    """
    # Map every unwanted character to None so translate() drops it in one pass.
    unwanted = dict.fromkeys(map(ord, '\\/:*“”<>|' + chr(65279)))
    cleaned = filename.translate(unwanted)
    base_name, _, _ = cleaned.partition('(')
    return html.unescape(base_name)
def main():
    """Fetch the chapter list of album ``BookID`` and hand every chapter to IDM.

    Steps:
      1. Request the chapter list (JSON) and the album page (HTML).
      2. Read the album title from the page and let ``delDir`` prepare an
         empty folder named after it.
      3. Start the helper program ``exe_name`` (it dismisses IDM's pop-ups).
      4. For every chapter, request its playlist page, read the download URL
         from ``<li id="section<id>">`` and pass it to ``IdmDownLoad``.
      5. Kill the helper program, even when step 4 fails part-way.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'}
    Mp3ListJsonUrl = 'https://www.lrts.me/ajax/album/{0}/{1}/{2}'.format(BookID, Sections, Chapters)
    TitleUrl = 'https://www.lrts.me/album/{0}'.format(BookID)
    conn = requests.session()

    list_response = conn.get(Mp3ListJsonUrl, headers=headers)
    title_response = conn.get(TitleUrl, headers=headers)

    titlename = getChapterTitle(title_response.text)
    if titlename is None:
        print("没找到这个题目")
        return
    print(titlename)
    delDir(titlename)

    chapters = json.loads(list_response.text)["data"]["data"]
    # Pair every sanitised file name with its section id positionally. A
    # name -> id dictionary keyed by the raw name breaks when two chapters
    # share a name or when sanitising changes the name.
    named_sections = [
        (ChangeFileName(chapter['name']), chapter['id']) for chapter in chapters
    ]

    # NOTE(review): delDir() has just recreated the folder, so this set is
    # normally empty and the skip below only matters if that ever changes.
    AlreadyDown = {FileName.replace('.mp3', '') for FileName in os.listdir(FilePath)}

    os.startfile(exe_name)
    try:
        time.sleep(2)  # give the helper time to start before IDM is invoked
        # Count is the 1-based chapter position used in the playlist URL.
        for Count, (Mp3Name, section_id) in enumerate(named_sections, start=1):
            if Mp3Name in AlreadyDown:
                continue
            # Older query-string form of this request: type=1&resourcesid=99144&sections=1
            Mp3JsonUrl = "https://www.lrts.me/ajax/playlist/{0}/{1}/{2}".format(type, BookID, Count)
            # Stored under its own name so the imported html module is not rebound.
            playlist_page = conn.get(Mp3JsonUrl, headers=headers).text
            soup = BeautifulSoup(playlist_page, 'html.parser')
            mp3id = "section" + str(section_id)
            print(mp3id)
            # The download URL is the value of the <input> inside <li id="section<id>">.
            section_item = soup.find('li', attrs={"id": mp3id})
            url_input = section_item.find('input') if section_item is not None else None
            DownloadUrl = url_input.get("value") if url_input is not None else None
            if not DownloadUrl:
                # No usable link on the page -- presumably the chapter has not
                # been purchased; skip it instead of crashing.
                print('%s,未购买,跳过……'%Mp3Name)
                continue
            print(DownloadUrl)
            try:
                IdmDownLoad(DownloadUrl, Mp3Name + '.mp3')
                time.sleep(2)
            except OSError as err:
                # Launching IDM failed (e.g. wrong IdmPath); report and move on.
                print('%s,IDM调用失败,跳过……(%s)' % (Mp3Name, err))
    finally:
        # Always stop the helper, even if the loop above raised.
        kill_exe(exe_name)


if __name__ == "__main__":
    main()
再用 AutoIt 写个程序,自动关闭 IDM 烦人的提示框:
; Dismiss the "Internet Download Manager" pop-up window automatically.
; Runs forever: each pass looks up the window by title, activates it,
; waits until it is the active window and clicks the control with ID 7.
$winTitle="Internet Download Manager"
While 1
; Look up the window handle by title.
$winHandle = WinGetHandle($winTitle);
; Write the title of the window that was found to the console.
ConsoleWrite(WinGetTitle($winHandle))
; Activate the window only when its title is exactly the IDM title.
If WinGetTitle($winHandle)=="Internet Download Manager" Then
WinActivate($winHandle)
EndIf
; No timeout is passed, so per the AutoIt docs this waits until a matching window is active.
If WinWaitActive("Internet Download Manager") Then
;ConsoleWrite($winHandle)
; Click control ID 7 -- presumably the dialog's "No" button (IDNO is 7 in standard Windows dialogs); confirm.
ControlClick($winHandle, "", "[ID:7]")
EndIf
; Pause 100 ms between passes.
Sleep(100)
WEnd