利用Python下载并保存懒人听书音频

用Python爬到音频下载地址,再用Internet Download Manager批量自动下载

# -*- coding: utf-8 -*-
"""
Created on Fri Dec 14 21:07:11 2018
@author: fuwen
"""
from subprocess import call
import os,requests, base64, json, time
import jsonpath
import html
from bs4 import BeautifulSoup
# First path segment of the per-section playlist URL built in the main section.
# NOTE(review): this name shadows the built-in type(); renaming it requires
# updating the playlist URL construction as well.
type=1
# Album id, i.e. the number at the end of an album URL such as
# https://www.lrts.me/album/400485
BookID = 99144
# Second path segment of the album listing URL
# (presumably a starting offset/section index -- TODO confirm against the site)
Sections=0
# Total number of chapters (third path segment of the album listing URL)
Chapters=92
# Base folder in which downloads are saved; delDir() later appends the album title
FilePath = 'd:\\mp3'
def delDir(TitleName):
    """Point the global FilePath at a fresh, empty folder named after the album.

    The global ``FilePath`` is extended with ``TitleName``. If that folder
    already exists it is removed together with everything inside it, and then
    it is created again (including any missing parent folders).

    :param TitleName: folder name to append to the current ``FilePath``
    :return: None; the new location is published through the global ``FilePath``
    """
    # Local import keeps this block self-contained; the file does not import
    # shutil at the top.
    import shutil

    global FilePath
    FilePath = os.path.join(FilePath, TitleName)
    if os.path.exists(FilePath):
        # rmtree receives the path as data, so titles containing spaces or
        # shell metacharacters cannot break (or hijack) a shell command line.
        shutil.rmtree(FilePath)
    os.makedirs(FilePath)
    
# Path of the Internet Download Manager (IDM) executable used to perform the downloads
IdmPath = 'd:\\Internet Download Manager\\IDMan.exe'
# Helper program written in AutoIt that watches for IDM's pop-up dialog and
# clicks "No"; the main section starts it before downloading and kills it afterwards
exe_name="lrts.exe"
def kill_exe(exe_name):
    """
    Terminate a running program by its executable name via ``taskkill``.

    :param exe_name: image name of the process to terminate, e.g. "lrts.exe"
    :return: None
    """
    # Build the command line first, then hand it to the shell.
    command = 'taskkill /f /t /im ' + exe_name
    os.system(command)
    message = "杀死进程{}".format(exe_name)
    print(message)
def get_json_value(json_data, key_name):
    """Collect the value stored under ``key_name`` in each element of ``json_data``.

    The lookup uses the JSONPath expression ``$[*].<key_name>``, i.e. it only
    inspects the direct children of ``json_data`` rather than searching
    recursively.

    :param json_data: parsed JSON container to search
    :param key_name: key whose values should be collected
    :return: whatever ``jsonpath.jsonpath`` returns for the expression
        (NOTE(review): presumably a list of matches, or False when nothing
        matches -- confirm against the jsonpath package in use)
    """
    expression = '$[*].{key_name}'.format(key_name=key_name)
    return jsonpath.jsonpath(json_data, expression)
def getChapterTitle(html):
    """Extract the album title from the HTML of an album page.

    The title is taken from the first ``<h1 class="nowrap">`` element.

    :param html: HTML source of the album page
    :return: text of the heading, or None when the page has no such heading
    """
    soup = BeautifulSoup(html, 'html.parser')
    heading = soup.find('h1', attrs={"class": "nowrap"})
    if heading is None:
        # find() yields None when nothing matches; report that to the caller
        # (which checks for None) instead of failing on ``None.text``.
        return None
    return heading.text
def IdmDownLoad(DownloadUrl, Mp3Name):
    """Run IDM to download ``DownloadUrl`` into ``FilePath`` as ``Mp3Name``.

    The call blocks until the launched IDM process returns.

    :param DownloadUrl: address of the audio file
    :param Mp3Name: file name to save the download under
    :return: None
    """
    # NOTE(review): flags presumably mean /d = URL, /p = target folder,
    # /f = target file name, /n = silent mode -- confirm against IDM's
    # command-line documentation.
    idm_command = [
        IdmPath,
        '/d', DownloadUrl,
        '/p', FilePath,
        '/f', Mp3Name,
        '/n',
    ]
    call(idm_command)
def IdmDownLoadChangeName(DownloadUrl, Mp3Name):
    """Rename an already-downloaded file in ``FilePath`` to ``Mp3Name``.

    The current file name is taken to be the last path segment of
    ``DownloadUrl``, ignoring any query string or fragment.

    :param DownloadUrl: URL the file was downloaded from
    :param Mp3Name: new file name
    :return: None
    :raises OSError: if the source file does not exist or cannot be renamed
    """
    # Local import keeps this block self-contained; the file does not import
    # urllib at the top.
    from urllib.parse import urlsplit

    # Work on the URL's path component only: slicing between the last '/' and
    # the last '?' dropped the final character when the URL had no query
    # string, and picked the wrong segment when the query contained a '/'.
    url_path = urlsplit(DownloadUrl).path
    src_name = url_path.rsplit('/', 1)[-1]
    os.rename(os.path.join(FilePath, src_name), os.path.join(FilePath, Mp3Name))
def ChangeFileName(filename):
    """Turn a chapter title into a string usable as a Windows file name.

    HTML entities are decoded first, so characters they produce are sanitised
    as well. Characters that are not allowed in file names, curly quotes and
    the byte-order mark are removed, and everything from the first ``(``
    onwards is discarded.

    :param filename: raw chapter title
    :return: sanitised title without a file extension
    """
    filename = html.unescape(filename)
    # Characters to drop: \ / : * " curly quotes < > | and the BOM (U+FEFF).
    cleanup = {ord(ch): None for ch in '\\/:*"\u201c\u201d<>|\ufeff'}
    # '?' is illegal in Windows file names; substitute the full-width question
    # mark (U+FF1F). NOTE(review): the original replace('?', '?') was a no-op,
    # presumably a mangled half-width -> full-width substitution.
    cleanup[ord('?')] = '\uff1f'
    filename = filename.translate(cleanup)
    return filename.split('(')[0]
if __name__ == "__main__":
    # Script entry point: fetch the chapter list and title of album BookID,
    # recreate the target folder, then hand each chapter's audio URL to IDM.
    Mp3ListJsonUrl = 'https://www.lrts.me/ajax/album/{0}/{1}/{2}'.format(BookID,Sections,Chapters)
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'}
    TitleUrl = 'https://www.lrts.me/album/{0}'.format(BookID)
    conn = requests.session()
    Mp3ListJson = conn.get(Mp3ListJsonUrl, headers = headers)
    Mp3TitleJson = conn.get(TitleUrl, headers = headers)

    titlename = getChapterTitle(Mp3TitleJson.text)
    if titlename is None:
        print("没找到这个题目")
        # SystemExit does not rely on the site-provided exit() helper.
        raise SystemExit
    print(titlename)
    delDir(titlename)

    Josndata = json.loads(Mp3ListJson.text)["data"]["data"]
    # Pair every sanitised name with its section id by position. A dict keyed
    # by the raw name cannot be queried with the sanitised name and collapses
    # chapters that share a title.
    Mp3Entries = [(ChangeFileName(Mp3dict['name']), Mp3dict['id']) for Mp3dict in Josndata]
    AlreadyDown = {FileName.replace('.mp3','') for FileName in os.listdir(FilePath)}

    # Start the helper that dismisses IDM's pop-up dialog; make sure it is
    # killed again even if the download loop fails.
    os.startfile(exe_name)
    try:
        time.sleep(2)
        for Count, (Mp3Name, Mp3Id) in enumerate(Mp3Entries, start=1):
            if Mp3Name in AlreadyDown:
                continue
            # URL layout: /ajax/playlist/<type>/<album id>/<1-based section number>
            Mp3JsonUrl = "https://www.lrts.me/ajax/playlist/{0}/{1}/{2}".format(type,BookID,Count)
            # Separate name so the imported html module is not overwritten.
            PlaylistHtml = conn.get(Mp3JsonUrl, headers = headers).text
            soup = BeautifulSoup(PlaylistHtml, 'html.parser')
            mp3id = "section"+str(Mp3Id)
            print(mp3id)
            # The audio address is the value of the <input> inside
            # <li id="section<ID>">.
            SectionItem = soup.find('li', attrs={"id": mp3id})
            UrlInput = SectionItem.find('input') if SectionItem is not None else None
            DownloadUrl = UrlInput.get("value") if UrlInput is not None else None
            if not DownloadUrl:
                # NOTE(review): a missing address presumably means the chapter
                # has not been purchased, as the message says.
                print('%s,未购买,跳过……'%Mp3Name)
                continue
            print(DownloadUrl)
            try:
                IdmDownLoad(DownloadUrl,Mp3Name+'.mp3')
            except OSError as err:
                # IDM could not be launched (e.g. wrong IdmPath); keep going
                # with the remaining chapters as the original did.
                print('%s,调用IDM失败,跳过……(%s)'%(Mp3Name, err))
                continue
            time.sleep(2)
    finally:
        kill_exe(exe_name)

再用AutoIt写个程序,自动关闭IDM烦人的提示框

; Endless polling loop that looks for a window titled "Internet Download Manager",
; brings it to the front and clicks the control with ID 7 in it.
$winTitle="Internet Download Manager"
While 1
	; Look up the window by title (the trailing ';' on the next line starts an empty comment).
	$winHandle = WinGetHandle($winTitle);
	; Debug output: title of whatever window the handle refers to.
	ConsoleWrite(WinGetTitle($winHandle))
	; Only activate the window when its title matches exactly.
	If WinGetTitle($winHandle)=="Internet Download Manager" Then
	WinActivate($winHandle)
	EndIf
	; NOTE(review): WinWaitActive is called without a timeout, so it presumably blocks
	; here until such a window becomes active -- confirm this is intended.
	If  WinWaitActive("Internet Download Manager") Then
		;ConsoleWrite($winHandle)
	; NOTE(review): control ID 7 is presumably the dialog's "No" button (7 is the
	; standard Windows IDNO value) -- confirm against the actual IDM dialog.
	ControlClick($winHandle, "", "[ID:7]")
	EndIf
	
	
	; Short pause between polls.
	Sleep(100)
	WEnd
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值