# 爬书 (book scraper): search ebook2.lorefree.com for a title and download the chosen book

import requests
from bs4 import BeautifulSoup
from urllib.parse import quote
from tqdm import tqdm

def downloadmain(bookname):
    """Search ebook2.lorefree.com for *bookname* and start the download flow.

    The search-result page is fetched and parsed, every hit is printed as
    ``*[N] title`` (numbered from 1), and the list of matching detail-page
    URLs is passed to ``getTime``, which asks the user which one to download.
    When the page contains no hits a message is printed and nothing else
    happens.

    Args:
        bookname: Search keyword; converted with ``str()`` before use.
    """
    keyword = str(bookname)
    # Let requests build the query string so that spaces, '&', '#', '+' and
    # non-ASCII characters in the keyword are percent-encoded instead of
    # corrupting the URL.
    rs = requests.get('https://ebook2.lorefree.com/site/index',
                      params={'s': keyword})
    bs = BeautifulSoup(rs.text, 'lxml')
    headings = bs.select('#app > div > div.site-index > div.body-content > div:nth-child(2) > div>div>div>a>h4')
    if not headings:
        # Without this guard the user would be prompted to pick from an
        # empty list.
        print("no books found for: " + keyword)
        return
    bu = []
    for count, heading in enumerate(headings, start=1):
        # The selector ends in "a>h4", so the heading's parent is the link
        # that wraps it. Reading the href from there keeps the printed
        # number and the stored URL in step even if the page contains
        # result anchors that carry no title.
        bu.append("https://ebook2.lorefree.com/" + heading.parent['href'])
        print("*[" + str(count) + "] " + heading.get_text())
    getTime(bu)

def _candidate_dirs(stamp_text, max_seconds_back=15):
    """Return the ``YYYY/MM/DD/HHMMSS`` directories to probe for one timestamp.

    *stamp_text* is searched for a ``YYYY-MM-DD HH:MM:SS`` timestamp
    (whitespace between date and time is optional). One directory is produced
    for every whole second from 1 to *max_seconds_back* before that moment,
    nearest first. An empty list is returned when no valid timestamp is found.
    """
    import re
    from datetime import datetime, timedelta

    match = re.search(r"(\d{4})-(\d{2})-(\d{2})\s*(\d{2}):(\d{2}):(\d{2})",
                      stamp_text)
    if match is None:
        return []
    try:
        shown = datetime(*map(int, match.groups()))
    except ValueError:
        # Digits in the right layout but not a real date/time.
        return []
    # datetime arithmetic keeps the zero padding and borrows correctly from
    # minutes, hours and days when the seconds would drop below zero.
    return [
        (shown - timedelta(seconds=back)).strftime("%Y/%m/%d/%H%M%S")
        for back in range(1, max_seconds_back + 1)
    ]


def getTime(bu):
    """Ask which listed book to download, locate its file and download it.

    The user is prompted for a 1-based number until it is a valid index into
    *bu*. The chosen detail page is fetched, the book's file name and the
    timestamp(s) shown on the page are extracted, and candidate file URLs of
    the form ``.../assets/file/YYYY/MM/DD/HHMMSS/<name>`` are probed with
    ``testExist``. The first URL that exists is handed to ``down`` and the
    function returns; if none exists a message is printed.

    Args:
        bu: List of detail-page URLs, in the order they were shown to the user.
    """
    if not bu:
        print("nothing to download")
        return

    # Re-prompt instead of crashing on non-numeric input, and reject 0 or
    # negative numbers, which would otherwise index from the end of the list.
    while True:
        answer = input("please choose downloadNumber: ")
        try:
            n = int(answer) - 1
        except ValueError:
            n = -1
        if 0 <= n < len(bu):
            break
        print("please enter a number between 1 and " + str(len(bu)))

    rs = requests.get(bu[n])
    bs = BeautifulSoup(rs.text, 'lxml')
    stamps = bs.select("#app > div > div > div.col-lg-12 > div:nth-child(6)>div>p:nth-child(2)")
    names = bs.select('#app > div > div > div.col-lg-12 > div:nth-child(6)>div>p>span:nth-child(2)')
    if not names:
        print("could not find the book's file name on the page")
        return

    # When several elements match, the last one is used.
    bkn = names[-1].get_text().strip()
    # safe='' percent-encodes everything except unreserved characters,
    # including '/', so the name stays a single URL path segment.
    book = quote(bkn, safe='')
    # The title comes from a web page; keep path separators out of the local
    # file name so it cannot point into another directory.
    file_name = bkn.replace("/", "_").replace("\\", "_")

    for stamp in stamps:
        # NOTE(review): the file directory appears to be named after a moment
        # slightly earlier than the displayed timestamp (presumably the upload
        # time) - confirm against the site. Every second in the window is
        # tried, nearest first.
        for folder in _candidate_dirs(stamp.get_text()):
            downUrl = "https://ebookimg.lorefree.com/assets/file/" + folder + "/" + book
            if testExist(downUrl) == 1:
                down(file_name, downUrl)
                return
    print("could not locate a downloadable file for: " + bkn)


def testExist(url):
    """Report whether a GET request to *url* is answered with HTTP 200.

    Args:
        url: Address to probe.

    Returns:
        ``1`` when the status code is 200, otherwise ``None``.
    """
    # stream=True avoids downloading the body just to read the status line.
    # A streamed response keeps its connection open until it is consumed or
    # closed, so the with-block closes it explicitly.
    with requests.get(url, stream=True) as r:
        if r.status_code == 200:
            return 1
    return None

def down(bookname, url):
    """Download *url* into a local file named *bookname* with a progress bar.

    Args:
        bookname: Path of the file to write; also shown in the status message.
        url: Address of the file to download.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    print(bookname+"  downloading......")
    # The with-block closes the streamed response (and its connection) even
    # if writing fails part-way through.
    with requests.get(url, stream=True) as response:
        # Fail before creating the file so an error page is never saved
        # under the book's name.
        response.raise_for_status()
        # A missing Content-Length yields total=0.
        total_size_in_bytes = int(response.headers.get('content-length', 0))
        block_size = 1024  # 1 Kibibyte
        with open(bookname, 'wb') as file, \
                tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) as progress_bar:
            for data in response.iter_content(block_size):
                progress_bar.update(len(data))
                file.write(data)
    print("\n file download successful!")

if __name__ == '__main__':
    # Script entry point: read the search keyword from the console and hand
    # it straight to the search/download flow.
    downloadmain(input("Enter you want to download bookname: "))
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值