爬书

最新推荐文章于 2022-09-15 13:17:31 发布

wh0am1·

最新推荐文章于 2022-09-15 13:17:31 发布

阅读量278

点赞数 1

分类专栏： Python 文章标签： python

本文链接：https://blog.csdn.net/qq_44223394/article/details/116313527

版权

Python 专栏收录该内容

36 篇文章 5 订阅

订阅专栏

import re
from datetime import datetime, timedelta
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

def downloadmain(bookname):
    """Search ebook2.lorefree.com for *bookname* and start the download flow.

    Prints a numbered list of the matching titles and then hands the list of
    detail-page URLs to ``getTime``, which asks the user which one to fetch.

    Args:
        bookname: Search term typed by the user. It is converted with
            ``str()`` and percent-encoded before being put in the query.

    Returns:
        None. Nothing is downloaded when the search yields no result.
    """
    # Percent-encode the term: left raw, characters such as '+', '&' or '#'
    # are interpreted as URL syntax and the site receives a different query.
    url = 'https://ebook2.lorefree.com/site/index?s=' + quote(str(bookname), safe='')
    rs = requests.get(url)
    bs = BeautifulSoup(rs.text, 'lxml')
    links = bs.select('#app > div > div.site-index > div.body-content > div:nth-child(2) > div>div>div>a')
    titles = bs.select('#app > div > div.site-index > div.body-content > div:nth-child(2) > div>div>div>a>h4')
    bu = ["https://ebook2.lorefree.com/" + link['href'] for link in links]
    if not bu:
        # Without this guard getTime() would index into an empty list.
        print("no book found for: " + str(bookname))
        return
    # NOTE(review): the menu numbers come from the <h4> titles while the URLs
    # come from the <a> links; this assumes one title per link - confirm.
    for count, title in enumerate(titles, start=1):
        print("*[" + str(count) + "] " + title.get_text())
    getTime(bu)

def _probe_urls(stamp_text, quoted_name):
    """Return the candidate download URLs derived from one timestamp text.

    The first ``YYYY-MM-DD HH:MM:SS`` found in *stamp_text* is taken as the
    time shown on the book page. Each candidate points at
    ``/assets/file/<YYYY>/<MM>/<DD>/<HHMMSS>/<quoted_name>`` for a moment a
    few seconds before that time. An empty list is returned when no valid
    timestamp is present.
    """
    found = re.search(r"(\d{4})-(\d{2})-(\d{2})\s*(\d{2}):(\d{2}):(\d{2})", stamp_text)
    if found is None:
        return []
    try:
        shown = datetime(*(int(part) for part in found.groups()))
    except ValueError:
        # Digits in the right shape but not a real date/time (e.g. month 13).
        return []
    urls = []
    # NOTE(review): 1, 3, 6, 10, 15 are the offsets the previous string-based
    # loop effectively tried (its decrement was cumulative). They are kept so
    # the same directories are probed in the same order; confirm whether a
    # plain 1..5 second window was intended.
    for seconds_back in (1, 3, 6, 10, 15):
        # Real date arithmetic keeps every field zero-padded and borrows from
        # the minute/hour/day when the seconds would drop below zero.
        moment = shown - timedelta(seconds=seconds_back)
        urls.append("https://ebookimg.lorefree.com/assets/file/"
                    + moment.strftime("%Y/%m/%d/%H%M%S") + "/" + quoted_name)
    return urls


def getTime(bu):
    """Ask which listed book to fetch, locate its file URL and download it.

    The chosen detail page is fetched and two things are read from it: the
    file name and a timestamp. The download directory is not linked on the
    page, so it is guessed from the timestamp: a handful of URLs are probed
    with ``testExist`` and the first one that answers is passed to ``down``.

    Args:
        bu: List of detail-page URLs, in the order they were shown to the
            user (menu entry 1 is ``bu[0]``).

    Returns:
        None. A message is printed and nothing is downloaded when the list is
        empty, the choice is invalid, or no candidate URL exists.
    """
    if not bu:
        print("nothing to download: the book list is empty")
        return
    try:
        n = int(input("please choose downloadNumber: ")) - 1
    except ValueError:
        print("downloadNumber must be a number")
        return
    if not 0 <= n < len(bu):
        # Also rejects 0, which used to wrap around to the last entry.
        print("downloadNumber must be between 1 and " + str(len(bu)))
        return

    rs = requests.get(bu[n])
    bs = BeautifulSoup(rs.text, 'lxml')
    stamps = bs.select("#app > div > div > div.col-lg-12 > div:nth-child(6)>div>p:nth-child(2)")
    names = bs.select('#app > div > div > div.col-lg-12 > div:nth-child(6)>div>p>span:nth-child(2)')
    if not names:
        print("could not find the file name on the book page")
        return
    # When several spans match, the last one wins (same as before).
    bkn = names[-1].get_text()
    # safe='' encodes '/' as well; this is what the earlier call
    # quote(text, 'utf-8') actually did, since its second positional
    # argument is `safe`, not an encoding.
    book = quote(bkn, safe='')

    downloaded = False
    for stamp in stamps:
        for downUrl in _probe_urls(stamp.get_text(), book):
            if testExist(downUrl) == 1:
                down(bkn, downUrl)
                downloaded = True
                break
    if not downloaded:
        print("no downloadable file found for: " + bkn)


def testExist(url):
    """Check whether *url* answers with HTTP 200.

    Args:
        url: Address to probe.

    Returns:
        1 when the status code is 200, otherwise None.
    """
    # stream=True avoids pulling the whole file just to read the status line,
    # but the connection is then only released when the response is closed;
    # the with-block guarantees that for every probe.
    with requests.get(url, stream=True) as r:
        if r.status_code == 200:
            return 1
    return None

def down(bookname,url):
    """Download *url* into the current directory, showing a progress bar.

    Args:
        bookname: Title of the book; used for the status message and, after
            path separators are neutralised, as the output file name.
        url: Direct URL of the file to download.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never saved as if it were the book.
    """
    print(bookname+"  downloading......")
    # The title is scraped from a web page: replace path separators so it
    # cannot point into another (or a non-existent) directory.
    filename = bookname.strip().replace("/", "_").replace("\\", "_")
    # The with-block releases the streamed connection even if writing fails.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        total_size_in_bytes = int(response.headers.get('content-length', 0))
        block_size = 1024  # 1 Kibibyte
        progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
        try:
            with open(filename, 'wb') as file:
                for data in response.iter_content(block_size):
                    progress_bar.update(len(data))
                    file.write(data)
        finally:
            progress_bar.close()
    print("\n file download successful！")

if __name__ == '__main__':
    # Script entry point: read the search term from the console and pass it
    # straight to the search/download flow.
    downloadmain(input("Enter you want to download bookname: "))

wh0am1·

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
1
评论
爬书

import requests from bs4 import BeautifulSoup from urllib.parse import quote from tqdm import tqdm def downloadmain(bookname): url = 'https://ebook2.lorefree.com/site/index?s='+str(bookname) rs = requests.get(url) bs=BeautifulSoup(rs.text,'lxm…
复制链接

扫一扫

专栏目录