python 百度文库_百度文库文档下载地址解析python版【摸索不易,还请支持】

[Python] 纯文本查看 复制代码

import requests

from bs4 import BeautifulSoup

class BaiDuWenKu(object):
    """Resolve the direct download address of a Baidu Wenku document.

    Typical use: create an instance, call ``setBduss`` with the account's
    BDUSS cookie, pass the document page URL to ``parserFileInfo`` and hand
    the resulting dict to ``download``.
    """

    # Seconds to wait for any single HTTP request before giving up, so a
    # stalled connection cannot block the caller forever.
    REQUEST_TIMEOUT = 30

    # Names of the <input> elements read from the document page; their
    # values form the body of the download request.
    FILE_INFO_OPTIONS = (
        "ct", "doc_id", "retType", "sns_type", "storage", "useTicket",
        "target_uticket_num", "downloadToken", "sz", "v_code", "v_input",
    )

    def __init__(self):
        """Create the HTTP session and install a browser-like User-Agent."""
        headers = {
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0"
        }
        self.session = requests.Session()
        self.session.headers.update(headers)

    def setBduss(self, bduss):
        """Store the BDUSS cookie on the session.

        The cookie is sent with every later request made through this
        instance and serves as the identity used when resolving downloads.

        Args:
            bduss: Value of the ``BDUSS`` cookie.
        """
        self.session.cookies.update({"BDUSS": bduss})

    def parserFileInfo(self, file_url):
        """Fetch a document page and collect its download form fields.

        Args:
            file_url: URL of the document page.

        Returns:
            A dict mapping every name in ``FILE_INFO_OPTIONS`` to the value
            of the matching ``<input>`` element ("" when the element has no
            value), plus ``"req_vip_free_doc": "1"``.

        Raises:
            Exception: If any expected ``<input>`` element is missing from
                the page.
        """
        res = self.session.get(file_url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(res.text, "html.parser")

        data = {}
        for option in self.FILE_INFO_OPTIONS:
            field = soup.find("input", attrs={"name": option})
            # find() returns None when nothing matches; check for that
            # explicitly instead of catching BaseException, which would also
            # swallow KeyboardInterrupt and SystemExit.
            if field is None:
                raise Exception("文档信息解析失败!")
            data[option] = field.get("value") or ""

        data["req_vip_free_doc"] = "1"
        return data

    def download(self, data):
        """Resolve the download address for a parsed document.

        Args:
            data: Form fields as returned by ``parserFileInfo``; must
                contain ``"doc_id"``.

        Returns:
            The value of the ``Location`` header of the redirect answered
            by the download endpoint.

        Raises:
            Exception: If the document is not reported as a VIP-free
                document, or if the download request does not answer with a
                302 redirect that carries a ``Location`` header.
            KeyError: If ``data`` lacks ``"doc_id"`` or the VIP-free lookup
                response lacks ``data.is_vip_free_doc``.
        """
        url = "https://wenku.baidu.com/user/interface/getvipfreedoc"
        params = {"doc_id": data["doc_id"]}
        jRes = self.session.get(
            url, params=params, timeout=self.REQUEST_TIMEOUT
        ).json()
        if not jRes["data"]["is_vip_free_doc"]:
            raise Exception("只能下载vip免费文档哦")

        url = "https://wenku.baidu.com/user/submit/download"
        # Redirects are not followed: the redirect target itself is the
        # download address we want to hand back.
        res = self.session.post(
            url, data=data, allow_redirects=False, timeout=self.REQUEST_TIMEOUT
        )
        location = res.headers.get("Location") if res.status_code == 302 else None
        # A 302 without a Location header gives no usable address, so treat
        # it as a failure rather than returning None to the caller.
        if not location:
            raise Exception("下载失败,请稍后再试!")
        return location

def main(bduss=None, url=None):
    """Resolve and display the download address of one document.

    Creates a ``BaiDuWenKu`` client, installs the BDUSS cookie, parses the
    document page and prints the resolved download address, then waits for
    Enter.

    Args:
        bduss: BDUSS cookie value used for authentication. When omitted,
            the value embedded below is used (parts of it are masked with
            asterisks in this listing, so replace it with a real cookie).
        url: Document page URL. When omitted, the sample document URL
            embedded below is used.
    """
    if bduss is None:
        bduss = "Vx*************81djg2WDgtUU5pMkE5UnhmRV*********AAAAAAEAAABlTk4wbnZwZW5neW91amlnZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIBo9V6AaPVeWG"
    if url is None:
        url = "https://wenku.baidu.com/view/96fec4f4f321dd36a32d7375a417866fb94ac074.html?fr=search-income2"

    bdwk = BaiDuWenKu()
    bdwk.setBduss(bduss)

    file_info = bdwk.parserFileInfo(url)
    download_url = bdwk.download(file_info)

    # input() prints the message and then blocks until Enter is pressed.
    input("文档下载地址为:{}\n请及时下载".format(download_url))

if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Show the error text and block until Enter is pressed. Only
        # Exception is caught so that Ctrl-C (KeyboardInterrupt) and
        # SystemExit still terminate the script normally.
        input(e)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值