源码
二话不说,先上代码
# -*- coding: utf-8 -*-
#项目地址:https://github.com/JackKing-defier/Baiduwenku
#@author: JackKing_defier
import requests
# from bs4 import BeautifulSoup
import bs4
import codecs
def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The request is sent with a ``Baiduspider`` User-agent header and a
    30-second timeout. The response encoding is set from
    ``apparent_encoding`` before the body is decoded.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``''`` when the request fails or the
        server answers with an error status.
    """
    kv = {'User-agent': 'Baiduspider'}
    try:
        r = requests.get(url, headers=kv, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: network/HTTP failures yield an empty page.
        # Only requests' own errors are caught, so KeyboardInterrupt,
        # SystemExit and programming errors are no longer swallowed.
        return ''
def findPList(html):
    """Extract the page title and the document text lines from ``html``.

    Args:
        html: HTML source of the page (may be an empty string).

    Returns:
        A list of strings: the ``<title>`` text first (when the page has
        one), followed by the newline-split text of every
        ``<div class="bd doc-reader">``. Spaces and form-feed characters
        are stripped from every entry.
    """
    soup = bs4.BeautifulSoup(html, "html.parser")
    plist = []

    # soup.title is None when the markup has no <title> (e.g. the empty
    # string returned after a failed download), and .string can itself be
    # None; skip the title in both cases instead of raising.
    title = soup.title.string if soup.title is not None else None
    if title is not None:
        plist.append(title)

    for div in soup.find_all('div', attrs={"class": "bd doc-reader"}):
        plist.extend(div.get_text().split('\n'))

    # Remove spaces and form feeds (page-break markers) in a single pass.
    return [c.replace(' ', '').replace('\x0c', '') for c in plist]
def printPList(plist, path = 'baiduwenku.txt'):
    """Write every entry of ``plist`` to ``path``, one entry per line.

    Args:
        plist: Iterable of strings to write.
        path: Destination file; it is created or overwritten and encoded
            as UTF-8.
    """
    # The with-block closes the file even if a write raises.
    # newline='' disables newline translation so each '\n' is written
    # unchanged on every platform.
    with open(path, 'w', encoding='utf-8', newline='') as out:
        for line in plist:
            out.write(line)
            out.write('\n')
def main(url = 'https://wenku.baidu.com/view/7da5a13cfe00bed5b9f3f90f76c66137ee064ff5.html?re=view'):
    """Download one document page and save its text.

    Fetches ``url`` with getHTMLText, extracts the text lines with
    findPList and writes them with printPList using its default path.

    Args:
        url: Page to download; defaults to the sample document used by
            the original script.
    """
    html = getHTMLText(url)
    plist = findPList(html)
    printPList(plist)


# Run only when executed as a script, so importing this module does not
# trigger a network request and a file write.
if __name__ == '__main__':
    main()
介绍
总有人想下载百度文库中的文件,却又不想去开所谓的vip,又不想用下载券下载,今天我使用上面源码将其运用到了公众号上,大家便可以轻松下载百度文库,管他需不需要vip,管他需不需要下载券,管他是不是vip专享,一律统统可以轻松下载,方法极其简单,只需两步操作:
1、扫描下方二维码进入公众号。
2、回复想要下载的百度文库链接,等待3秒钟,便可获得下载链接,点击下载链接即可获得文档内容。
看到这里是不是爽歪歪