import requests,random,os,re
from bs4 import BeautifulSoup
# Base URL of the site; relative hrefs scraped from its pages are appended to it.
HomeUrl = 'http://www.22ff.com'
# Headers passed to requests.get: a desktop Chrome user-agent string.
header = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/66.0.3359.117 Safari/537.36'
    ),
}
def get_book_url(bookName, timeout=10):
    """Search the site for a novel and return the URL of its download page.

    Requests ``<HomeUrl>/txt/<bookName>`` and scans the anchors selected by
    ``div.neirong ul li a``. When an anchor's text equals ``bookName``
    exactly, the ``href`` of the anchor three positions after it is joined
    onto ``HomeUrl``. If several anchors match, the last one wins.

    Args:
        bookName: Exact title of the novel to look for.
        timeout: Seconds passed to ``requests.get`` so an unresponsive
            server cannot block the program forever.

    Returns:
        The download-page URL as a string, or ``None`` when no anchor
        matches or when any error occurs (the error is printed, not raised).
    """
    try:
        url = '{}/txt/{}'.format(HomeUrl, bookName)
        response = requests.get(url=url, headers=header, timeout=timeout)
        html = BeautifulSoup(response.text, 'lxml')
        anchors = html.select('div.neirong ul li a')
        link = None
        # Pair every anchor with the one three positions later; zip stops
        # early, so the last three anchors are never treated as titles.
        for title_anchor, target_anchor in zip(anchors, anchors[3:]):
            if title_anchor.get_text() == bookName:
                # Address of the novel's download page.
                link = HomeUrl + target_anchor.get('href')
        return link
    except Exception as e:
        # Best-effort lookup: report the problem and signal "not found".
        print(e)
        print('这里出错')
        return None
# get_book_url('大国文娱')
def mkdirs():
    """Make sure the folder used to store downloaded novels exists.

    Creates ``./小说/`` relative to the current working directory and prints
    a confirmation when it was newly created. Nothing is printed when the
    path already exists.

    Returns:
        The folder path ``'./小说/'`` (with trailing slash).
    """
    path = './小说/'
    try:
        # Attempt the creation directly instead of checking first, which
        # avoids the gap between an existence check and the mkdir call.
        os.mkdir(path)
    except FileExistsError:
        # Already present: nothing to do.
        pass
    else:
        print('文件夹已创建成功...')
    return path
# print(checkFile())
def checkFile(path, bookName):
    """Report whether a file named ``bookName`` exists anywhere under ``path``.

    The search uses ``os.walk``, so it covers ``path`` and all of its
    subdirectories. Only file names are compared (exact match); directory
    names are ignored. The walk stops at the first match.

    Args:
        path: Directory to search.
        bookName: File name to look for, including its extension.

    Returns:
        ``True`` if a matching file is found, otherwise ``False`` (also when
        ``path`` does not exist, because ``os.walk`` then yields nothing).
    """
    return any(
        bookName in filenames
        for _dirpath, _dirnames, filenames in os.walk(path)
    )
# print(checkFile('./小说/','宇宙交易系统'))
def down_book(bookName, timeout=10):
    """Open a novel's download page and build the real download link.

    The link is recovered by plain string slicing of the first ``<script>``
    element inside ``div.down_bar``. The original author notes this is a
    clumsy approach and that a browser-driving tool such as selenium would
    make the function much simpler.

    Args:
        bookName: Exact title of the novel, forwarded to ``get_book_url``.
        timeout: Seconds passed to ``requests.get`` so an unresponsive
            server cannot block the program forever.

    Returns:
        The download URL as a string, or ``None`` when the novel is not
        found, the page does not answer with HTTP 200, or any error occurs
        while fetching or parsing (the error is printed, not raised).
    """
    page_url = get_book_url(bookName)
    if page_url is None:
        print('暂未搜索到该小说...')
        return None

    print('已搜索到该资源,正在获取链接...')
    try:
        response = requests.get(page_url, headers=header, timeout=timeout)
        if response.status_code != 200:
            print('网页请求出错,获取链接失败...')
            return None

        html = BeautifulSoup(response.text, 'lxml')
        # Locate the real link by string splitting: take the text between
        # the first pair of double quotes in the script element.
        script = html.select('div.down_bar script')[0]
        quoted_url = str(script).split('"')[1]
        parts = quoted_url.split('/')
        # Keep the leading piece (presumably the scheme, e.g. "http:") and
        # pieces 3-5, but swap the host for the fixed address below.
        # NOTE(review): assumes the quoted text is an absolute URL with at
        # least three path segments; otherwise IndexError is caught below.
        return (parts[0] + '//67.229.159.202/'
                + parts[3] + '/' + parts[4] + '/' + parts[5])
    except Exception as e:
        print(e)
        return None
# down_book('大国文娱')
def main(timeout=30):
    """Interactive loop that downloads novels by title.

    Repeatedly prompts for a title until the user enters ``n`` or ``N``.
    For each title it makes sure the storage folder exists, skips titles
    whose ``<title>.txt`` is already present, resolves the download link
    with ``down_book`` and writes the response body to
    ``<folder><title>.txt``. Progress and errors are printed, never raised.

    Args:
        timeout: Seconds passed to ``requests.get`` for the file download so
            an unresponsive server cannot block the program forever.
    """
    while True:
        bookName = input('请输入要下载的小说名字,[n或N退出]:')
        if bookName in ('n', 'N'):
            break
        if not bookName.strip():
            # Blank input would search for an empty title and could save a
            # stray ".txt" file; just ask again.
            continue

        # Folder the novel is saved into.
        path = mkdirs()
        # File name of the novel to download.
        book = bookName + '.txt'
        if checkFile(path, book):
            print('该小说已存在...')
            continue

        # Resolve the real download link.
        RightUrl = down_book(bookName)
        if RightUrl is None:
            print('无法获取链接...')
            continue

        print('成功获取链接,正在下载,请耐心等待...')
        try:
            bookdata = requests.get(url=RightUrl, headers=header, timeout=timeout)
            if bookdata.status_code != 200:
                print('网页请求出错,小说下载失败...')
                continue
            target = path + book
            with open(target, 'wb') as f:
                f.write(bookdata.content)
            size = os.path.getsize(target)
            # Report the size in MiB (bytes / 1024 / 1024).
            print('小说下载成功!共%.2fM' % (size / 1024 / 1024))
        except Exception as e:
            # Best-effort download: show the error and return to the prompt.
            print(e)
if __name__ == '__main__':
    # Run the interactive downloader only when executed as a script, then
    # block until the user presses Enter (the prompt reads "press Enter to
    # finish").
    main()
    input('回车键结束...')
只要输入小说名称,就会查找该小说;如果笔趣阁有这部小说,就会自动下载。(在获取小说的实际下载链接时,我用了很笨的方法:各种字符串切割、拼接。如果改用selenium,很容易就可以获取,大家可以自己去尝试!能力有限,各位海涵~~~)
如果想理解代码,可以自己尝试着单独运行代码中的每个函数,输出它们的值,慢慢理解各个函数的功能。