# 今日
#coding:utf-8
import re
import sys
import bs4
from bs4 import BeautifulSoup
import requests
import xlwt
import tkinter as tk
from tkinter import messagebox
def get_download(url):
    """Fetch one chapter page, append it to the book's text file, and return navigation data.

    The chapter title is read from the ``<h1>`` inside ``div.bookname`` and the
    chapter body from ``div#content``. Both are appended to ``<book>.txt`` in
    the current working directory, where ``<book>`` is the text of the third
    link inside ``div.con_top``.

    Args:
        url: Full URL of the chapter page to download.

    Returns:
        A two-element list ``[section_name, next_href]``: the chapter title and
        the ``href`` of the third link inside ``div.bottem1`` (the next-chapter
        link on the page).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
        AttributeError, IndexError: If the page lacks the expected elements.
    """
    # Browser-like headers, to keep the client's IP from being blacklisted.
    req_header = {
        # The catch-all media range is "*/*"; the original text had lost the asterisks.
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
        'Cache-Control': 'max-age=0',
        'Proxy-Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1',
        'Content-Type': 'text/html;charset=utf-8',
        'Cookie': 'bcolor=; font=; size=; fontcolor=; width=; PPad_id_PP=2',
    }

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, headers=req_header, timeout=30)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()

    content = bs4.BeautifulSoup(response.content.decode("utf-8"), "lxml")

    # Chapter title.
    section_name = content.find("div", class_='bookname').h1.string
    section_name = section_name.strip('\r\n')

    # Chapter body: every whitespace run becomes a line break plus a tab indent.
    section_text = content.find("div", id='content').text
    section_text = re.sub(r'\s+', '\r\n\t', section_text).strip('\r\n')

    # The third link in the top navigation bar supplies the output file name.
    fname = content.find("div", class_='con_top').find_all('a')[2].string

    # Append mode: successive chapters accumulate in the same file.
    with open(fname + '.txt', "a", encoding='utf-8') as fp:
        fp.write(section_name + '\n')
        fp.write(section_text + '\n')

    # The third link in the bottom bar is the path of the next chapter.
    nexturl_num = content.find("div", class_='bottem1').find_all('a')[2]['href']

    return [section_name, nexturl_num]
def start(url, indexurl, oldurl):
    """Download chapters one after another, beginning at *url*.

    Each page is handed to ``get_download``; the next page's URL is built by
    prefixing the returned href with *indexurl*. A dialog is shown before the
    first download and again after the loop ends.

    Args:
        url: Full URL of the first chapter page.
        indexurl: Prefix joined with each next-chapter href to form the next URL.
        oldurl: Href that marks the end of the book; the loop stops as soon as
            a page's next-chapter href equals this value.
    """
    messagebox.showwarning("提示", "下载开始")
    while True:
        # get_download returns [chapter title, next-chapter href].
        num, nexturl = get_download(url)
        url = indexurl + nexturl
        # Stop once the chapter just saved links to the end marker.
        if nexturl == oldurl:
            break
    # Report the title of the last chapter that was written.
    messagebox.showwarning("提示", "下载至" + num)
# Script entry point: build a small form with three URL fields and a start button.
if __name__ == '__main__':
    x = tk.Tk()
    x.title("看书啦,小说下载TxT")

    # Row 0: URL of the first chapter.
    label1 = tk.Label(x, text="首章节网址:")
    label1.grid(row=0, column=0)
    # Row 1: href at which downloading stops (passed to start() as ``oldurl``).
    label2 = tk.Label(x, text="未章节网址:")
    label2.grid(row=1, column=0)
    # Row 2: site prefix joined with each next-chapter href (``indexurl``).
    label3 = tk.Label(x, text="总网址:")
    label3.grid(row=2, column=0)

    entry1 = tk.Entry(x, bd=2, width=50)
    entry1.grid(row=0, column=1)
    entry2 = tk.Entry(x, bd=2, width=50)
    entry2.grid(row=1, column=1)
    entry3 = tk.Entry(x, bd=2, width=50)
    entry3.grid(row=2, column=1)

    # Pre-fill the fields with the site's URL prefixes.
    entry1.insert('insert', 'https://www.kanshula.com/book/')
    entry2.insert('insert', '/book/')
    entry3.insert('insert', 'https://www.kanshula.com')

    def setstart():
        """Read the three fields and start downloading once all are filled in."""
        first_url = entry1.get()
        last_href = entry2.get()
        site_prefix = entry3.get()
        if first_url == '' or last_href == '' or site_prefix == '':
            messagebox.showwarning("提示", "请将网站填写完整")
        else:
            # start() takes (url, indexurl, oldurl): the prefix goes second, the end marker third.
            start(first_url, site_prefix, last_href)

    btn1 = tk.Button(x, text="开始下载", command=setstart)
    btn1.grid(row=3, column=0)
    x.mainloop()