'''
http://www.shicimingju.com/book/index.html
'''
import urllib.request
import urllib.parse
from lxml import etree
import os
import time
from urllib.parse import urljoin
class Choose(object):
    """Interactive picker for a book listed on the shicimingju.com catalogue."""

    @staticmethod
    def get_url():
        """Show the catalogue, ask for a number and return the book's URL.

        The catalogue page is fetched through ``ShiShuBook.get_html``. Every
        ``<h2>`` text found under ``div.bookmark-list`` is treated as an entry
        code and every ``<a>`` under it as the matching title / link. The
        text typed by the user is suffixed with '、' before it is looked up
        among the codes.

        Returns:
            The absolute URL of the selected book.

        Raises:
            SystemExit: if the typed number matches no catalogue entry.
        """
        url = 'http://www.shicimingju.com/book/'
        b = ShiShuBook(url)
        page = b.get_html(url)
        tree = etree.HTML(page)
        mulu_code = tree.xpath("//div[@class='bookmark-list']//h2/text()")
        mulu_bt = tree.xpath("//div[@class='bookmark-list']//a/text()")
        mulu_url = tree.xpath("//div[@class='bookmark-list']//a/@href")

        # Five entries per row; zip() stops at the shorter list instead of
        # raising IndexError when the two XPath results differ in length.
        for position, (code, title) in enumerate(zip(mulu_code, mulu_bt), start=1):
            print(code, title, end=' ')
            if position % 5 == 0:
                print('\n')
        print('\n')

        num = input("请输入要下载的序号:").strip()
        snum = num + '、'
        # Use the position where the code was actually found rather than
        # int(num) - 1, which assumes the codes are the integers 1..n in order.
        index = mulu_code.index(snum) if snum in mulu_code else -1
        if 0 <= index < len(mulu_url):
            # urljoin copes with both root-relative and absolute hrefs.
            return urljoin(url, mulu_url[index])

        print('没有找到......')
        # exit() is injected by the site module for interactive use only.
        raise SystemExit
class ShiShuBook(object):
    """Download every chapter of a book from www.shicimingju.com as text files.

    Attributes:
        url: URL of the book's index page (the page listing its chapters).
        y_url: root URL of the site hosting ``url``; chapter links are
            resolved against it.
    """

    def __init__(self, url=None):
        """Remember the book URL and derive the site root from it.

        Args:
            url: URL of the book index page. May be omitted when the book is
                selected later through ``choose()``.
        """
        self.url = url
        # Derive the root from the URL itself instead of slicing a fixed
        # number of characters, which only fits one particular host name.
        self.y_url = urljoin(url, '/') if url else None

    # Kept so code that calls ``obj.init(url)`` explicitly keeps working.
    init = __init__

    def get_html(self, url):
        """Fetch ``url`` with a browser-like User-Agent and return the decoded body."""
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
        }
        request = urllib.request.Request(url=url, headers=headers)
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(request) as response:
            return response.read().decode()

    def title_subtitle_suburl(self, html):
        """Extract the book title, chapter titles and chapter links from a page.

        Args:
            html: markup of a book index page.

        Returns:
            A tuple ``(book_title, sub_title, sub_url)`` of three lists: the
            ``<h1>`` texts found directly under a ``<div>``, and the link
            texts and hrefs found under ``div.book-mulu``.
        """
        tree = etree.HTML(html)
        book_title = tree.xpath('//div/h1/text()')
        sub_title = tree.xpath('//div[@class="book-mulu"]//ul/li/a/text()')
        sub_url = tree.xpath('//div[@class="book-mulu"]//ul/li/a/@href')
        return (book_title, sub_title, sub_url)

    def get_text(self, html):
        """Return the list of text fragments inside ``div.chapter_content``."""
        tree = etree.HTML(html)
        return tree.xpath('//div[@class="chapter_content"]//text()')

    def save_file_content(self, book_title, sub_t, sub_u):
        """Download one chapter and write it to ``<book>/<chapter>.txt``.

        Args:
            book_title: list whose first item is the book title; it is used
                as the output directory name.
            sub_t: chapter title, used as the file name after spaces are
                replaced by '-'.
            sub_u: chapter link, resolved against ``self.y_url``.
        """
        book = book_title[0]
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(book, exist_ok=True)
        sub_t = sub_t.replace(' ', '-').replace('--', '-')
        # os.path.join picks the right separator on every platform.
        file_name = os.path.join(book, sub_t + '.txt')
        chapter_url = urljoin(self.y_url, sub_u)
        fragments = self.get_text(self.get_html(chapter_url))
        with open(file_name, 'w', encoding='utf-8') as fp:
            fp.writelines(fragments)
        print("{} 下载完成".format(sub_t))
        # Pause between chapter requests.
        time.sleep(1)

    def run(self):
        """Download all chapters of the book at ``self.url``."""
        html = self.get_html(self.url)
        book_title, sub_title, sub_url = self.title_subtitle_suburl(html)
        print("开始下载 {} ,请稍侯......".format(book_title[0]))
        for chapter_title, chapter_href in zip(sub_title, sub_url):
            self.save_file_content(book_title, chapter_title, chapter_href)
        print("{} 全部下载完毕!!!".format(book_title[0]))

    def choose(self):
        """Show the site catalogue, ask for a number and download that book.

        The text typed by the user is suffixed with '、' and looked up among
        the ``<h2>`` texts found under ``div.bookmark-list``.

        Raises:
            SystemExit: if the typed number matches no catalogue entry.
        """
        url = 'http://www.shicimingju.com/book/'
        tree = etree.HTML(self.get_html(url))
        mulu_code = tree.xpath("//div[@class='bookmark-list']//h2/text()")
        mulu_bt = tree.xpath("//div[@class='bookmark-list']//a/text()")
        mulu_url = tree.xpath("//div[@class='bookmark-list']//a/@href")

        # Five entries per row; zip() stops at the shorter list instead of
        # raising IndexError when the two XPath results differ in length.
        for position, (code, title) in enumerate(zip(mulu_code, mulu_bt), start=1):
            print(code, title, end=' ')
            if position % 5 == 0:
                print('\n')
        print('\n')

        num = input("请输入要下载的序号:").strip()
        snum = num + '、'
        # Use the position where the code was actually found rather than
        # int(num) - 1, which assumes the codes are the integers 1..n in order.
        index = mulu_code.index(snum) if snum in mulu_code else -1
        if 0 <= index < len(mulu_url):
            self.url = urljoin(url, mulu_url[index])
            self.y_url = urljoin(self.url, '/')
            self.run()
        else:
            print('没有找到......')
            # exit() is injected by the site module for interactive use only.
            raise SystemExit