# -*- coding:utf-8 -*-
# Scraper for the Biquw novel site: http://www.biquw.com/
# Downloads the novel "都市阴阳师"
# Author: 筑梦之路 QQ434754018
from bs4 import BeautifulSoup
import requests, sys
class downStore(object):
    """Download the chapters of one novel from www.biquw.com.

    Typical use: call ``getDownUrls()`` once to fill ``names``/``urls``/
    ``nums`` from the novel's index page, then call ``getConents(url)`` for
    each chapter and hand the result to ``save_file``.
    """

    # Seconds to wait on the server before a request is abandoned; without a
    # timeout ``requests.get`` can block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.server = 'http://www.biquw.com/'  # site being scraped
        self.target = 'http://www.biquw.com/book/67821/'  # index page of the novel
        self.names = []  # chapter titles, in index-page order
        self.urls = []  # chapter URLs, parallel to self.names
        self.nums = 0  # number of chapters collected

    def _fetch_html(self, url):
        """Return the decoded HTML of *url*, raising on HTTP error statuses."""
        response = requests.get(url=url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        # When the server sends text/* without a charset, requests assumes
        # ISO-8859-1, which garbles Chinese pages; use the detected encoding.
        if (response.encoding or '').lower() == 'iso-8859-1':
            response.encoding = response.apparent_encoding
        return response.text

    def getDownUrls(self):
        """Populate ``names``, ``urls`` and ``nums`` from the index page.

        Reads every ``<a>`` inside the first ``<div class="book_list">`` of
        ``self.target``. Anchors without an ``href`` are skipped. Previous
        results are replaced, so the method can be called repeatedly.

        Raises:
            ValueError: if the page has no ``book_list`` container.
            requests.RequestException: on network or HTTP errors.
        """
        from urllib.parse import urljoin

        soup = BeautifulSoup(self._fetch_html(self.target), 'html.parser')
        containers = soup.find_all('div', class_='book_list')
        if not containers:
            raise ValueError(
                'no <div class="book_list"> found at %s' % self.target)

        names = []
        urls = []
        for link in containers[0].find_all('a'):
            href = link.get('href')
            if not href:
                continue
            # get_text() always yields a str; .string is None when the
            # anchor wraps more than one child node.
            names.append(link.get_text())
            # urljoin matches plain concatenation for relative hrefs and
            # also resolves root-relative and absolute ones correctly.
            urls.append(urljoin(self.target, href))

        self.names = names
        self.urls = urls
        self.nums = len(urls)

    def getConents(self, target):
        """Return the text of the chapter page at *target*.

        The text is taken from the first ``<div id="htmlContent">``; every
        run of eight non-breaking spaces is replaced by a blank line
        (presumably the site's paragraph indent -- verify against the page).

        Raises:
            ValueError: if the page has no ``htmlContent`` container.
            requests.RequestException: on network or HTTP errors.
        """
        soup = BeautifulSoup(self._fetch_html(target), 'html.parser')
        bodies = soup.find_all('div', id='htmlContent')
        if not bodies:
            raise ValueError('no <div id="htmlContent"> found at %s' % target)
        return bodies[0].text.replace('\xa0' * 8, '\n\n')

    def save_file(self, name, path, text):
        """Append one chapter to the UTF-8 file at *path*.

        Writes *name* on its own line, then *text*, then a blank line.
        *text* is normally a ``str``; an iterable of strings is also
        accepted and written back to back.
        """
        with open(path, 'a', encoding='utf-8') as f:
            f.write(name + '\n')
            if isinstance(text, str):
                f.write(text)
            else:
                f.writelines(text)
            f.write('\n\n')
if __name__ == '__main__':
    # Script entry point: collect the chapter list, then download every
    # chapter in order and append it to a single text file.
    d1 = downStore()
    d1.getDownUrls()
    print('都市阴阳师小说开始下载:')
    total = d1.nums
    for done, (name, url) in enumerate(zip(d1.names, d1.urls), start=1):
        d1.save_file(name, '都市阴阳师.txt', d1.getConents(url))
        # Scale the fraction to a real percentage and count the chapter just
        # saved, so the display runs up to 100% instead of stopping below 1%.
        sys.stdout.write(" 已下载:%.3f%%" % (100.0 * done / total) + '\r')
        sys.stdout.flush()
    print("都市阴阳师小说下载完成!")