import multiprocessing
import re
from urllib.parse import urljoin

import requests
from lxml import etree
def novel():
    """Collect the absolute URL of every chapter linked from the book's index page.

    Returns:
        list[str]: Chapter page URLs, in the order the links appear on the
        index page.

    Raises:
        requests.RequestException: If the index page cannot be fetched
            (connection failure, timeout, or an HTTP error status).
    """
    index_url = 'https://www.17k.com/list/3015690.html'
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(index_url, timeout=30)
    response.raise_for_status()
    response.encoding = 'utf8'
    # Build an element tree that can be queried with XPath.
    tree = etree.HTML(response.text)
    # Every chapter link on the index page looks like:
    #   <a target="_blank" href="/chapter/3015690/38259921.html" title="...">
    hrefs = tree.xpath('//html/body/div[@class="Main List"]/dl[@class="Volume"]/dd/a/@href')
    # The hrefs start with '/', so plain concatenation onto a base ending in
    # '/' would produce 'host//chapter/...'. urljoin resolves them against the
    # index URL and keeps its https scheme.
    return [urljoin(index_url, href) for href in hrefs]
def write_(html_list, start=0):
    """Download each chapter page and save its text to a numbered file.

    The chapter at position ``i`` (0-based) of ``html_list`` is written to
    ``D:\\novel\\<start + i + 1>.txt``. The directory must already exist,
    because ``open`` does not create it.

    Args:
        html_list: Chapter page URLs, in reading order.
        start: Number of chapters that precede ``html_list`` in the whole
            book. Workers that each handle a slice of the chapter list pass
            their slice offset here so that their file names do not collide.
            The default of 0 numbers the files 1, 2, 3, ...

    Raises:
        requests.RequestException: If a chapter page cannot be fetched
            (connection failure, timeout, or an HTTP error status).
    """
    for number, html in enumerate(html_list, start=start + 1):
        # A timeout keeps one stalled connection from blocking the worker forever.
        res = requests.get(html, timeout=30)
        res.raise_for_status()
        res.encoding = 'utf8'
        tree1 = etree.HTML(res.text)
        # Positional path to the <p> elements that hold the chapter text.
        b_list = tree1.xpath('//html/body/div[@class="area"]/div[2]/div[2]/div[1]/div[2]/p/text()')
        # One paragraph per line; str(b_list) would store the Python list
        # repr (brackets, quotes, escapes) instead of readable text.
        txt = '\n'.join(b_list)
        # Explicit UTF-8: the platform default codec may be unable to encode
        # every character of the chapter.
        with open('D:\\novel\\' + str(number) + '.txt', mode='w', encoding='utf-8') as f:
            f.write(txt)


if __name__ == "__main__":
    # Fetch the chapter index only in the parent process. With the "spawn"
    # start method each child re-imports this module, so a module-level call
    # would download the index again in every worker.
    html_list = novel()
    x = len(html_list) // 2
    # Split the chapters into two halves, one per process. The second worker
    # is told how many chapters precede its slice so its files continue the
    # numbering instead of overwriting the first worker's files.
    p1 = multiprocessing.Process(target=write_, args=(html_list[0:x],))
    p2 = multiprocessing.Process(target=write_, args=(html_list[x:], x))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    print("Over")
# HomeWorkDay05 --- download a novel
# (Blog page footer: latest recommended article published on 2022-11-29 22:13:52)