爬取《射雕英雄传》连载版
手机上的一本连载版居然没有目录，哎，没目录看着可难受了。
趁着有空，就写了一个脚本，把连载版提取了出来。下次有空的话，再优化一下，看能不能一次提取整套金庸作品集。
from bs4 import BeautifulSoup
from urllib import request
import re
# Index page of the serialized edition on jinyongwang.com.
html_addr = "http://www.jinyongwang.com/oshe/"
# Fetch through the shared constant instead of repeating the URL literal, and
# close the HTTP response as soon as the body has been read.
with request.urlopen(html_addr) as response:
    html_content = response.read()
soup = BeautifulSoup(html_content, 'html.parser')

# Serialize the first element matching ".author" so the regexes below can run
# on its raw markup. Raises IndexError if the page has no such element.
author = str(soup.select('.author')[0])
# Text between the opening <p class="author"> tag and the first "<a".
author_content = re.findall('<p class="author">(.*?)<a', author)[0]
# Link text: whatever sits between '/">' (end of the href) and '</a></p>'.
author_name = re.findall('/">(.*?)</a></p>', author)[0]
# Header line written at the top of the output file.
author_info = author_content + author_name

# Every <li> under ".mlist" -- presumably one entry per chapter; confirm
# against the page markup.
all_content = soup.select(".mlist li")
with open('射雕英雄传(连载版).txt','w+',encoding='utf-8') as f:
f.write(author_info)
f.write('\n'*3)
for one_content in all_content:
chapter_info = re.findall(">