平时喜欢看小说,于是自己写了一个简单的爬虫。
利用 Python 爬取网页上的小说(来源:笔下文学)。
这是爬取并保存的《元尊》小说的 txt 文件。
代码如下:
import urllib.request
import re
import gzip
from io import BytesIO
from bs4 import BeautifulSoup
# Open the link and fetch the page
def urlopen(url: str) -> str:
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request is sent with a desktop-browser User-Agent and advertises
    gzip support. The body is gunzipped only when it is actually
    gzip-compressed, so servers that ignore ``Accept-Encoding`` and answer
    with an uncompressed page are handled as well.

    Args:
        url: Address of the page to download.

    Returns:
        The page content as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        OSError: If the body is flagged as gzip but cannot be decompressed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = urllib.request.Request(url)
    req.add_header(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
    )
    req.add_header("Accept-Encoding", "gzip")

    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(req) as response:
        content_encoding = response.headers.get("Content-Encoding") or ""
        payload = response.read()

    # "Accept-Encoding: gzip" is only an offer; the server may still reply
    # uncompressed. Trust the response header, and additionally sniff the
    # gzip magic number so a compressed body without the header is still
    # decoded (valid UTF-8 text can never start with these two bytes).
    is_gzipped = "gzip" in content_encoding.lower() or payload[:2] == b"\x1f\x8b"
    if is_gzipped:
        payload = gzip.decompress(payload)

    return payload.decode("utf-8")
# Get the novel's title
def txt_name(url):
html = urlopen(url)
htm = BeautifulSoup(h