# -*- coding: utf-8 -*-
import requests
from lxml import etree
class BookSpider:
    """Small scraper helper built on ``requests`` and ``lxml``.

    Attributes are plain instance fields:
      * ``url`` / ``base_url`` -- both set to the site's root address.
      * ``headers`` -- HTTP headers sent with every request made by
        :meth:`get_html`.
    """

    def __init__(self):
        self.url = "http://www.jianlaixiaoshuo.com/"
        self.base_url = "http://www.jianlaixiaoshuo.com/"
        # The header name must be spelled "User-Agent"; any other key is
        # sent as an unrelated custom header and the server still sees the
        # default python-requests user agent.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        }

    def get_html(self, url, timeout=10):
        """Request a page and return its body as text.

        Args:
            url: Address to fetch.
            timeout: Seconds to wait for the server before ``requests``
                raises a timeout error. Without one, a stalled connection
                would block forever.

        Returns:
            The response body decoded as UTF-8.
        """
        response = requests.get(url, headers=self.headers, timeout=timeout)
        return response.content.decode("utf-8")

    def get_xpath(self, html, pattern):
        """Parse ``html`` with lxml and return the result of XPath ``pattern``."""
        tree = etree.HTML(html)
        return tree.xpath(pattern)

    def save_data(self, data):
        """Append ``data`` to the UTF-8 text file ``剑来.txt`` in the working directory."""
        with open('剑来.txt', 'a', encoding='utf-8') as f:
            f.write(data)

    def down_load(self, url):
        """Download data: fetch the page stored in ``self.url``.

        NOTE(review): the ``url`` argument is not used here and the fetched
        HTML is not processed or returned. The visible source ends after
        this statement, so the method looks truncated -- confirm against
        the full original before relying on it.
        """
        html = self.get_html(self.url)
爬虫学习笔记:爬取笔趣阁小说
最新推荐文章于 2024-04-06 13:32:08 发布