import functools
from urllib.parse import urljoin

import requests
from lxml import etree
class Request(object):
    """Class-based decorator that downloads a page and hands its text on.

    Decorating a function replaces it with a ``Request`` instance. Calling
    that instance fetches a URL with ``requests.get`` and invokes the
    wrapped function with the decoded response text.
    """

    def __init__(self, func):
        """Store the wrapped callable.

        Args:
            func: Callable that takes the response text as its only argument.
        """
        self.func = func
        # Copy __name__, __doc__, etc. from func so the decorated name still
        # introspects like the original function.
        functools.update_wrapper(self, func)

    def __call__(self, url, headers, data=None, *, timeout=10):
        """Fetch ``url`` and pass the decoded body to the wrapped callable.

        Args:
            url: Address to request with HTTP GET.
            headers: HTTP headers sent with the request.
            data: Accepted for compatibility with existing callers; not used.
            timeout: Seconds to wait on the server before ``requests`` gives
                up, so a stalled connection cannot hang the caller forever.

        Returns:
            Whatever the wrapped callable returns.
        """
        resp = requests.get(url, headers=headers, timeout=timeout)
        # Decode with the encoding detected from the body itself.
        resp.encoding = resp.apparent_encoding
        return self.func(resp.text)
@Request  # same as get_content = Request(get_content); the instance is callable through __call__
def get_content(content):
    """Parse a listing page and print one record per joke entry.

    Args:
        content: HTML text of the page.

    Returns:
        A list of dicts with the keys ``nick_name``, ``href`` and ``con``,
        one for each entry in which all three fields were found.
    """
    items = []
    htm = etree.HTML(content)
    if htm is None:
        # Guard in case the parser produced no root element to query.
        return items
    for div in htm.xpath('//div[@class="one-cont"]'):
        names = div.xpath('.//i/text()')
        links = div.xpath('.//a/@href')
        texts = div.xpath('.//p[@class="fonts"]/a/text()')
        if not (names and links and texts):
            # Skip an incomplete entry rather than letting [0] raise
            # IndexError and abort the rest of the page.
            continue
        item = {
            "nick_name": names[0],
            # urljoin copes with root-relative, relative and absolute hrefs.
            "href": urljoin('https://www.xiaohua.com', links[0]),
            "con": texts[0],
        }
        print(item)
        items.append(item)
    return items
# Browser-like User-Agent sent with every request; built once instead of per page.
headers = {'User-Agent': "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"}
# Walk listing pages 1 through 100 inclusive.
for num in range(1, 101):
    url = f"https://www.xiaohua.com/duanzi/?page={num}"
    get_content(url, headers=headers)
# Scraping jokes with a class-based decorator
# (Source page note: latest recommended article published 2021-06-11 22:19:45)