from lxml import etree
# Send the request and fetch the data
import requests
# Object-oriented design pattern, to keep the code well organized
class Spider(object):
def init(self):
    """Set the HTTP request headers and the ``pages`` list on this instance.

    NOTE(review): this looks like a constructor whose ``__init__`` name lost
    its double underscores when the file was pasted. As written, Python will
    not call it automatically on instantiation; it has to be invoked
    explicitly. Confirm against the callers before renaming.
    """
    # Browser-like headers, intended to counter the site's anti-scraping checks.
    self.headers = {
        # "Connection": "close",
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/72.0.3626.96 Safari/537.36"
        ),
        "Referer": "https://www.mzitu.com/tag/ugirls/",
    }
    self.pages = [1, 2, 3]
def start_request(self):
for i in range(1,4):
# 1.获取整站数据
# https://www.mzitu.com/page/2/
response = requests.get('https://www.mzitu.com/page/'+str(i)) # 请求
# print(response.text) # 请求得到的一个 文本内容
html = etree.HTML(response.text)
# 2.获取我们想要的数据 图片标题、图片链接
src_list = html.xpath('//img[@class="lazy"]/@data-original'