小项目-下载堆糖网页图片-python爬虫
- 这里只获取单页的图片
- 如果要翻页,可以找出页面 URL 的规律,或者使用 selenium;但这里的“下一页”按钮是由 js 加载的,并且可以看到它的 type 属性标为 hidden,无法直接点击,所以要用 driver.execute_script() 来触发
一、正则
import requests
import re
# Request headers passed to requests.get() by the spider. The User-Agent
# mimics desktop Chrome 89 on Windows 10 -- presumably so the site serves the
# same page it would serve a regular browser (confirm against the site).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/89.0.4389.128 Safari/537.36'
    ),
}
class tuitangSpider():
def __init__(self,name):
self.name = name
self.pic_links = []
def getSource(self):
    """Download the Duitang search-result page for ``self.name``.

    Sends a GET request to the site's search endpoint with the keyword in
    ``kw`` and ``limit`` set to 100, using the module-level ``headers``.

    Returns:
        str: The raw response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the server does not respond within the timeout.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    url_base = 'https://www.duitang.com/search/?'
    # NOTE(review): 'limit' presumably caps the number of results returned
    # per request -- confirm against the site's actual behaviour.
    params = {'kw': self.name, 'limit': 100}
    # Without a timeout, requests waits indefinitely on a stalled connection;
    # bound the wait so the scraper fails fast instead of hanging.
    response = requests.get(
        url_base,
        params=params,
        headers=headers,
        timeout=10,
    )
    return response.content.decode('utf-8')
def parseSource(self):
tags = tags = re.findall('.*?div data-id(.*?)</div>.*?',self.get