"""Search Baidu Images for a keyword and save the result images to disk.

The script drives Chrome through Selenium to run the search and scroll the
result page, parses the rendered HTML with lxml, then downloads every image
URL found with requests.

Original author: fumingyao, 2019-08-28.
"""
import os  # output directory handling
import random  # random numbers for the saved file names
import time  # fixed delays while the page loads

import requests  # HTTP client used to download the images
from lxml import html  # HTML parsing with XPath support
from selenium import webdriver  # browser automation

etree = html.etree  # alias for lxml's etree module

SEARCH_URL = 'https://image.baidu.com/'
KEYWORD = '石原里美'
OUTPUT_DIR = './link'
SCROLL_ROUNDS = 3
REQUEST_TIMEOUT = 10  # seconds; keeps one stalled download from hanging the run
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'
}


def _fetch_result_page():
    """Run the image search in Chrome and return the rendered page source.

    The browser is always closed, even when a lookup or click fails, so no
    chromedriver process is left behind.
    """
    # NOTE(review): `find_element_by_*` and the positional driver path are the
    # Selenium 3 API this file was written against - confirm the installed
    # Selenium version before upgrading.
    browser = webdriver.Chrome('chromedriver.exe')
    try:
        browser.get(SEARCH_URL)
        input_box = browser.find_element_by_id('kw')  # search input box
        input_box.send_keys(KEYWORD)
        time.sleep(3)
        search_button = browser.find_element_by_class_name('s_search')
        search_button.click()
        time.sleep(3)
        # Scroll to the bottom a few times so more results get loaded.
        for _ in range(SCROLL_ROUNDS):
            browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
            time.sleep(2)
        return browser.page_source
    finally:
        browser.quit()


def _extract_image_urls(page_source):
    """Return the `data-imgurl` value of every result item in *page_source*.

    Items without a `data-imgurl` attribute are skipped instead of raising
    IndexError.
    """
    tree = etree.HTML(page_source)
    urls = []
    for item in tree.xpath('//div[@class="imgpage"]/ul/li'):
        found = item.xpath("./div/a/img/@data-imgurl")
        if found:
            urls.append(found[0])
    return urls


def _force_https(url):
    """Return *url* with a leading ``http://`` scheme upgraded to ``https://``.

    Only the scheme prefix is rewritten; any other occurrence of "http" in
    the URL (for example inside a query string) is left untouched.
    """
    prefix = 'http://'
    if url.startswith(prefix):
        return 'https://' + url[len(prefix):]
    return url


def _save_image(content):
    """Write *content* to a new randomly named .jpg in OUTPUT_DIR.

    The file is opened in exclusive-create mode and the name is re-drawn on a
    collision, so an earlier download is never overwritten. Returns the path
    that was written.
    """
    while True:
        path = '%s/%s.jpg' % (OUTPUT_DIR, random.randint(1, 10 ** 9))
        try:
            with open(path, 'xb') as f:
                f.write(content)  # image data is a binary stream
        except FileExistsError:
            continue
        return path


def main():
    """Search, parse the result page and download every image found."""
    page_source = _fetch_result_page()
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for img_link in _extract_image_urls(page_source):
        img_link = _force_https(img_link)
        try:
            response = requests.get(url=img_link, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()  # do not save an error page as a .jpg
        except requests.RequestException as exc:
            # One bad URL should not abort the remaining downloads.
            print('skipped %s: %s' % (img_link, exc))
            continue
        print(response)
        _save_image(response.content)


if __name__ == '__main__':
    main()