# 爬虫 Day 3:接口和 selenium 基础总结
## 一、selenium 的基本用法
from selenium.webdriver import Chrome
from bs4 import BeautifulSoup
from re import search
# Scrape the rental listings shown on the Anjuke Chengdu page: load the page in
# a real Chrome instance, grab the rendered HTML, then parse it with
# BeautifulSoup.

# 1. Create a browser object.
b = Chrome()
try:
    # 2. Open the page.
    b.get('https://cd.zu.anjuke.com/?from=navigation')
    # 3. Grab the page data. Only content the browser has actually loaded is
    #    present in page_source.
    # print(b.page_source)
    result = b.page_source
finally:
    # The HTML is already captured, so release the browser/driver process
    # even if loading the page failed.
    b.quit()

# 4. Parse the captured HTML.
soup = BeautifulSoup(result, 'lxml')
all_house_div = soup.select('#list-content>.zu-itemmod')
for house in all_house_div:
    title_tag = house.select_one('.strongbox')
    details_tag = house.select_one('.details-item')
    price_tag = house.select_one('.zu-side>p')
    # select_one returns None when nothing matches; skip cards that are
    # missing any expected element instead of crashing on `.text`.
    if title_tag is None or details_tag is None or price_tag is None:
        continue

    title = title_tag.text
    message = details_tag.text
    # The area is the text between the first pair of '|' separators in the
    # details line; fall back to an empty string when that pattern is absent.
    area_match = search(r'\|(.+?)\|', message)
    area = area_match.group(1) if area_match else ''
    price = price_tag.text
    print(title, area, price)
## 二、selenium 控制网页
import time

from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Drive the JD home page: type a query into the search box, submit it, and
# print the HTML of the page that results.
b = Chrome()
try:
    b.get('https://www.jd.com/')

    # 1. Type into the input box automatically.
    # 1) Locate the input box on the page. The find_element_by_* helpers were
    #    removed in Selenium 4.3, so use find_element with a By locator.
    search_box = b.find_element(By.ID, 'key')
    # Equivalent CSS-selector form:
    # search_box = b.find_element(By.CSS_SELECTOR, '#key')

    # 2) Enter text into the input box.
    search_box.send_keys('酸奶')

    # 3) Alternative way to submit: press Enter in the input box.
    # search_box.send_keys(Keys.ENTER)

    # 4) Submit by pressing the search button.
    # a. Find the search button.
    search_btn = b.find_element(By.CSS_SELECTOR, '.button')
    # b. Click it.
    search_btn.click()

    # Pause for one second before reading page_source so the page triggered
    # by the click has some time to load.
    time.sleep(1)
    print(b.page_source)
finally:
    # Always shut the browser and driver process down, even on failure.
    b.quit()