selenium基础学习
很多时候我们要处理的是通过 Ajax 动态加载内容的网页,所以需要先等待页面元素或某些操作完成,再进行下一步。
案例说明:等待
from selenium import webdriver
import os
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Explicit-wait demo on the 12306 ticket-query page.
#
# Two kinds of wait are available:
#   * Implicit wait -- a driver-wide timeout for element lookups, e.g.
#         driver.implicitly_wait(5)
#   * Explicit wait -- block until a specific condition holds before moving
#     on to the next step. That is what this script uses.

# chromedriver.exe is expected to sit next to this script.
driver_path = os.path.join(os.path.dirname(__file__), 'chromedriver.exe')
# NOTE(review): passing the driver path positionally is the older Selenium
# calling convention; confirm it matches the installed Selenium version.
driver = webdriver.Chrome(driver_path)

driver.get('https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc')

# Dismiss the warning dialog. Wait until its button is clickable instead of
# looking it up immediately after get(): the lookup raises
# NoSuchElementException when the dialog has not been rendered yet.
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, 'qd_closeDefaultWarningWindowDialog_id'))
).click()

# Departure-station condition. WebDriverWait takes the driver and a timeout
# in seconds; the condition takes a (By, locator) tuple and the text that
# must appear in the element's value.
WebDriverWait(driver, 100).until(
    EC.text_to_be_present_in_element_value((By.ID, 'fromStationText'), '重庆')
)
# Destination-station condition.
WebDriverWait(driver, 100).until(
    EC.text_to_be_present_in_element_value((By.ID, 'toStationText'), '上海')
)

# Both stations are filled in: run the query, then click the 8th entry of
# the date range list.
driver.find_element(By.ID, 'query_ticket').click()
driver.find_element(By.XPATH, '//*[@id="date_range"]/ul/li[8]').click()
操作cookie
# @Time : 2021/1/4 20:27
# @Author : Jerry
# @File : selenium操作cookie.py
# @Software: PyCharm
# cookie的作用
# 1 模拟登录
# 2 反反爬
from selenium import webdriver
import requests
import time
import json
import os
from selenium.webdriver.common.by import By

# Cookie demo: log in to Qzone in a real browser, save the session cookies,
# then replay them in a plain `requests` call.
#
# driver.get_cookies() returns a list with one dict per cookie, e.g.:
#     driver.get('https://www.baidu.com/')
#     for cookie in driver.get_cookies():
#         print(cookie)
#
# Two ways to simulate a login:
#   1. POST the account and password to the login URL.
#   2. Take the cookies of a logged-in session and send them along.
# This script uses the second. The Qzone page of an account lives at
# https://user.qzone.qq.com/<QQ number>.

# chromedriver.exe is expected to sit next to this script.
driver_path = os.path.join(os.path.dirname(__file__), 'chromedriver.exe')
# NOTE(review): passing the driver path positionally is the older Selenium
# calling convention; confirm it matches the installed Selenium version.
driver = webdriver.Chrome(driver_path)

driver.get('https://xui.ptlogin2.qq.com/cgi-bin/xlogin?proxy_url=https%3A//qzs.qq.com/qzone/v6/portal/proxy.html&daid=5&&hide_title_bar=1&low_login=0&qlogin_auto_login=1&no_verifyimg=1&link_target=blank&appid=549000912&style=22&target=self&s_url=https%3A%2F%2Fqzs.qzone.qq.com%2Fqzone%2Fv5%2Floginsucc.html%3Fpara%3Dizone&pt_qr_app=手机QQ空间&pt_qr_link=http%3A//z.qzone.com/download.html&self_regurl=https%3A//qzs.qq.com/qzone/v6/reg/index.html&pt_qr_help_link=http%3A//z.qzone.com/download.html&pt_no_auth=0')

# NOTE(review): 'face' is presumably the quick-login avatar of the QQ account
# already signed in on this machine -- confirm against the login page.
button = driver.find_element(By.CLASS_NAME, 'face')
button.click()
# Fixed pause before reading the cookies.
# NOTE(review): assumes the login finishes within 3 seconds -- confirm, or
# replace with an explicit wait on a post-login condition.
time.sleep(3)

# Grab the browser's cookies and persist them as JSON.
cookies = driver.get_cookies()
with open('qqzone.json', 'w', encoding='utf-8') as file_obj:
    json.dump(cookies, file_obj)

# Turn the cookie dicts into the "name=value; name=value" form expected by
# the Cookie request header.
cookie_str = '; '.join(item['name'] + '=' + item['value'] for item in cookies)

url = 'https://user.qzone.qq.com/1417464405'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
    'cookie': cookie_str,
}
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)
print(response.text)
with open('qzong.html', 'w', encoding='utf-8') as f:
    f.write(response.text)
操作多个网页
from selenium import webdriver
import os
import time
# Multi-window demo: open a second page from the first one and switch the
# driver over to it.

# chromedriver.exe is expected to sit next to this script.
script_dir = os.path.dirname(__file__)
driver_path = os.path.join(script_dir, 'chromedriver.exe')
driver = webdriver.Chrome(driver_path)

driver.get('https://www.baidu.com')

# execute_script runs JavaScript in the page; here it opens another page.
open_douban_js = 'window.open("https://douban.com")'
driver.execute_script(open_douban_js)

# The driver is still attached to the Baidu window at this point, so calling
# driver.close() here would close Baidu, not Douban.

# To operate on Douban, switch to its window handle first
# (driver.switch_to_window() is the older spelling of the same call).
handles = driver.window_handles
driver.switch_to.window(handles[1])
print(driver.current_url)