笔记 2021-05-28:Selenium 的使用

selenium的使用

  1. selenium的基础

    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    import time
    
    # Local import for the locator constants used below; find_element(By..., ...)
    # replaces the find_element_by_* helpers, which newer Selenium releases removed.
    from selenium.webdriver.common.by import By

    # 1. Create the browser object.
    # Note: if the browser object is a local variable it is closed automatically
    # when the function ends; if it is a global variable it must be closed manually.
    b = webdriver.Chrome()

    # 2. Open the web page.
    b.get('https://www.jd.com')

    # 3. Read the page content.
    # Note: page_source only contains what the browser has loaded so far.
    # print(b.page_source)

    # 4. Locate and operate on elements.
    # 1) Search box: locate it -> type text -> press Enter.
    # (b.find_elements(...) returns every match instead of only the first.)
    # Select the search box with a CSS selector.
    search_box = b.find_element(By.CSS_SELECTOR, "#key")
    # Type "电脑" into the search box.
    search_box.send_keys('电脑')
    # Press Enter inside the search box.
    search_box.send_keys(Keys.ENTER)

    time.sleep(1)
    # The page changed, so locate the search box again before typing a new query.
    search_box2 = b.find_element(By.CSS_SELECTOR, "#key")
    # Clear the text that is already in the box.
    search_box2.clear()
    # Type the new query and submit it.
    search_box2.send_keys('手机')
    search_box2.send_keys(Keys.ENTER)

    # 5. Go back in history (browser.back()).
    time.sleep(1)
    b.back()
    time.sleep(1)
    b.back()

    # 6. Go forward in history (browser.forward()).
    time.sleep(1)
    b.forward()
    time.sleep(1)
    b.forward()

    # Locate the search button.
    search_btn = b.find_element(By.CSS_SELECTOR, ".button.cw_icon")
    # Click the button.
    search_btn.click()

    # Close the browser (left disabled in the original note).
    # b.close()
    
  2. selenium选项卡

    from selenium import webdriver
    import time
    
    # Local import for the locator constants used below; find_element(By..., ...)
    # replaces the find_element_by_* helpers, which newer Selenium releases removed.
    from selenium.webdriver.common.by import By

    b = webdriver.Chrome()
    b.get("https://www.jd.com")

    # Locate the first <a> matched under #navitems-group1 (the "秒杀" link per
    # the original note) and click it.
    miaosha = b.find_element(By.CSS_SELECTOR, "#navitems-group1>li>a")
    miaosha.click()

    # Print the handles of all open tabs/windows.
    print(b.window_handles)

    time.sleep(2)
    # Switch between tabs: first back to tab 0, then to tab 1.
    b.switch_to.window(b.window_handles[0])
    time.sleep(2)
    b.switch_to.window(b.window_handles[1])
    
  3. selenium获取网页cookies

    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    import time
    
    # Local imports: os for creating the output directory, By for the locator
    # constants (find_element(By..., ...) replaces the removed find_element_by_*).
    import os
    from selenium.webdriver.common.by import By

    url = "https://www.taobao.com"
    # Create the browser object and open the site.
    b = webdriver.Chrome()
    b.get(url)
    # Locate the search box, type "电脑" and press Enter to submit.
    search_box = b.find_element(By.CSS_SELECTOR, "#q")
    search_box.send_keys("电脑")
    search_box.send_keys(Keys.ENTER)

    # The site now shows its login page; wait while the user logs in by hand.
    time.sleep(40)

    # After the manual login, read the session cookies.
    cookies = b.get_cookies()
    # print(cookies)

    # Make sure the target directory exists so the cookies collected after the
    # long manual wait are not lost to a FileNotFoundError.
    os.makedirs('./files', exist_ok=True)
    # The cookies are stored as the text of a Python literal (str() of the list).
    with open('./files/taobao_cookies.txt', 'w', encoding='utf-8') as f:
        f.write(str(cookies))
    
  4. selenium使用cookies

    from selenium import webdriver
    import time
    from selenium.webdriver.common.keys import Keys
    import ast

    # Create the browser and open the site.
    b = webdriver.Chrome()
    b.get('https://www.taobao.com')

    # Load the saved cookies. The file holds the text of a Python literal
    # (a list of dicts); ast.literal_eval parses it without executing code,
    # unlike eval(), which would run anything placed in the file.
    with open(r'./files/taobao_cookies.txt', 'r', encoding='utf-8') as f:
        saved_cookies = ast.literal_eval(f.read())

    for cookie in saved_cookies:
        # Only cookies flagged as secure are added, as in the original note;
        # .get() avoids a KeyError for entries without a 'secure' field.
        if cookie.get('secure'):
            b.add_cookie(cookie)
    
  5. 设置页面滚动,爬取京东数据

    def get_net_data():
        """Load JD search-result pages for "电脑" in Chrome and collect their HTML.

        Visits the odd page numbers 1, 3, ..., 199. Each page is scrolled down
        in 500-pixel steps until the scroll offset exceeds 8000, and its
        ``page_source`` is then captured.

        The driver is stored in the module-level name ``b`` and is not closed
        here.

        Returns:
            A list of HTML strings, one per visited page.
        """
        global b
        b = webdriver.Chrome()

        base_url = ('https://search.jd.com/Search?keyword=%E7%94%B5%E8%84%91'
                    '&pvid=2d0c277e2ea34ae8bb95598ad3a04f48')
        scroll_step = 500
        max_height = 8000

        pages = []
        for page in range(1, 200, 2):
            # The "s" offset is 1 for page 1, 56 for page 3, and grows by 60
            # for every later page visited.
            start = 1 if page == 1 else 60 * ((page - 3) // 2) + 56
            b.get(f'{base_url}&page={page}&s={start}&click=0')
            time.sleep(0.5)

            # Scroll step by step; the last scroll is the first offset beyond
            # max_height and is not followed by a pause.
            for y in range(scroll_step, max_height + scroll_step + 1, scroll_step):
                b.execute_script(f'window.scrollTo(0, {y})')
                if y <= max_height:
                    time.sleep(0.2)

            pages.append(b.page_source)
        return pages
    
    
    def analysis_data(data):
        """Parse search-result pages into rows of product fields.

        Args:
            data: Iterable of HTML strings, one per result page.

        Returns:
            A list with one entry per page. Each entry is a list of
            ``[brand, picture, link, price, evaluation, store_link]`` rows, one
            per ``#J_goodsList>ul>li`` element. A field whose tag, attribute
            or text is missing is stored as ``''`` instead of raising or
            producing ``'https:None'``.
        """
        # Local import: the surrounding snippet does not import BeautifulSoup.
        from bs4 import BeautifulSoup

        def text_of(tag):
            # Text content of a tag, or '' when the selector matched nothing.
            return tag.get_text() if tag is not None else ''

        def url_of(tag, attr):
            # Attribute values on the page are protocol-relative; prefix the
            # scheme. Returns '' when the tag or the attribute is missing.
            value = tag.attrs.get(attr) if tag is not None else None
            return 'https:' + str(value) if value else ''

        all_info = []
        for page_html in data:
            soup = BeautifulSoup(page_html, 'lxml')
            page_rows = []
            for li in soup.select('#J_goodsList>ul>li'):
                # The first space-separated token of the product title is
                # used as the brand.
                brand = text_of(li.select_one('.p-name.p-name-type-2>a>em')).split(' ')[0]
                picture = url_of(li.select_one('img'), 'src')
                link = url_of(li.select_one('a'), 'href')
                price_tag = li.select_one('i')
                if price_tag is not None and price_tag.string is not None:
                    price = str(price_tag.string)
                else:
                    price = ''
                evaluation = text_of(li.select_one('strong>a'))
                store_link = url_of(li.select_one('.curr-shop.hd-shopname'), 'href')
                page_rows.append([brand, picture, link, price, evaluation, store_link])
            all_info.append(page_rows)
        return all_info
    
    
    if __name__ == '__main__':
        # Local imports: this snippet uses csv without importing it, and os is
        # needed to create the output directory.
        import csv
        import os

        # Create the output directory before the long scrape so a missing
        # folder cannot discard the collected data at the very end.
        os.makedirs('./files', exist_ok=True)

        all_info = analysis_data(get_net_data())
        # newline='' lets the csv module control line endings itself.
        with open('./files/京东电脑所有信息.csv', 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['电脑品牌', '图片地址', '电脑链接', '电脑价格', '评价人数',  '店铺链接'])
            # all_info holds one list of rows per page; write them page by page.
            for page_rows in all_info:
                writer.writerows(page_rows)
    
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值