Python爬虫入门3

一、Selenium

1. 元素交互操作

  1. 点击、清除
  2. ActionChains
    - 动作链对象,需要把driver驱动传给它
    - 动作链对象可以操作一系列设定好的动作行为
  3. frame的切换
    - switch_to.frame()
  4. 执行js代码
    - execute_script()

2. 代码实现

from selenium import webdriver  # 用来驱动浏览器的
from selenium.webdriver import ActionChains  # 破解滑动验证码的时候用的 可以拖动图片
from selenium.webdriver.common.keys import Keys  # 键盘按键操作
import time

driver = webdriver.Chrome('chromedriver.exe')


# 方式二 慢移
def slowly_move():
    try:
        driver.implicitly_wait(10)
        driver.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
        time.sleep(5)

        # 切换至页面内iframe 方法switch_to_frame已弃用
        driver.switch_to.frame('iframeResult')
        # 起始方块id:draggable
        source = driver.find_element_by_id('draggable')
        # 目标方块id:droppable
        target = driver.find_element_by_id('droppable')

        # 方式二
        print('起始:')
        print('大小:' + str(source.size), '\n标签名:' + str(source.tag_name), '\n文本:' + str(source.text),
              '\n坐标:' + str(source.location))
        print('-' * 50)
        print('目标:')
        print('大小:' + str(target.size), '\n标签名:' + str(target.tag_name), '\n文本:' + str(target.text),
              '\n坐标:' + str(target.location))

        # 计算滑动距离
        distance = target.location['x'] - source.location['x']
        # 按住起始滑块
        ActionChains(driver).click_and_hold(source).perform()
        s = 0
        while s < distance:
            # 每次移动两个像素,s和xoffset的值要一致
            ActionChains(driver).move_by_offset(xoffset=2, yoffset=0).perform()
            s += 2
            time.sleep(0.1)

        ActionChains(driver).release().perform()
        time.sleep(10)

    finally:
        driver.close()


# 方式一 快移
def quick_move():
    driver.implicitly_wait(10)
    driver.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    time.sleep(5)
    # 切换至页面内iframe 方法switch_to_frame已弃用
    driver.switch_to.frame('iframeResult')
    action = ActionChains(driver)
    # 起始方块id:draggable
    source = driver.find_element_by_id('draggable')
    # 目标方块id:droppable
    target = driver.find_element_by_id('droppable')
    # 方式一
    # 起始方块瞬间移动到目标方块
    # 拟定好一个动作后再加执行perform()
    action.drag_and_drop(source, target).perform()
    time.sleep(10)


# 点击清除
def jd_search():
    try:
        # 隐式等待
        driver.implicitly_wait(10)
        driver.get('https://www.jd.com')

        # 点击搜索
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('围城')

        btn = driver.find_element_by_class_name('button')
        btn.click()
        time.sleep(5)

        # 清除内容
        input_tag2 = driver.find_element_by_id('key')
        input_tag2.clear()
        time.sleep(2)
        input_tag2.send_keys('美的')
        input_tag2.send_keys(Keys.ENTER)

        time.sleep(10)

    finally:
        driver.close()


def js_exec():
    try:
        driver.implicitly_wait(10)
        driver.get('https://www.baidu.com')
        driver.execute_script(
            '''
            alert('Hello, World!');
            '''
        )
        time.sleep(10)
    finally:
        driver.close()


# Script entry point: run the JavaScript-execution demo.
if __name__ == "__main__":
    js_exec()
 

二、获取京东商品列表

  1. 普通版代码
from selenium import webdriver  # 用来驱动浏览器的
from selenium.webdriver.common.keys import Keys  # 键盘按键操作
import time

driver = webdriver.Chrome('chromedriver.exe')

# Search jd.com for a keyword, scroll the first result page and append every
# listed item (name, link, price, review count) to a text file.
try:
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com')
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('抱枕')
    time.sleep(1)
    input_tag.send_keys(Keys.ENTER)
    time.sleep(5)
    # Scroll down with JavaScript before collecting the items
    # (presumably so that more of the result list gets loaded).
    js_code = '''
        //document.documentElement.cTop = 6000;
        window.scrollTo(0, 5000);
    '''
    driver.execute_script(js_code)
    time.sleep(6)
    # Note the plural: find_elements returns a list of all matches.
    good_lists = driver.find_elements_by_class_name('gl-item')
    # enumerate() replaces the manual counter, whose tab-indented increment
    # did not parse under Python 3.
    for i, good in enumerate(good_lists, start=1):
        good_name = good.find_element_by_css_selector('.p-name em').text
        good_name = good_name.replace('\n', '')

        good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')

        good_price = good.find_element_by_css_selector('.p-price strong').text

        good_commit = good.find_element_by_class_name('p-commit').text

        good_content = f'''
        商品顺序: {i}
        商品名称:{good_name}
        商品链接:{good_url}
        商品价格:{good_price}
        商品评价:{good_commit}
        '''
        print(good_content)
        with open('京东抱枕.txt', mode='a', encoding='utf-8') as f:
            f.write(good_content)
finally:
    # quit() ends the whole session and the driver process;
    # close() would only close the current window.
    driver.quit()

  2. 暴力改进版
from selenium import webdriver  # 用来驱动浏览器的
from selenium.webdriver.common.keys import Keys  # 键盘按键操作
import time


def get_good(num, driver):
    """Scrape the current JD result page and every following page.

    For each page: scroll down, collect all ``gl-item`` elements, print each
    item's name, link, price and review count, and append the same text to
    ``京东抱枕.txt``. Then click the next-page button and repeat until no
    usable next-page button is found.

    Pages are walked in a loop rather than by recursion, so the number of
    pages is not limited by the recursion depth and the driver is not closed
    once per page.

    Args:
        num: Sequence number written for the first item; incremented by one
            for every item written.
        driver: Selenium WebDriver that is already showing a search-result
            page.

    The driver is left open: the caller created it and is responsible for
    shutting it down.
    """
    # The scroll script is the same for every page, so build it once.
    js_code = '''
                //document.documentElement.cTop = 6000;
                window.scrollTo(0, 6000);
            '''
    while True:
        # Scroll down with JavaScript before collecting the items
        # (presumably so that more of the result list gets loaded).
        driver.execute_script(js_code)
        time.sleep(6)
        # Note the plural: find_elements returns a list of all matches.
        good_lists = driver.find_elements_by_class_name('gl-item')
        for good in good_lists:
            good_name = good.find_element_by_css_selector('.p-name em').text
            good_name = good_name.replace('\n', '')

            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')

            good_price = good.find_element_by_css_selector('.p-price strong').text

            good_commit = good.find_element_by_class_name('p-commit').text

            good_content = f'''
            商品顺序: {num}
            商品名称:{good_name}
            商品链接:{good_url}
            商品价格:{good_price}
            商品评价:{good_commit}
            '''
            print(good_content)
            with open('京东抱枕.txt', mode='a', encoding='utf-8') as f:
                f.write(good_content)
            print('写入成功')
            num += 1

        # Next page. The plural lookup returns an empty list instead of
        # raising when the button is absent, which ends the loop cleanly.
        next_tags = driver.find_elements_by_class_name('pn-next')
        if not next_tags:
            break
        next_tag = next_tags[0]
        # NOTE(review): assumes the site marks an inactive next-page button
        # with a "disabled" class on the last page - confirm against the page.
        if 'disabled' in (next_tag.get_attribute('class') or '').split():
            break
        next_tag.click()
        time.sleep(5)


# Script entry point: search jd.com for the keyword, then scrape the result
# pages starting with item number 1.
if __name__ == '__main__':
    driver = webdriver.Chrome('chromedriver.exe')
    num = 1
    try:
        # Implicit wait: element lookups poll for up to 10 seconds.
        driver.implicitly_wait(10)
        driver.get('https://www.jd.com')
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('抱枕')
        time.sleep(1)
        input_tag.send_keys(Keys.ENTER)
        time.sleep(5)
        get_good(num, driver)
    finally:
        # quit() ends the whole session and the driver process;
        # close() would only close the current window.
        driver.quit()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值