import time
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Keyboard key constants (Enter, Backspace, ...) for send_keys
from selenium.webdriver.chrome.options import Options
# Scrape product data across result pages
def get_goods(bro):
    """Scrape the product cards on the current JD search-results page.

    For every ``.gl-item`` card, prints the product name, price, image URL
    and detail-page URL. A card whose sub-elements cannot be located is
    logged and skipped. Finally clicks the "下一页" (next page) link.

    NOTE(review): despite the original "multi-page" comment, this function
    only advances one page per call — it does not loop/recurse; confirm
    whether the caller is expected to invoke it repeatedly.

    :param bro: an active selenium WebDriver currently showing a JD
        search-results page.
    """
    goods = bro.find_elements(By.CLASS_NAME, 'gl-item')
    for good in goods:
        try:
            price = good.find_element(By.CSS_SELECTOR, 'div.p-price i').text
            href = good.find_element(By.CSS_SELECTOR, 'div.p-name a').get_attribute('href')
            img = good.find_element(By.CSS_SELECTOR, 'div.p-img > a > img').get_attribute('src')
            name = good.find_element(By.CSS_SELECTOR, 'div.p-name.p-name-type-2 > a > em').text
            # JD lazy-loads images: when 'src' is empty, fall back to the
            # 'data-lazy-img' attribute, which holds a protocol-relative URL.
            if not img:
                img = 'https:' + good.find_element(By.CSS_SELECTOR, 'div.p-img > a > img').get_attribute(
                    'data-lazy-img')
            print('''
                商品名称:%s
                商品价格:%s
                商品图片:%s
                商品连接:%s
            ''' % (name, price, img, href))
        except Exception as e:
            # Best-effort scraping: log the failed card and continue with
            # the rest instead of aborting the whole page.
            print(e)
            continue
    # Renamed from `next` to avoid shadowing the builtin of the same name.
    next_page = bro.find_element(By.PARTIAL_LINK_TEXT, '下一页')
    next_page.click()
options = Options()
# Hide the automation fingerprint (navigator.webdriver / "Chrome is being
# controlled by automated software") so JD is less likely to block us.
options.add_argument("--disable-blink-features=AutomationControlled")
bro = webdriver.Chrome(options=options)
bro.get('https://www.jd.com/')
# Implicit wait: every find_element call retries for up to 10 seconds.
bro.implicitly_wait(10)
try:
    search_input = bro.find_element(By.ID, 'key')
    search_input.send_keys('猫咪饮水机')
    # Press Enter to submit the search.
    search_input.send_keys(Keys.ENTER)
    # Backspace example (kept from the original article):
    # search_input.send_keys(Keys.BACK_SPACE)
    get_goods(bro)
except Exception as e:
    # Top-level boundary: report the failure before shutting the browser down.
    print(e)
finally:
    # quit() (rather than close()) also terminates the chromedriver process;
    # close() alone only closes the window and leaks the driver.
    bro.quit()
爬虫爬取京东商品信息
于 2023-07-13 20:15:51 首次发布
该代码示例展示了如何利用 Python 的 Selenium 库和 Chrome 浏览器驱动程序抓取京东网站上的商品数据,包括商品名称、价格、图片链接和商品详情链接。通过 CSS 选择器定位元素,处理单个商品解析失败等异常情况,并通过点击“下一页”实现翻页。
摘要由 CSDN 通过智能技术生成