# Scrape JD.com product search data with Selenium.
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from csv import writer
import time, os
# Start the Chrome session; `b` is the module-level browser that goods() drives.
b = Chrome()
# Open the JD search results page that the scrape starts from.
search_url = r'https://search.jd.com/Search?keyword=%E7%94%B5%E9%A5%AD%E9%94%85&enc=utf-8&wq=%E7%94%B5%E9%A5%AD%E9%94%85&pvid=20d97125d00a409fb95d2735aeb0a7c6'
b.get(search_url)
def goods(p=1):
    """Scrape product rows from up to ``p`` consecutive search result pages.

    Drives the module-level browser ``b``, which must already be showing a
    search results page. Each page is scrolled down in steps before its items
    are read (presumably to trigger lazy loading of the product cards).

    Args:
        p: Number of result pages to scrape, starting from the current one.

    Returns:
        A list of rows ``[price, introduce, comments, store, detail_url,
        store_url]``, one per product card, in page order. If no next-page
        button is found, paging stops early and the rows collected so far
        are returned.

    Raises:
        NoSuchElementException: If a product card lacks its price, title,
            comment count or detail link element.
    """
    # Function-scope import keeps this block self-contained; selenium is
    # already a dependency of this file.
    from selenium.common.exceptions import NoSuchElementException

    gl = []
    for i in range(p):
        # Scroll down step by step so the page can load its items.
        for _ in range(7):
            b.execute_script('window.scrollBy(0, 1000)')
            time.sleep(1)
        # Give the page time to finish rendering before reading the list.
        time.sleep(5)
        goods_list = b.find_elements(By.CSS_SELECTOR, '#J_goodsList>ul>li>div')
        for x in goods_list:
            # Price
            price = x.find_element(By.CSS_SELECTOR, '.p-price i').text
            # Title/description, flattened onto one line
            introduce = x.find_element(By.CSS_SELECTOR, '.p-name-type-2 em').text.replace('\n', '-')
            # Comment count
            comments = x.find_element(By.CSS_SELECTOR, '.p-commit strong').text
            # Shop name and link; some cards have no shop element, so fall
            # back to placeholders. Only "element missing" is tolerated, so
            # Ctrl-C and genuine driver failures still propagate.
            try:
                store = x.find_element(By.CSS_SELECTOR, '.p-shop span').text
                store_url = x.find_element(By.CSS_SELECTOR, '.p-shop>span>a').get_attribute('href')
            except NoSuchElementException:
                store = '厂家直销'
                store_url = '暂无厂家链接'
            # Product detail page URL
            detail_url = x.find_element(By.CSS_SELECTOR, '.p-name-type-2>a').get_attribute('href')
            gl.append([price, introduce, comments, store, detail_url, store_url])
        print(f'第{i + 1}页加载完成')
        # Advance to the next page. A missing button means there is nothing
        # further to load: stop and keep the rows already collected instead
        # of losing them to an exception.
        try:
            next_page = b.find_element(By.CLASS_NAME, 'pn-next')
        except NoSuchElementException:
            break
        next_page.click()
        time.sleep(5)
    return gl
if __name__ == '__main__':
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(r'file', exist_ok=True)
    try:
        # Scrape before opening the output file, so a failed run does not
        # truncate a CSV produced by an earlier successful run.
        goods_list = goods(10)
    finally:
        # Always end the browser session, even when scraping fails.
        b.quit()
    # The context manager closes the file even if writing raises.
    with open(r'file/goods.csv', 'w', encoding='utf-8', newline='') as f:
        w1 = writer(f)
        w1.writerows(goods_list)