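# Python version: 3.8.1
# Selenium version: 3.141.0
# Automates a product search on JD.com and saves the product information from the search results to a local file.
# Saved fields: 1. shop name, 2. product name, 3. price, 4. review count, 5. detail link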
import time
import csv
from selenium import webdriver;
from selenium.webdriver.common.by import By
# Output file for the scraped rows. It is opened in append mode, so rows
# written by earlier runs are kept; newline='' is what the csv module expects
# for files it writes to.
f = open('./Info.csv', mode='a', encoding='utf-8-sig', newline='')
csv_write = csv.DictWriter(
    f,
    fieldnames=[
        '序号',
        '店铺名称',
        '商品名',
        '价格',
        '评价数量',
        '详情链接',
    ],
)
# In append mode the stream starts at the end of the file, so a position of 0
# means the file is new or empty. Write the header row only in that case;
# otherwise every run would insert another header line in the middle of the
# existing data.
if f.tell() == 0:
    csv_write.writeheader()
# Scroll the browser down step by step so that all of the data gets loaded.
def drop_down():
    """Scroll the current page down in six steps, pausing 1 second before each.

    Each step sets ``document.documentElement.scrollTop`` to a fraction
    (1/9, 3/9, ..., 11/9) of ``scrollHeight`` by running a script through the
    module-level ``driver``.
    """
    script_template = 'document.documentElement.scrollTop = document.documentElement.scrollHeight * %f'
    numerator = 1
    while numerator < 12:
        time.sleep(1)
        driver.execute_script(script_template % (numerator / 9))
        numerator += 2
def _first_text(parent, selector):
    """Return the text of the first element under *parent* matching *selector*.

    Uses ``find_elements`` (plural), which yields an empty list instead of
    raising when nothing matches; in that case an empty string is returned.
    """
    matches = parent.find_elements(By.CSS_SELECTOR, selector)
    return matches[0].text if matches else ''


def get_info():
    """Collect every product on the current result page and append it to the CSV.

    Scrolls the page with ``drop_down()`` first, then reads shop name, product
    name, price, review count and detail link from each ``.gl-item`` entry,
    prints them, and writes one row per product through the module-level
    ``csv_write``. The serial number restarts at 1 on every call.

    A product entry that lacks one of the sub-elements gets an empty string
    for that field rather than aborting the whole page.
    """
    # Step 5: collect the product data.
    drop_down()
    dataList = driver.find_elements(By.CSS_SELECTOR, '.goods-list-v2 .gl-item')
    print(dataList)
    for count, li in enumerate(dataList, start=1):
        print('序号:' + str(count))
        shopName = _first_text(li, '.p-shop span a')
        print('店铺名称:' + shopName)
        # Product titles can span several lines; join them into one.
        title = _first_text(li, '.p-name-type-2 em').replace('\n', '')
        print('商品名:' + title)
        pric = _first_text(li, '.p-price strong i')
        print('价格:' + pric)
        comment = _first_text(li, '.p-commit strong a')
        print('评价数量:' + comment)
        links = li.find_elements(By.CSS_SELECTOR, '.p-img a')
        # get_attribute returns None when the attribute is absent; fall back
        # to '' so the string concatenation below cannot raise TypeError.
        info = (links[0].get_attribute('href') if links else None) or ''
        print('详情链接:' + info)
        print('------------------------------------')
        csv_write.writerow({
            '序号': count,
            '店铺名称': shopName,
            '商品名': title,
            '价格': pric,
            '评价数量': comment,
            '详情链接': info,
        })
# Script entry point: ask for a search term and a page count, run the search
# on JD.com, and scrape the requested number of result pages.
if __name__ == '__main__':
    print('请输入要搜索的商品:')
    _name = input()
    print('请输入要获取多少页信息:')
    _Num = input()
    try:
        # Parse the page count first so that a non-numeric answer is reported
        # before a browser window is opened.
        page_total = int(_Num)
        options = webdriver.ChromeOptions()
        # Keeps the browser from closing automatically when the script ends.
        options.add_experimental_option('detach', True)
        # 1. Start the browser. `driver` stays a module-level name because
        #    drop_down() and get_info() read it as a global.
        driver = webdriver.Chrome(options=options)
        # Pages need time to load after navigation, so let element lookups
        # wait up to 10 seconds. Set before the first lookup so it applies to
        # the home page as well.
        driver.implicitly_wait(10)
        # 2. Open the site.
        driver.get('https://www.jd.com/')
        # 3. Locate the search box and type the search term.
        driver.find_element(By.CSS_SELECTOR, '#key').send_keys(_name)
        # 4. Locate the search button and click it.
        driver.find_element(By.CSS_SELECTOR, '#search > div > div.form > button').click()
        for page in range(1, page_total + 1):
            print('正在采集第' + str(page) + '页信息...')
            get_info()
            # Only move on when another page is still wanted; clicking "next"
            # after the final page is wasted navigation and, if no such link
            # exists, would raise and skip the completion message.
            if page < page_total:
                driver.find_element(By.CSS_SELECTOR, '#J_bottomPage > span.p-num > a.pn-next').click()
        print('------信息采集完毕------')
    except Exception as e:
        # Top-level boundary of the script: report the failure and finish.
        print(e)
    finally:
        # Close the CSV file so every written row is flushed to disk.
        f.close()