from selenium import webdriver
# Module-level Chrome options with the 'headless' argument added.
# NOTE(review): the __main__ block rebinds `option` to a fresh ChromeOptions
# before the driver is created, so this headless setting looks unused when the
# file is run as a script — confirm whether it is still needed.
option = webdriver.ChromeOptions()
option.add_argument('headless')
import csv
import time

from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
def jddata(pages=10):
    """Collect reviews page by page from the open product page and save them.

    Uses the module-level ``wd`` (WebDriver) and ``actions`` (ActionChains)
    objects created in the ``__main__`` block. For each page the visible
    review items are parsed with ``analyse``; afterwards all rows are written
    with ``save_csv``.

    Args:
        pages: Maximum number of review pages to read (default 10).

    Returns:
        list: One ``[name, time, content]`` list per review, in page order.
    """
    commts = []
    for page in range(1, pages + 1):
        items = wd.find_elements(By.CSS_SELECTOR, '#comment-6 .comment-item')
        commts.extend(analyse(items))
        print("翻第", page, "页")
        if page == pages:
            # Last requested page: no need to navigate any further.
            break
        # Look up the "next page" link. The plural lookup returns an empty
        # list instead of raising, so rows gathered so far are still saved
        # when there are fewer pages than requested.
        next_links = wd.find_elements(
            By.XPATH, '// *[ @ id = "comment-6"] / div[12] / div / div / a[7]')
        if not next_links:
            break
        next_link = next_links[0]
        # NOTE: this only queues a pointer move on the shared ActionChains;
        # nothing here performs it, so the JavaScript click below is what
        # actually triggers the page change.
        actions.move_to_element(next_link)
        time.sleep(1)
        wd.execute_script("arguments[0].click();", next_link)
        time.sleep(3)  # give the next page of reviews time to load
    save_csv(commts)
    return commts
def analyse(cmts):
    """Extract author, posting time and text from each review element.

    Args:
        cmts: Iterable of review elements (the ``.comment-item`` nodes that
            ``jddata`` collects from the page).

    Returns:
        list: One ``[name, time, content]`` list per element, in input order.
    """
    rows = []
    for item in cmts:
        name = item.find_element(By.CLASS_NAME, "user-info").text
        content = item.find_element(By.CLASS_NAME, "comment-con").text
        # The last <span> inside the "order-info" element is used as the
        # posting time.
        spans = item.find_element(By.CLASS_NAME, "order-info").find_elements(
            By.TAG_NAME, "span")
        posted_at = spans[-1].text
        print(name + "," + posted_at)
        rows.append([name, posted_at, content])
    return rows
def save_csv(commts, path=r'D:\Python\京东差评.csv'):
    """Write the collected reviews to a CSV file with a header row.

    Args:
        commts: Iterable of rows; the first three items of each row are
            written as author, posting time and review text.
        path: Destination file. Defaults to the location the script has
            always used.

    The file is opened with ``newline=''`` as the csv module requires (this
    avoids blank lines between rows on Windows) and encoded as ``utf-8-sig``
    so the Chinese text is written with a BOM that spreadsheet tools can use
    to detect UTF-8. The previous ``encoding='0'`` is not a valid codec name
    and made ``open`` raise ``LookupError``.
    """
    with open(path, 'w', encoding='utf-8-sig', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["发布者", "发布时间", "发布内容"])
        writer.writerows([row[0], row[1], row[2]] for row in commts)
    # The ``with`` block has already closed the file at this point.
    print("文件存储完毕")
if __name__ == "__main__":
    # Script entry point: open the JD product page, switch to the negative
    # reviews tab, then scrape and save the reviews via jddata().
    option = webdriver.ChromeOptions()
    # option.add_argument('headless')  # uncomment to run without a browser window
    # NOTE(review): the driver path is passed positionally here; newer
    # Selenium 4 releases expect it via a Service object instead — confirm
    # against the installed Selenium version.
    wd = webdriver.Chrome(r'E:\360浏览器下载\chromedriver_win32\chromedriver.exe', options=option)
    try:
        wd.get('https://item.jd.com/883575.html')  # navigate to the product page
        print("进入网页")
        actions = ActionChains(wd)  # used as a module-level global by jddata()
        wd.implicitly_wait(10)
        time.sleep(15)
        # Open the product reviews tab.
        button = wd.find_element(
            By.XPATH, "//li[@clstag='shangpin|keycount|product|shangpinpingjia_1']")
        button.click()
        time.sleep(15)
        # wd.switch_to.frame(wd.find_element_by_css_selector('div .current >a'))
        # Switch the review list to negative reviews.
        wd.find_element(
            By.XPATH, "//li[@clstag='shangpin|keycount|product|chaping']/a").click()
        time.sleep(15)
        jddata()
    finally:
        # Always shut down the browser and the driver process, even when a
        # lookup above fails.
        wd.quit()
# 用 Selenium 爬取京东某商品的差评 100 条
# 最新推荐文章于 2024-04-25 08:57:35 发布