# Scrape the comments of a JD.com product and save them to CSV (对京东某商品的评论进行爬虫并保存)
import random
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
import time
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.common.keys import Keys
# Raw string so the Windows backslashes are not treated as escape sequences.
CHROMEDRIVER_PATH = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'
MAX_PAGES = 100  # upper bound on comment pages to crawl

browser = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
browser.get('https://item.jd.com/100007264815.html')
browser.switch_to.window(browser.window_handles[0])
time.sleep(3)  # give the product page time to finish loading
browser.switch_to.window(browser.window_handles[-1])

comments = []  # accumulated comment texts across all pages
for page in range(MAX_PAGES):
    try:
        # Scroll to the very bottom so the lazy-loaded comment section renders.
        browser.execute_script("var q=document.documentElement.scrollTop=1000000")
        comment_panel = browser.find_element_by_xpath('//*[@id="comment-4"]')
        soup = BeautifulSoup(comment_panel.get_attribute('innerHTML'), 'lxml')
        comment_nodes = soup.find_all('p', {'class': 'comment-con'})
        if not comment_nodes:
            print('无评论')  # no comments on this page -> stop crawling
            break
        comments.extend(node.get_text() for node in comment_nodes)
        print(comments)
        # Randomized delay (1-6 s) to look less like a bot.
        time.sleep(random.random() * 5 + 1)
    except Exception as e:
        # Best-effort scraping: log the error and still try to advance the pager.
        print(e)
    try:
        # Click "next page" via JavaScript so overlays cannot intercept the click.
        next_button = browser.find_element_by_css_selector(
            '#comment-4 > div.com-table-footer > div > div > a.ui-pager-next')
        browser.execute_script("arguments[0].click();", next_button)
        time.sleep(1)  # let the next page of comments load
    except Exception as e:
        print(e)

# utf_8_sig writes a BOM so Excel opens the Chinese text correctly.
df = pd.DataFrame(comments)
df.to_csv('狗粮好评.csv', index=False, encoding='utf_8_sig')