这是老师要求爬取两万条数据时顺便做的,我爬取的是豆瓣上《千与千寻》的评论。
下面直接上代码。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import re
import csv
import pymysql
import lxml.html
import time
from redis import StrictRedis,ConnectionPool
# Connect to the local Redis server through a connection pool.
pool = ConnectionPool(
    host='localhost',
    port=6379,
    db=0,
    decode_responses=True,
)
redis = StrictRedis(connection_pool=pool)

# Create the browser object (Chrome WebDriver).
driver = webdriver.Chrome()

# Douban page for the film "Spirited Away".
url = 'https://movie.douban.com/subject/1291561/'

# Record and report the start time (seconds since the epoch).
start = time.time()
print(f'开始时间:{start}')

# Key for the data bundle (presumably the Redis key; its use is outside this view).
m = 'QianyuQ'
try:
driver.get(url)#输入url
button_3 = driver.find_element(By.XPATH,'//*[@id="comments-section"]/div[1]/h2/span/a')#获取全部评论的节点
button_3.click()#点击
#获取评论者名字
names = driver.find_elements(By.XPATH,'//*[@id="comments"]/div/div[2]