"""
主要功能:
1、爬取慕课网课程评价
2、将课程评价保存到Excel
使用:python3.6 selenium Chrome浏览器
python包:selenium、BeautifulSoup、pandas
部分代码参考:https://blog.csdn.net/weixin_43330908/article/details/82959940
"""
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
def _node_text(node):
    """Return ``node.text``, or an empty string when the lookup found nothing."""
    return "" if node is None else node.text


def _parse_reviews(html):
    """Parse one rendered review page into table rows.

    Args:
        html: Page source captured from the browser once the review tab is
            open.

    Returns:
        A list of ``[user, content, time, likes, term]`` lists, one per
        review item found in ``html``. Missing pieces become ``""`` (text
        nodes) or ``None`` (span values) instead of raising.
    """
    soup = BeautifulSoup(html, 'html.parser')
    items = soup.find_all('div', {
        'class': 'ux-mooc-comment-course-comment_comment-list_item_body'})

    rows = []
    for item in items:
        user_name = _node_text(item.find("a", {
            "class": "primary-link ux-mooc-comment-course-comment_comment-list_item_body_user-info_name"}))

        # The first four characters of the time text are dropped, as in the
        # original script (presumably a fixed label prefix -- confirm against
        # the live page).
        publish_time = _node_text(item.find('div', {
            'class': 'ux-mooc-comment-course-comment_comment-list_item_body_comment-info_time'}))[4:]

        # Which course run the review belongs to; strip layout whitespace.
        course_nums = _node_text(item.find('div', {
            'class': 'ux-mooc-comment-course-comment_comment-list_item_body_comment-info_term-sign'}))
        course_nums = course_nums.replace(" ", "").replace("\n", "")

        # Review text and like count are read positionally from the item's
        # <span> elements: index 1 is the text, index 5 the like count.
        spans = [span.string for span in item.find_all('span')]
        content = spans[1] if len(spans) > 1 else None
        likes = spans[5] if len(spans) > 5 else None

        rows.append([user_name, content, publish_time, likes, course_nums])
    return rows


if __name__ == '__main__':
    import time

    # NOTE(review): pandas >= 2.0 can no longer write the legacy ".xls"
    # format; the path is kept unchanged here, switch to ".xlsx" if needed.
    output_path = "./mooc课程评论.xls"  # where the Excel file is written
    driver_path = r".\Chrome\chromedriver.exe"  # raw string: no bogus escapes
    url = 'https://www.icourse163.org/course/BIT-268001'  # course to scrape
    total_pages = 1373  # number of review pages for this course
    # NOTE(review): the review list appears to be rendered asynchronously
    # after a click; a short fixed pause keeps page_source from being read
    # before the new page is in the DOM. Tune or replace with an explicit
    # wait once a reliable "loaded" condition is known.
    page_pause_seconds = 1.0
    columns = ["用户", "内容", "时间", "点赞数", "第几次课程"]

    rows = []
    driver = webdriver.Chrome(executable_path=driver_path)
    try:
        driver.get(url)
        # Open the "course reviews" tab.
        driver.find_element_by_id("review-tag-button").click()
        time.sleep(page_pause_seconds)

        for page in range(total_pages):
            # Scrape the page currently shown BEFORE paging forward, so the
            # first page is not skipped.
            page_rows = _parse_reviews(driver.page_source)
            for user_name, content, publish_time, _likes, course_nums in page_rows:
                print(user_name)
                print(publish_time)
                print(course_nums)
                print(content)
            rows.extend(page_rows)

            if page == total_pages - 1:
                break  # last page: there is nothing further to click
            # Re-locate the "next" button every time; the pager may be
            # re-rendered, which would leave a cached element stale.
            driver.find_element_by_class_name("ux-pager_btn__next").click()
            time.sleep(page_pause_seconds)
    finally:
        # Always release the browser, and persist whatever was collected even
        # if the scrape was interrupted part-way through.
        driver.quit()
        table = pd.DataFrame(rows, columns=columns)
        with pd.ExcelWriter(output_path) as writer:
            table.to_excel(writer, index=False)