Python 爬虫爬取 MOOC 课程评论

# -*- coding:gbk -*-

import json
from urllib import request,parse

# HTTP headers sent with every request. Both values are placeholders
# ("浏览器的User-Agent" / "你的Cookie") — replace them with a real browser
# User-Agent string and a logged-in icourse163.org session cookie before running.
headers={'User-Agent':'浏览器的User-Agent'
, 'cookie': '你的Cookie' }


def get_couse_count(course_key):
    """Search icourse163 for courses whose name/teacher matches *course_key*.

    Prints each hit and returns a list of dicts with keys 'id', 'course_id',
    'course_teacher' and 'course_name', or None when nothing matched.
    (Function name kept as-is — "couse" is a historical typo callers rely on.)
    """
    # NOTE(review): the csrfKey is hard-coded and presumably tied to the session
    # cookie in `headers` — confirm it is still valid before running.
    url = 'http://www.icourse163.org/web/j/mocSearchBean.searchCourse.rpc?csrfKey=4c029bb4c27e4aaaad9ca7fb694b2396'

    def _query_page(page_index):
        # POST one search-result page and return the decoded JSON payload.
        data = "mocCourseQueryVo={'keyword':'%s','pageIndex':%s,'highlight':'true','orderBy':0,'stats':30,'pageSize':20}" % (course_key, page_index)
        req = request.Request(url, headers=headers, method='POST')
        rep = request.urlopen(req, data=bytes(data, 'utf-8'))
        return json.loads(rep.read().decode('utf-8'))

    result = _query_page(1)
    if result['result']['query'] is None:
        print("没有查询到相关课程!!!")
        return None
    # 'totlePageCount' is the server's own (misspelled) field name.
    totalcount = result['result']['query']['totlePageCount']
    course_lists = []
    index = 0
    for i in range(1, totalcount + 1):
        result = _query_page(i)
        for course in result['result']['list']:
            courses = {
                'id': index,
                'course_id': course['courseId'],
                'course_teacher': course['highlightTeacherNames'],
                'course_name': course['highlightName'],
            }
            print("id:{},课程id:{},教师名:{},课程名:{}".format(courses['id'], courses['course_id'], courses['course_teacher'], courses['course_name']))
            course_lists.append(courses)
            index += 1
    return course_lists


def get_comment(course_id):
    """Fetch and print every evaluation (nickname + comment text) for *course_id*.

    Prints a notice and returns None when the course has no comments yet.
    """
    # NOTE(review): hard-coded csrfKey — presumably tied to the session cookie
    # in `headers`; confirm it is still valid before running.
    url = 'https://kaoyan.icourse163.org/web/j/kaoyanCourseBean.getCourseEvaluatePaginationByCourseId.rpc?csrfKey=4c029bb4c27e4aaaad9ca7fb694b2396'

    def _query_page(page_index):
        # POST one page of evaluations and return the decoded JSON payload.
        data = 'courseId=%s&pageIndex=%s&pageSize=20&orderBy=3' % (course_id, page_index)
        req = request.Request(url, headers=headers, method='POST')
        rep = request.urlopen(req, data=bytes(data, 'utf-8'))
        return json.loads(rep.read().decode('utf-8'))

    result = _query_page(1)
    if not result['result']['list']:
        print('该课程还没有评论')
        return None

    # 'totlePageCount' is the server's own (misspelled) field name.
    totlePage = result['result']['query']['totlePageCount']
    for i in range(1, totlePage + 1):
        result = _query_page(i)
        for com in result['result']['list']:
            print("用户名:{},评论:{}".format(com['userNickName'], com['content']))



if __name__ == "__main__":
    # Interactive driver: search courses by keyword, then browse the comments
    # of one course at a time by its printed list id.
    while True:
        print("请输入课程名或教师(输入-1退出):")
        course_key = input()
        if course_key == '-1':
            break
        course_lists = get_couse_count(course_key)
        if course_lists is None:
            continue
        while True:
            print("请输入您要查看评论的课程编号id(退回上一步输入 字母b):")
            idh = input()
            if idh == 'b':
                break
            # Guard against non-numeric input and out-of-range ids; the
            # original `course==None` check was dead code — list indexing
            # raises IndexError (and int() raises ValueError), never None.
            try:
                course = course_lists[int(idh)]
            except (ValueError, IndexError):
                continue
            print("课程名:" + course['course_name'] + ",评论如下:")
            get_comment(course['course_id'])
    
    

仅供个人学习参考

  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值