【Python爬虫】爬取网易云评论

爬取网易云音乐最新评论

import requests
import time
import json
import datetime
import xlwt
# HTTP headers sent with every request to music.163.com: a fixed Host header
# plus a desktop Chrome (Windows) User-Agent string.
headers = {
    'Host': 'music.163.com',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.132 Safari/537.36'
    ),
}
def get_comments(songId,page):
    """Fetch one batch of comments for a song from the music.163.com API.

    Args:
        songId: Song identifier; interpolated into the request URL.
        page: Despite its name, this value is sent as the ``offset`` query
            parameter. Each request asks for up to 100 comments (``limit=100``).

    Returns:
        A list of dicts, one per entry of the response's ``comments`` array,
        each with the keys ``userId``, ``nickname``, ``content``, ``date`` and
        ``likedCount`` (inserted in that order).

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response has no ``comments`` entry or an item lacks
            one of the expected fields.
    """
    url = 'http://music.163.com/api/v1/resource/comments/R_SO_4_{0}?limit=100&offset={1}'.format(songId, page)
    # Without a timeout a stalled connection would block the crawl forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    result = json.loads(response.text)
    return [
        {
            # Commenter id
            'userId': item['user']['userId'],
            # Commenter nickname
            'nickname': item['user']['nickname'],
            # Comment text, with line breaks flattened to commas so it fits one cell
            'content': item['content'].replace("\n", ","),
            # Post time: the API value is divided by 1000 (treated as milliseconds)
            # and rendered in the machine's local time zone
            'date': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(item['time'] / 1000)),
            # Number of likes
            'likedCount': item['likedCount'],
        }
        for item in result['comments']
    ]
if __name__ == '__main__':
    # Script entry point: crawl the comments of one song in batches of 100 and
    # write them to an .xls workbook, one comment per row.
    # Titles for the header row (named so the builtin `list` is not shadowed).
    column_titles = ["评论者id","评论者昵称","评论内容","评论发表时间","点赞数"]
    workbook = xlwt.Workbook(encoding="utf-8")
    worksheet = workbook.add_sheet("sheet1",cell_overwrite_ok=True)  # create the worksheet
    for col_index, title in enumerate(column_titles):
        worksheet.write(0, col_index, title)
    # Row 0 holds the titles. A running counter keeps the rows contiguous even
    # when a request returns fewer than 100 comments.
    next_row = 1
    for offset in range(0,1101,100):
        for comment in get_comments("1330348068", offset):
            # Cells are written in the dict's insertion order.
            for col_index, value in enumerate(comment.values()):
                worksheet.write(next_row, col_index, value)
            next_row += 1
    workbook.save('起风了评论表.xls')

爬取网易云音乐热评

在这里插入图片描述

import requests
import time
import json
import xlwt
# HTTP headers sent with every request to music.163.com: a fixed Host header
# plus a desktop Chrome (Windows) User-Agent string.
headers = {
    'Host': 'music.163.com',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.132 Safari/537.36'
    ),
}
def get_comments(songId):
    """Fetch the hot comments for a song from the music.163.com API.

    A single request is made with ``limit=100`` and ``offset=0``; the entries
    of the response's ``hotComments`` array are converted to plain dicts.

    Args:
        songId: Song identifier; interpolated into the request URL.

    Returns:
        A list of dicts, one per hot comment, each with the keys ``userId``,
        ``nickname``, ``content``, ``date`` and ``likedCount`` (inserted in
        that order).

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response has no ``hotComments`` entry or an item
            lacks one of the expected fields.
    """
    url = 'http://music.163.com/api/v1/resource/comments/R_SO_4_{0}?limit=100&offset=0'.format(songId)
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    result = json.loads(response.text)
    return [
        {
            # Commenter id
            'userId': item['user']['userId'],
            # Commenter nickname
            'nickname': item['user']['nickname'],
            # Comment text, with line breaks flattened to commas so it fits one cell
            'content': item['content'].replace("\n", ","),
            # Post time: the API value is divided by 1000 (treated as milliseconds)
            # and rendered in the machine's local time zone
            'date': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(item['time'] / 1000)),
            # Number of likes
            'likedCount': item['likedCount'],
        }
        for item in result['hotComments']
    ]
if __name__ == '__main__':
    # Script entry point: fetch the hot comments of one song and write them to
    # an .xls workbook, one comment per row.
    # Titles for the header row (named so the builtin `list` is not shadowed).
    column_titles = ["评论者id","评论者昵称","评论内容","评论发表时间","点赞数"]
    workbook = xlwt.Workbook(encoding="utf-8")
    worksheet = workbook.add_sheet("sheet1",cell_overwrite_ok=True)  # create the worksheet
    for col_index, title in enumerate(column_titles):
        worksheet.write(0, col_index, title)
    # Data rows start at 1 because row 0 holds the titles; cells are written in
    # each dict's insertion order.
    for row_index, comment in enumerate(get_comments("566436427"), start=1):
        for col_index, value in enumerate(comment.values()):
            worksheet.write(row_index, col_index, value)

    workbook.save('起风了热评表.xls')

jxl的使用总结(java操作excel)

界面

网易云音乐(Cloudmusic)API

浏览器控制台报错:… from origin 'http://localhost:8080' has been blocked by CORS policy: …
也就是说,前端直接请求网易云接口时遇到了跨域问题。
在这里插入图片描述
解决vue-cli项目开发中跨域问题

在这里插入图片描述

// Dev-server proxy table: requests whose path starts with '/api' are forwarded
// to the target below, so the browser only ever talks to the local dev server.
proxyTable: {
      '/api': {
        target: 'https://music.163.com', // host of the real API
        secure: false,                   // needed when the target is https
        changeOrigin: true,             // whether to proxy cross-origin
        pathRewrite: {
          '^api': ''               // rewrite the request path
          // NOTE(review): this pattern has no leading slash, so it presumably
          // never matches paths such as '/api/v1/...' and the rewrite is a
          // no-op. Confirm before changing it to '^/api': that would strip
          // the '/api' prefix from the forwarded URL.
        }
        // Author's note: without pathRewrite the request goes to https://music.163.com/api/...
        // Author's note: with a (matching) pathRewrite it goes to https://music.163.com/..
      }
    }

在这里插入图片描述
展示:
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

涉及知识:vue + element-ui + axios + vuex + router

<template>
<div style="width: 90%;margin: 0 auto">
  <el-row style="padding: 20px;">
    <el-col :span="7">
      <el-input v-model="songName" placeholder="请输入歌曲名称"></el-input>
    </el-col>
    <el-col :span="6">
      <el-button type="primary" icon="el-icon-search" @click="searchSongByName" >搜索</el-button>
    </el-col>
  </el-row>
  <!-- Table: bordered, bound to tableData, header labels and cell text centered -->
  <el-table stripe border
            :data="tableData"
            :header-cell-style="{'text-align':'center'}"
            :cell-style="{'text-align':'center'}"
            style="width: 80%;">
    <el-table-column prop="name" label="歌曲名称" width="180">
    </el-table-column>
    <el-table-column prop="songer" label="歌手" width="180">
    </el-table-column>
    <el-table-column prop="time" label="时间">
    </el-table-column>
    <el-table-column label="操作">
      <template slot-scope="scope">
        <el-button type="text" @click="queryComment(scope.row.id,'hotComments')">查看热评</el-button>
        <el-button type="text" @click="queryComment(scope.row.id,'comments')">查看最新评论</el-button>
      </template>
    </el-table-column>
  </el-table>
</div>
</template>

<script>
// Song search page: looks songs up by name and lets the user jump to the
// comments page for a chosen song.
export default {
  name: '',
  data () {
    return {
      songName: '起风了', // keyword used for the search (also run once on creation)
      tableData: [] // table rows: { id, name, songer, time }
    }
  },
  methods: {
    // Query the api.imjad.cn cloudmusic endpoint for this.songName and replace
    // the table rows with the result. Request errors are only logged.
    searchSongByName () {
      this.$axios.get('https://api.imjad.cn/cloudmusic/', {
        params: {
          type: 'search',
          search_type: 1,
          s: this.songName
        }
      })
        .then(res => {
          // A response without result.songs yields an empty table instead of
          // a TypeError.
          const result = res.data.result
          const songs = (result && result.songs) || []
          this.tableData = songs.map(item => ({
            id: item.id,
            name: item.name,
            songer: item.ar[0].name, // first listed artist
            time: this.secTotime(item.dt / 1000) // dt is divided by 1000 to get seconds
          }))
        }).catch(err => {
          console.log(err)
        })
    },
    // Format a duration in seconds as "mm:ss". Hours are dropped on purpose
    // (minutes wrap at 60) because a song is almost never longer than an hour.
    // Negative or non-numeric input is shown as "00:00".
    secTotime (secs) {
      if (!(secs > -1)) {
        return '00:00'
      }
      // Round once up front so the seconds can never print as "60" and the
      // zero-padding checks see the same value that is displayed.
      const total = Math.max(0, Math.round(secs))
      const min = Math.floor(total / 60) % 60
      const sec = total % 60
      return (min < 10 ? '0' : '') + min + ':' + (sec < 10 ? '0' : '') + sec
    },
    // Remember which song and which comment list ('hotComments' or 'comments')
    // was chosen, then open the comments page.
    queryComment (songId, commentType) {
      // Commit first so the store already holds the selection when the target
      // page (which is expected to read it from the store) is set up.
      this.$store.commit('setSongIdAndCommentType', {songId: songId, commentType: commentType})
      this.$router.push('/songComments') // navigate to the comments route
    }
  },
  // Run the default search as soon as the component is created.
  created () {
    this.searchSongByName()
  }
}
</script>

<style scoped>

</style>

<template>
<div style="width: 100%;margin: 0 auto">
  <el-row style="padding: 20px;">
    <el-col :span="6">
      <el-button type="success" plain icon="el-icon-back" @click="back">返回</el-button>
    </el-col>
    <el-col :span="6">
      <el-button type="primary" icon="el-icon-search" @click="queryCommentByIdAndType" style="float: right">搜索</el-button>
    </el-col>
  </el-row>
  <!-- Table: bordered, bound to tableData, header labels and cell text centered -->
  <el-table stripe border
            :data="tableData"
            :header-cell-style="{'text-align':'center'}"
            :cell-style="{'text-align':'center'}"
            style="width: 100%;">
    <el-table-column
      prop="userId"
      label="评论者id"
      width="180">
    </el-table-column>
    <el-table-column
      prop="nickname"
      label="评论者昵称"
      width="180">
    </el-table-column>
    <el-table-column
      prop="content"
      label="评论内容"
      width="300">
    </el-table-column>
    <el-table-column
      prop="date"
      label="评论发表时间">
    </el-table-column>
    <el-table-column
      prop="likedCount"
      label="点赞数">
    </el-table-column>
  </el-table>
  <el-pagination
    @size-change="handleSizeChange"
    @current-change="handleCurrentPageChange"
    :page-sizes="[10,20,30,40,50]"
    background
    layout="total, sizes, prev, pager, next, jumper"
    :total="1000"><!-- total number of records shown by the pager (hard-coded here) -->
  </el-pagination>
</div>
</template>

<script>
// Comments page: shows one page of comments for the song and comment type
// currently held in the Vuex store.
export default {
  name: '',
  data () {
    return {
      tableData: [], // rows: { userId, nickname, content, date, likedCount }
      currentPage: 1, // page shown initially
      pageSize: 10 // number of rows per page
    }
  },
  methods: {
    // Load the current page of comments for $store.state.songId and show the
    // list named by $store.state.commentType. Request errors are only logged.
    queryCommentByIdAndType () {
      this.$axios.get('/api/v1/resource/comments/R_SO_4_' + this.$store.state.songId, {
        params: {
          limit: this.pageSize,
          offset: (this.currentPage - 1) * this.pageSize
        }
      })
        .then(res => {
          // Bracket access because the property name comes from a variable.
          // A response without that list yields an empty table instead of a
          // TypeError.
          const comments = res.data[this.$store.state.commentType] || []
          this.tableData = comments.map(item => ({
            userId: item.user.userId,
            nickname: item.user.nickname,
            content: item.content,
            date: this.formatDate(new Date(item.time), 'yyyy-MM-dd hh:mm:ss'),
            likedCount: item.likedCount
          }))
        }).catch(err => {
          console.log(err)
        })
    },
    // Go back to the song search page.
    back () {
      this.$router.push('/searchSong') // navigate to the search route
    },
    // Left-pad a numeric string with zeros to two characters ('7' -> '07').
    padLeftZero (str) {
      return ('00' + str).slice(str.length)
    },
    // Render a Date according to a pattern such as 'yyyy-MM-dd hh:mm:ss'.
    // Supported tokens: y (year), M (month), d (day), h (hours), m (minutes),
    // s (seconds), q (quarter), S (milliseconds). A single-letter token prints
    // the raw number; a longer one is zero-padded via padLeftZero.
    formatDate (date, fmt) {
      const fields = {
        'M+': date.getMonth() + 1, // month
        'd+': date.getDate(), // day of month
        'h+': date.getHours(), // hours
        'm+': date.getMinutes(), // minutes
        's+': date.getSeconds(), // seconds
        'q+': Math.floor((date.getMonth() + 3) / 3), // quarter
        'S': date.getMilliseconds() // milliseconds
      }
      // Use the match result directly rather than the deprecated static
      // RegExp.$1 property.
      const yearMatch = /(y+)/.exec(fmt)
      if (yearMatch) {
        // Keep only as many trailing year digits as the token has letters.
        fmt = fmt.replace(yearMatch[1], (date.getFullYear() + '').slice(4 - yearMatch[1].length))
      }
      for (const key in fields) {
        const match = new RegExp('(' + key + ')').exec(fmt)
        if (match) {
          const value = fields[key] + ''
          fmt = fmt.replace(match[1], match[1].length === 1 ? value : this.padLeftZero(value))
        }
      }
      return fmt
    },
    // Pager callback: store the newly selected page size.
    handleSizeChange (size) {
      this.pageSize = size
    },
    // Pager callback: store the newly selected page number.
    handleCurrentPageChange (currentPage) {
      this.currentPage = currentPage
    }
  },
  // Load the first page once the component has been mounted.
  mounted () {
    this.queryCommentByIdAndType()
  },
  // Reload the table whenever the page number or the page size changes.
  watch: {
    currentPage () {
      this.queryCommentByIdAndType()
    },
    pageSize () {
      this.queryCommentByIdAndType()
    }
  }
}
</script>

<style scoped>

</style>

  • 6
    点赞
  • 73
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值