Java 爬取网易云热评并制作词云。


假期被游戏缠绕无法自拔?或是被淡黄的长裙蓬松的头发洗脑?是否因为假期太长而闲着不知道干嘛呢,还不来敲代码!用java一起来爬个虫?

一、自我介绍

南京某高校大学生,今年寒假因为疫情,假期无限延长aaa,自己在家也不知道学点什么,有一段时间很迷茫,后来因为机缘巧合,碰到了一些it大佬,于是乎,我就代码敲敲敲,啊,这不,已经可以做个小爬虫啦哈哈哈

二、上代码

这个小项目是用java爬取网易云热门歌曲并制作热评的热词云图,效果如下:在这里插入图片描述
先列一下pom里加的依赖吧

   <!-- https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp -->
    <dependencies>
        <!-- HTTP client used to call the music API -->
        <dependency>
            <groupId>com.squareup.okhttp3</groupId>
            <artifactId>okhttp</artifactId>
            <version>4.1.0</version>
        </dependency>
        <!-- JSON library -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.62</version>
        </dependency>
        <!-- kumo tokenizers, pulled in for Chinese word segmentation -->
        <dependency>
            <groupId>com.kennycason</groupId>
            <artifactId>kumo-tokenizers</artifactId>
            <version>1.17</version>
        </dependency>
    </dependencies>

这个是项目结构
在这里插入图片描述

1、封装各个model类

专辑类

package com.youkeda.music.model;

/**
 * Album a song belongs to.
 *
 * <p>Simple mutable data holder; every property is {@code null} until its setter is called.
 */
public class Album {

    /** Album identifier. */
    private String id;

    /** Album title. */
    private String name;

    /** URL of the album picture. */
    private String picUrl;

    /** @return the album identifier, or {@code null} if it has not been set */
    public String getId() {
        return this.id;
    }

    /** @param id the album identifier to store */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the album title, or {@code null} if it has not been set */
    public String getName() {
        return this.name;
    }

    /** @param name the album title to store */
    public void setName(final String name) {
        this.name = name;
    }

    /** @return the album picture URL, or {@code null} if it has not been set */
    public String getPicUrl() {
        return this.picUrl;
    }

    /** @param picUrl the album picture URL to store */
    public void setPicUrl(final String picUrl) {
        this.picUrl = picUrl;
    }
}

歌手对象

package com.youkeda.music.model;

import java.util.List;

/**
 * Artist together with descriptive properties and the list of songs attached to it.
 *
 * <p>Simple mutable data holder; every property is {@code null} until its setter is called.
 */
public class Artist {

    /** Artist identifier. */
    private String id;

    /** Display name. */
    private String name;

    /** Alternative names. */
    private List<String> alias;

    /** Short description. */
    private String briefDesc;

    /** Picture URL. */
    private String picUrl;

    /** Value of the {@code img1v1Url} image URL property. */
    private String img1v1Url;

    /** Songs that belong to this artist. */
    private List<Song> songList;

    /** @return the artist identifier */
    public String getId() {
        return this.id;
    }

    /** @param id the artist identifier to store */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the display name */
    public String getName() {
        return this.name;
    }

    /** @param name the display name to store */
    public void setName(final String name) {
        this.name = name;
    }

    /** @return the alternative names */
    public List<String> getAlias() {
        return this.alias;
    }

    /** @param alias the alternative names to store */
    public void setAlias(final List<String> alias) {
        this.alias = alias;
    }

    /** @return the short description */
    public String getBriefDesc() {
        return this.briefDesc;
    }

    /** @param briefDesc the short description to store */
    public void setBriefDesc(final String briefDesc) {
        this.briefDesc = briefDesc;
    }

    /** @return the picture URL */
    public String getPicUrl() {
        return this.picUrl;
    }

    /** @param picUrl the picture URL to store */
    public void setPicUrl(final String picUrl) {
        this.picUrl = picUrl;
    }

    /** @return the {@code img1v1Url} image URL */
    public String getImg1v1Url() {
        return this.img1v1Url;
    }

    /** @param img1v1Url the {@code img1v1Url} image URL to store */
    public void setImg1v1Url(final String img1v1Url) {
        this.img1v1Url = img1v1Url;
    }

    /** @return the songs that belong to this artist */
    public List<Song> getSongList() {
        return this.songList;
    }

    /** @param songList the songs to attach to this artist */
    public void setSongList(final List<Song> songList) {
        this.songList = songList;
    }
}

评论类



package com.youkeda.music.model;
/**
 * A single comment on a song, together with the user who wrote it.
 *
 * <p>Simple mutable data holder; all values, including the like count and the time, are kept
 * as text and are {@code null} until set.
 */
public class Comment {

    /** Comment identifier. */
    private String id;

    /** Text of the comment. */
    private String content;

    /** Number of likes, kept as text. */
    private String likedCount;

    /** Time of the comment, kept as text. */
    private String time;

    /** User who wrote the comment. */
    private User commentUser;

    /** @return the comment identifier */
    public String getId() {
        return this.id;
    }

    /** @param id the comment identifier to store */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the text of the comment */
    public String getContent() {
        return this.content;
    }

    /** @param content the text of the comment to store */
    public void setContent(final String content) {
        this.content = content;
    }

    /** @return the number of likes as text */
    public String getLikedCount() {
        return this.likedCount;
    }

    /** @param likedCount the number of likes, as text */
    public void setLikedCount(final String likedCount) {
        this.likedCount = likedCount;
    }

    /** @return the time of the comment as text */
    public String getTime() {
        return this.time;
    }

    /** @param time the time of the comment, as text */
    public void setTime(final String time) {
        this.time = time;
    }

    /** @return the user who wrote the comment */
    public User getCommentUser() {
        return this.commentUser;
    }

    /** @param commentUser the user who wrote the comment */
    public void setCommentUser(final User commentUser) {
        this.commentUser = commentUser;
    }
}

歌曲类

package com.youkeda.music.model;
//歌曲类
import java.util.List;

/**
 * A song with its singers, album, audio file URL and two lists of comments.
 *
 * <p>Simple mutable data holder; every property is {@code null} until its setter is called.
 */
public class Song {

    /** Song identifier. */
    private String id;

    /** Song title. */
    private String name;

    /** Singers performing the song. */
    private List<User> singers;

    /** Album the song belongs to. */
    private Album album;

    /** URL of the song's audio file. */
    private String sourceUrl;

    /** Hot comments on the song. */
    private List<Comment> hotComments;

    /** Regular comments on the song. */
    private List<Comment> comments;

    /** @return the song identifier */
    public String getId() {
        return this.id;
    }

    /** @param id the song identifier to store */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the song title */
    public String getName() {
        return this.name;
    }

    /** @param name the song title to store */
    public void setName(final String name) {
        this.name = name;
    }

    /** @return the singers performing the song */
    public List<User> getSingers() {
        return this.singers;
    }

    /** @param singers the singers performing the song */
    public void setSingers(final List<User> singers) {
        this.singers = singers;
    }

    /** @return the album the song belongs to */
    public Album getAlbum() {
        return this.album;
    }

    /** @param album the album the song belongs to */
    public void setAlbum(final Album album) {
        this.album = album;
    }

    /** @return the URL of the song's audio file */
    public String getSourceUrl() {
        return this.sourceUrl;
    }

    /** @param sourceUrl the URL of the song's audio file */
    public void setSourceUrl(final String sourceUrl) {
        this.sourceUrl = sourceUrl;
    }

    /** @return the hot comments on the song */
    public List<Comment> getHotComments() {
        return this.hotComments;
    }

    /** @param hotComments the hot comments on the song */
    public void setHotComments(final List<Comment> hotComments) {
        this.hotComments = hotComments;
    }

    /** @return the regular comments on the song */
    public List<Comment> getComments() {
        return this.comments;
    }

    /** @param comments the regular comments on the song */
    public void setComments(final List<Comment> comments) {
        this.comments = comments;
    }
}

评论者

package com.youkeda.music.model;

/**
 * A user, identified by id, nickname and avatar.
 *
 * <p>Simple mutable data holder; every property is {@code null} until its setter is called.
 */
public class User {

    /** User identifier. */
    private String id;

    /** Nickname shown for the user. */
    private String nickName;

    /** Avatar (profile picture). */
    private String avatar;

    /** @return the user identifier */
    public String getId() {
        return this.id;
    }

    /** @param id the user identifier to store */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the nickname */
    public String getNickName() {
        return this.nickName;
    }

    /** @param nickName the nickname to store */
    public void setNickName(final String nickName) {
        this.nickName = nickName;
    }

    /** @return the avatar */
    public String getAvatar() {
        return this.avatar;
    }

    /** @param avatar the avatar to store */
    public void setAvatar(final String avatar) {
        this.avatar = avatar;
    }
}

2、实现各项功能的service类

service接口类

package com.youkeda.music.service;

import com.youkeda.music.model.Artist;
import com.youkeda.music.model.Song;
/**
 * Service that crawls the songs of an artist and gives access to the crawled objects.
 */
public interface SongCrawlerService {

    /**
     * Runs the crawl for one artist.
     *
     * @param artistId id of the artist to crawl
     */
    void start(String artistId);

    /**
     * Looks up an artist.
     *
     * @param artistId id of the artist
     * @return the artist for the given id
     */
    Artist getArtist(String artistId);

    /**
     * Looks up one song of an artist.
     *
     * @param artistId id of the artist the song belongs to
     * @param songId id of the song
     * @return the song for the given ids
     */
    Song getSong(String artistId, String songId);
}

接口实现类

package com.youkeda.music.service.impl;

import com.alibaba.fastjson.JSON;
import com.youkeda.music.model.*;
import com.youkeda.music.service.SongCrawlerService;
import com.youkeda.music.util.WordCloudUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import okhttp3.Call;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;

/**
 * {@link SongCrawlerService} implementation that fetches an artist's hot songs, song details,
 * comments and audio file URLs over HTTP, keeps the assembled objects in memory, and finally
 * renders a word cloud from the collected comments.
 *
 * <p>Remote failures are not fatal: a step whose request fails or whose response lacks the
 * expected data is reported on standard output and skipped, so the affected properties simply
 * stay unset.
 */
public class SongCrawlerServiceImpl implements SongCrawlerService {

    private static final String ARTIST_API_PREFIX = "http://neteaseapi.youkeda.com:3000/artists?id=";
    private static final String SONG_DETAIL_API_PREFIX = "http://neteaseapi.youkeda.com:3000/song/detail?ids=";
    private static final String SONG_COMMENT_API_PREFIX = "http://neteaseapi.youkeda.com:3000/comment/music?id=";
    private static final String SONG_URL_API_PREFIX = "http://neteaseapi.youkeda.com:3000/song/url?id=";
    private static final String COMMENT_LIMIT_PARAM = "&limit=5";

    /** HTTP client shared by all requests of this service. */
    private final OkHttpClient okHttpClient = new OkHttpClient();

    /**
     * Crawled artists keyed by artist id. Created eagerly so the lookup methods are safe to call
     * before {@link #start(String)}.
     */
    private final Map<String, Artist> artists = new HashMap<>();

    /**
     * Crawls everything for one artist: hot songs, song details, comments, file URLs, and then
     * generates the word cloud. Results from any earlier call are discarded.
     *
     * @param artistId id of the artist; {@code null} or empty makes the call a no-op
     */
    @Override
    public void start(String artistId) {
        if (artistId == null || artistId.isEmpty()) {
            return;
        }

        artists.clear();
        // Every later step needs the artist and its songs, so stop when they cannot be loaded.
        if (!initArtistHotSongs(artistId)) {
            return;
        }
        assembleSongDetail(artistId);
        assembleSongComment(artistId);
        assembleSongUrl(artistId);
        generateWordCloud(artistId);
    }

    /**
     * @param artistId id of the artist
     * @return the crawled artist, or {@code null} if no artist with that id has been crawled
     */
    @Override
    public Artist getArtist(String artistId) {
        return artists.get(artistId);
    }

    /**
     * @param artistId id of the artist the song belongs to
     * @param songId id of the song
     * @return the matching song, or {@code null} if the artist or the song is unknown
     */
    @Override
    public Song getSong(String artistId, String songId) {
        for (Song song : songsOf(artistId)) {
            if (song.getId().equals(songId)) {
                return song;
            }
        }
        return null;
    }

    /**
     * Returns the songs of a crawled artist.
     *
     * @return the artist's song list, or an empty list if the artist is unknown or has no songs
     */
    private List<Song> songsOf(String artistId) {
        Artist artist = getArtist(artistId);
        if (artist == null || artist.getSongList() == null) {
            return new ArrayList<>();
        }
        return artist.getSongList();
    }

    /**
     * Requests {@code prefix + postfix} and parses the response body as a JSON object.
     *
     * @return the parsed object, or {@code null} if the request failed
     */
    @SuppressWarnings("unchecked")
    private Map<String, Object> getSourceDataObj(String prefix, String postfix) {
        String content = getPageContentSync(prefix + postfix);
        if (content == null) {
            return null;
        }
        return (Map<String, Object>) JSON.parseObject(content, Map.class);
    }

    /**
     * Performs a synchronous GET request.
     *
     * @param url address to request
     * @return the response body, or {@code null} if the request failed or was answered with a
     *         non-success status
     */
    private String getPageContentSync(String url) {
        Request request = new Request.Builder().url(url).build();
        Call call = okHttpClient.newCall(request);
        // try-with-resources releases the connection on every path, including failures.
        try (Response response = call.execute()) {
            if (!response.isSuccessful() || response.body() == null) {
                System.out.println("request " + url + " failed , http status=" + response.code());
                return null;
            }
            String result = response.body().string();
            System.out.println("call " + url + " , content's size=" + result.length());
            return result;
        } catch (IOException e) {
            System.out.println("request " + url + " error . ");
            e.printStackTrace();
            return null;
        }
    }

    /** Returns {@code value.toString()}, or {@code null} for a {@code null} value. */
    private static String asString(Object value) {
        return value == null ? null : value.toString();
    }

    /**
     * Reads {@code source[key]} as a list of JSON objects.
     *
     * @return the list, or an empty list if the source or the value is missing or not a list
     */
    @SuppressWarnings("unchecked")
    private static List<Map<String, Object>> listOfMaps(Map<String, Object> source, String key) {
        Object value = source == null ? null : source.get(key);
        if (value instanceof List) {
            return (List<Map<String, Object>>) value;
        }
        return new ArrayList<>();
    }

    /**
     * Reads {@code source[key]} as a JSON object.
     *
     * @return the object, or {@code null} if the source or the value is missing or not an object
     */
    @SuppressWarnings("unchecked")
    private static Map<String, Object> mapAt(Map<String, Object> source, String key) {
        Object value = source == null ? null : source.get(key);
        if (value instanceof Map) {
            return (Map<String, Object>) value;
        }
        return null;
    }

    /** Indexes JSON objects by the text form of their {@code id}; entries without id are skipped. */
    private static Map<String, Map<String, Object>> indexById(List<Map<String, Object>> items) {
        Map<String, Map<String, Object>> byId = new HashMap<>();
        for (Map<String, Object> item : items) {
            if (item != null && item.get("id") != null) {
                byId.put(item.get("id").toString(), item);
            }
        }
        return byId;
    }

    /**
     * Builds an {@link Artist} from the {@code artist} object of the artist response.
     *
     * @return the artist, or {@code null} if the response carries no artist with an id
     */
    @SuppressWarnings("unchecked")
    private Artist buildArtist(Map<String, Object> returnData) {
        Map<String, Object> artistData = mapAt(returnData, "artist");
        if (artistData == null || artistData.get("id") == null) {
            return null;
        }
        Artist artist = new Artist();
        artist.setId(artistData.get("id").toString());
        artist.setPicUrl(asString(artistData.get("picUrl")));
        artist.setBriefDesc(asString(artistData.get("briefDesc")));
        artist.setImg1v1Url(asString(artistData.get("img1v1Url")));
        artist.setName(asString(artistData.get("name")));
        artist.setAlias((List<String>) artistData.get("alias"));
        return artist;
    }

    /** Builds one {@link Song} (id and name only) per entry of the response's {@code hotSongs}. */
    private List<Song> buildSongs(Map<String, Object> returnData) {
        List<Song> songs = new ArrayList<>();
        for (Map<String, Object> songData : listOfMaps(returnData, "hotSongs")) {
            // A song without id cannot be looked up or enriched later, so it is left out.
            if (songData == null || songData.get("id") == null) {
                continue;
            }
            Song song = new Song();
            song.setId(songData.get("id").toString());
            song.setName(asString(songData.get("name")));
            songs.add(song);
        }
        return songs;
    }

    /**
     * Loads the artist and its hot songs and stores them in the repository.
     *
     * @param artistId id of the artist to load
     * @return {@code true} if the artist was loaded and stored
     */
    private boolean initArtistHotSongs(String artistId) {
        Map<String, Object> returnData = getSourceDataObj(ARTIST_API_PREFIX, artistId);
        Artist artist = buildArtist(returnData);
        if (artist == null) {
            System.out.println("artist " + artistId + " could not be loaded . ");
            return false;
        }
        artist.setSongList(buildSongs(returnData));
        artists.put(artist.getId(), artist);
        return true;
    }

    /**
     * Fills in singers and album for every song of the artist, using one request for all songs.
     *
     * @param artistId id of a previously loaded artist
     */
    private void assembleSongDetail(String artistId) {
        List<Song> songs = songsOf(artistId);
        if (songs.isEmpty()) {
            return;
        }

        Map<String, Object> detailData = getSourceDataObj(SONG_DETAIL_API_PREFIX, buildManyIdParam(songs));
        // Index the raw songs by id so each Song finds its details with a single lookup.
        Map<String, Map<String, Object>> detailsById = indexById(listOfMaps(detailData, "songs"));

        for (Song song : songs) {
            Map<String, Object> songData = detailsById.get(song.getId());
            if (songData == null) {
                continue;
            }
            // "ar" holds the singers, "al" the album.
            song.setSingers(buildSingers(listOfMaps(songData, "ar")));
            song.setAlbum(buildAlbum(mapAt(songData, "al")));
        }
    }

    /** Converts raw singer objects into {@link User} instances carrying id and name. */
    private List<User> buildSingers(List<Map<String, Object>> singersData) {
        List<User> singers = new ArrayList<>();
        for (Map<String, Object> singerData : singersData) {
            if (singerData == null) {
                continue;
            }
            User singer = new User();
            singer.setId(asString(singerData.get("id")));
            singer.setNickName(asString(singerData.get("name")));
            singers.add(singer);
        }
        return singers;
    }

    /** Converts a raw album object into an {@link Album}; returns {@code null} for {@code null}. */
    private Album buildAlbum(Map<String, Object> albumData) {
        if (albumData == null) {
            return null;
        }
        Album album = new Album();
        album.setId(asString(albumData.get("id")));
        album.setName(asString(albumData.get("name")));
        album.setPicUrl(asString(albumData.get("picUrl")));
        return album;
    }

    /**
     * Fills in hot comments and regular comments for every song of the artist, one request per
     * song.
     *
     * @param artistId id of a previously loaded artist
     */
    private void assembleSongComment(String artistId) {
        for (Song song : songsOf(artistId)) {
            Map<String, Object> commentData =
                    getSourceDataObj(SONG_COMMENT_API_PREFIX, song.getId() + COMMENT_LIMIT_PARAM);
            if (commentData == null) {
                continue;
            }
            song.setHotComments(getComments(listOfMaps(commentData, "hotComments")));
            song.setComments(getComments(listOfMaps(commentData, "comments")));
        }
    }

    /**
     * Fills in the audio file URL for every song of the artist, using one request for all songs.
     * Songs for which no URL is returned keep their current value.
     *
     * @param artistId id of a previously loaded artist
     */
    private void assembleSongUrl(String artistId) {
        List<Song> songs = songsOf(artistId);
        if (songs.isEmpty()) {
            return;
        }

        Map<String, Object> fileData = getSourceDataObj(SONG_URL_API_PREFIX, buildManyIdParam(songs));
        // "data" is the list of audio files; index it by song id for direct lookup.
        Map<String, Map<String, Object>> filesById = indexById(listOfMaps(fileData, "data"));

        for (Song song : songs) {
            Map<String, Object> songFileData = filesById.get(song.getId());
            if (songFileData != null && songFileData.get("url") != null) {
                song.setSourceUrl(songFileData.get("url").toString());
            }
        }
    }

    /** Joins the ids of the given songs with commas, e.g. {@code 347230,347231}. */
    private String buildManyIdParam(List<Song> songs) {
        List<String> songIds = new ArrayList<>();
        for (Song song : songs) {
            songIds.add(song.getId());
        }
        return String.join(",", songIds);
    }

    /**
     * Converts raw comment objects into {@link Comment} instances.
     *
     * @param rawData raw comments; {@code null} is treated as empty
     * @return one comment per raw entry, never {@code null}
     */
    private List<Comment> getComments(List<Map<String, Object>> rawData) {
        List<Comment> comments = new ArrayList<>();
        if (rawData == null) {
            return comments;
        }
        for (Map<String, Object> commentData : rawData) {
            if (commentData == null) {
                continue;
            }
            Comment comment = new Comment();

            Map<String, Object> userData = mapAt(commentData, "user");
            if (userData != null) {
                User user = new User();
                user.setId(asString(userData.get("userId")));
                user.setNickName(asString(userData.get("nickname")));
                user.setAvatar(asString(userData.get("avatarUrl")));
                comment.setCommentUser(user);
            }

            comment.setContent(asString(commentData.get("content")));
            comment.setId(asString(commentData.get("commentId")));
            comment.setLikedCount(asString(commentData.get("likedCount")));
            comment.setTime(asString(commentData.get("time")));

            comments.add(comment);
        }
        return comments;
    }

    /**
     * Collects the text of all hot and regular comments of the artist's songs and hands it to
     * {@link WordCloudUtil#generate(String, List)}. Does nothing for an unknown artist.
     *
     * @param artistId id of a previously loaded artist
     */
    private void generateWordCloud(String artistId) {
        if (getArtist(artistId) == null) {
            return;
        }
        List<String> contents = new ArrayList<>();
        for (Song song : songsOf(artistId)) {
            contents.add(getCommentContents(song.getHotComments()));
            contents.add(getCommentContents(song.getComments()));
        }
        WordCloudUtil.generate(artistId, contents);
    }

    /**
     * Concatenates the text of the given comments.
     *
     * @param comments comments to read; {@code null} is treated as empty
     * @return the comment texts, one per line; empty if there is nothing to read
     */
    private String getCommentContents(List<Comment> comments) {
        StringBuilder contents = new StringBuilder();
        if (comments == null) {
            return contents.toString();
        }
        for (Comment comment : comments) {
            if (comment.getContent() == null) {
                continue;
            }
            // The line break keeps the end of one comment from fusing with the start of the next.
            contents.append(comment.getContent()).append('\n');
        }
        return contents.toString();
    }

}

3、设计一个制作词云的工具类util

package com.youkeda.music.util;

import com.kennycason.kumo.CollisionMode;
import com.kennycason.kumo.WordCloud;
import com.kennycason.kumo.WordFrequency;
import com.kennycason.kumo.bg.CircleBackground;
import com.kennycason.kumo.font.FontWeight;
import com.kennycason.kumo.font.KumoFont;
import com.kennycason.kumo.font.scale.SqrtFontScalar;
import com.kennycason.kumo.nlp.FrequencyAnalyzer;
import com.kennycason.kumo.nlp.tokenizers.ChineseWordTokenizer;
import com.kennycason.kumo.palette.ColorPalette;
import java.awt.Color;
import java.awt.Dimension;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.List;

/**
 * Utility that renders a word-cloud image from a list of texts.
 */
public class WordCloudUtil {

    /** Maximum number of words taken from the frequency analysis. */
    private static final int MAX_WORDS = 500;

    /** Minimum length a word must have to be counted. */
    private static final int MIN_WORD_LENGTH = 4;

    /** Width and height of the generated image, in pixels. */
    private static final int IMAGE_SIZE = 600;

    /** Radius of the circular background, in pixels. */
    private static final int BACKGROUND_RADIUS = 300;

    /** Padding between words. */
    private static final int WORD_PADDING = 2;

    /** Smallest and largest font size used when scaling words. */
    private static final int MIN_FONT_SIZE = 10;
    private static final int MAX_FONT_SIZE = 70;

    /**
     * Generates a circular word cloud from the given texts and writes it to
     * {@code wordCloud-<artistId>.png}.
     *
     * @param artistId id used in the name of the output file
     * @param texts texts to segment and count
     */
    public static void generate(String artistId, List<String> texts) {
        FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
        frequencyAnalyzer.setWordFrequenciesToReturn(MAX_WORDS);
        frequencyAnalyzer.setMinWordLength(MIN_WORD_LENGTH);
        // The Chinese tokenizer is required to split Chinese text into words.
        frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
        final List<WordFrequency> wordFrequencyList = frequencyAnalyzer.load(texts);

        Dimension dimension = new Dimension(IMAGE_SIZE, IMAGE_SIZE);
        WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
        wordCloud.setPadding(WORD_PADDING);
        // The font must support Chinese characters.
        // NOTE(review): presumably this font has to be installed on the machine - confirm.
        wordCloud.setKumoFont(new KumoFont("阿里巴巴普惠体 Light", FontWeight.PLAIN));
        // Six-color palette used for the words.
        wordCloud.setColorPalette(
                new ColorPalette(new Color(0x4055F1), new Color(0x408DF1), new Color(0x40AAF1),
                        new Color(0x40C5F1), new Color(0x40D3F1), new Color(0xFFFFFF)));
        wordCloud.setFontScalar(new SqrtFontScalar(MIN_FONT_SIZE, MAX_FONT_SIZE));
        wordCloud.setBackground(new CircleBackground(BACKGROUND_RADIUS));
        wordCloud.build(wordFrequencyList);

        // The file name depends only on the artist id, so a later run for the same artist
        // writes to the same file.
        wordCloud.writeToFile("wordCloud-" + artistId + ".png");
    }
}

4、最后测试类

package com.youkeda.music.test;

import com.youkeda.music.model.Artist;
import com.youkeda.music.model.Song;
import com.youkeda.music.service.SongCrawlerService;
import com.youkeda.music.service.impl.SongCrawlerServiceImpl;

/**
 * Manual end-to-end check: crawls one known artist and verifies that the service returns the
 * expected artist, song, singer, album, file URL and hot comments.
 *
 * <p>Exits with status 1 on the first failed check and with status 0 when every check passes.
 */
public class SongCrawlerTest {

    private static final String SA_DING_DING = "萨顶顶";
    private static final String A_ID = "9270";
    private static final String ZUO_SHOU_ZHI_YUE = "左手指月";
    private static final String S_ID = "536096151";

    public static void main(String[] args) {
        SongCrawlerService songService = new SongCrawlerServiceImpl();
        songService.start(A_ID);

        Artist artist = songService.getArtist(A_ID);
        require(artist != null, "歌单抓取失败,没有取到本测试用例指定的歌单。");
        System.out.println("歌单名称:" + artist.getName());
        require(SA_DING_DING.equals(artist.getName()), "歌单名称错误,不是本测试用例指定的歌单。");

        Song song = songService.getSong(A_ID, S_ID);
        require(song != null, "歌曲抓取失败,没有取到本测试用例指定的歌曲。");
        System.out.println("歌曲名称:" + song.getName());
        require(ZUO_SHOU_ZHI_YUE.equals(song.getName()), "歌曲名称错误,不是本测试用例指定的歌曲。");

        // Each failure message names the property that is actually being checked.
        boolean singerMatches = song.getSingers() != null
                && !song.getSingers().isEmpty()
                && SA_DING_DING.equals(song.getSingers().get(0).getNickName());
        require(singerMatches, "歌手名称错误,不是本测试用例指定的歌曲的歌手。");

        boolean albumMatches = song.getAlbum() != null
                && "香蜜沉沉烬如霜 电视原声音乐专辑".equals(song.getAlbum().getName());
        require(albumMatches, "专辑名称错误,不是本测试用例指定的歌曲的专辑。");

        require(song.getSourceUrl() != null, "歌曲文件地址错误,没有正确抓取音乐文件地址。");

        boolean hasHotComments = song.getHotComments() != null && !song.getHotComments().isEmpty();
        require(hasHotComments, "歌曲热门评论错误,没有正确抓取评论数据。");

        System.out.println("歌曲所属专辑名称:" + song.getAlbum().getName());
        System.out.println("歌曲的歌手名称:" + song.getSingers().get(0).getNickName());
        System.out.println("歌曲音乐为文件地址:" + song.getSourceUrl());
        System.out.println("歌曲热门评论:" + song.getHotComments().get(0).getContent());

        System.out.println("歌曲服务运行成功。非常棒!");
        System.exit(0);
    }

    /**
     * Prints the message and terminates the JVM with status 1 when the condition does not hold.
     *
     * @param condition result of the check
     * @param failureMessage message printed when the check failed
     */
    private static void require(boolean condition, String failureMessage) {
        if (!condition) {
            System.out.println(failureMessage);
            System.exit(1);
        }
    }
}

运行成功就大功告成啦!!啊哈哈哈——出来吧!词云!

三、总结

也不知为什么,会写这一篇文章,(这是我刚做完高数作业的时候,突发奇想就来这里码了)可能就是想记录一下自己的小成功,让自己确信自己在进步,让自己小快乐一下,毕竟自己能做个小项目出来,我真的,哈哈哈蛮激动的。再接再厉!希望看到这里的你也能找到属于自己的学习方法,早日从小白晋升为大佬,啊哈哈哈。

第一次csdn创作,求大佬指正!

求多多鼓励支持一下下,奥利给!

在这里插入图片描述

  • 13
    点赞
  • 26
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值