以QQ音乐为例子 解析公共接口的json数据

根据获得歌手的json数据的url

https://u.y.qq.com/cgi-bin/musicu.fcg?format=jsonp&inCharset=utf8&outCharset=utf-8&data=%7B%22comm%22%3A%7B%22ct%22%3A24%2C%22cv%22%3A10000%7D%2C%22singerList%22%3A%7B%22module%22%3A%22Music.SingerListServer%22%2C%22method%22%3A%22get_singer_list%22%2C%22param%22%3A%7B%22area%22%3A-100%2C%22sex%22%3A-100%2C%22genre%22%3A-100%2C%22index%22%3A1%2C%22sin%22%3A0%2C%22cur_page%22%3A1%7D%7D%7D
 

json数据部分截图

根据上面链接里获得数据,取出歌手mid,然后再根据下面的url获得歌曲列表

https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg?inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq&needNewCode=0&singermid=002J4UUk29y8BY&order=listen&begin=0&num=30

json数据部分截图

 

下面是调用的详细代码,其中的HTTPUtil.sendGet方法是发送http请求的代码。在我这个博客

https://blog.csdn.net/Hello_Ray/article/details/80762232 里代码的第11行和第42-48行有发送http请求的代码,可以参考。

在测试这个方法时,dao方法可以注释掉;HotSongSpider是引入的其他类,用来转换list,也可以注释掉。

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.springframework.stereotype.Component;

import javax.annotation.Resource;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.*;

/**
 * Crawler for QQ Music's public JSON endpoints.
 *
 * <p>{@link #spiderSingerList()} first walks the singer-list endpoint for the {@code index} values
 * 1..27 and collects every returned singer into {@link #singerList}; it then fetches the track list
 * of each collected singer. Pages that fail are recorded in {@link #errorSingerList} /
 * {@link #errorSingerSongList} and retried in a bounded number of passes.</p>
 *
 * <p>Crawl state (current page counts, collected singers) lives in instance fields and the error
 * queues are static; nothing in this class is synchronized.</p>
 *
 * @author chenrui
 * @since 2018-08-09
 */
@Component
public class SpiderSingerNew {

    private static final Logger log = LogManager.getLogger(SpiderSingerNew.class);

    /** Page size assumed for the singer-list endpoint; used for the {@code sin} offset and page count. */
    private static final int SINGER_PAGE_SIZE = 80;
    /** Number of tracks requested per page from the singer-track endpoint. */
    private static final int SONG_PAGE_SIZE = 30;
    /** Highest value of the {@code index} request parameter that is crawled. */
    private static final int LAST_SINGER_INDEX = 27;
    /** Maximum number of retry passes made over each error queue. */
    private static final int MAX_RETRY_PASSES = 3;

    @Resource
    private HotSongSpider hotSongSpider;
    @Resource
    private HotSongDao hotSongDao;

    /** Page count of the singer list currently being crawled; learned from the first parsed page. */
    private Integer singerListTotalPage = 1;

    /** Page count of the track list currently being crawled; learned from the first parsed page. */
    private Integer singerSongListTotalPage = 1;

    /** Singers that were parsed successfully. */
    List<QQ2Singer> singerList = new ArrayList<>();
    /** Singer-list pages that failed; each entry holds the keys "index" and "page". */
    public static Queue<Map<String, Integer>> errorSingerList = new LinkedList<>();
    /** Track-list pages that failed; each entry holds the keys "singerMid" and "page". */
    public static Queue<Map<String, String>> errorSingerSongList = new LinkedList<>();

    /** Base URL of the singer-list endpoint; the URL-encoded {@link #param} is appended to it. */
    String singerListUrl = "https://u.y.qq.com/cgi-bin/musicu.fcg?format=jsonp&inCharset=utf8&outCharset=utf-8&data=";
    String param = "{\"comm\":{\"ct\":24,\"cv\":10000},\"singerList\":{\"module\":\"Music.SingerListServer\",\"method\":\"get_singer_list\",\"param\":{\"area\":-100,\"sex\":-100,\"genre\":-100,\"index\":{index},\"sin\":{sin},\"cur_page\":{cur_page}}}}";
    /** URL template of the endpoint that lists all tracks of one singer. */
    String singerSongListUrl = "https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg?inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq&needNewCode=0&singermid={singermid}&order=listen&begin={begin}&num={num}";

    /** Running total of the "total" values reported by the singer-list endpoint (diagnostic only). */
    private int sum = 0;

    /**
     * Runs the whole crawl: all singer-list pages, a bounded retry of failed singer pages, the
     * track list of every collected singer, and finally a bounded retry of failed track pages.
     */
    public void spiderSingerList() {
        log.info("--start spider singer list--");
        for (int index = 1; index <= LAST_SINGER_INDEX; index++) {
            // The real page count for this index is set by parseSingerList on the first page.
            singerListTotalPage = 1;
            for (int page = 0; page < singerListTotalPage; page++) {
                try {
                    String content = HTTPUtil.sendGet(buildSingerListUrl(index, page)).getContent();
                    parseSingerList(content, index, page);
                } catch (Exception e) {
                    // A failed request must not abort the remaining pages/indexes; queue it for retry.
                    log.error("error : start spider singer list index:{} page:{}", index, page, e);
                    errorSingerList.add(singerError(index, page));
                }
            }
        }
        singerListTotalPage = 1;

        // Retry failed singer pages; each pass handles every queued entry at most once.
        for (int pass = 0; pass < MAX_RETRY_PASSES && !errorSingerList.isEmpty(); pass++) {
            spiderErrorSingerList();
        }
        if (!errorSingerList.isEmpty()) {
            log.warn("{} singer list page(s) still failing after {} retry passes",
                    errorSingerList.size(), MAX_RETRY_PASSES);
        }
        log.info("--finish spider singer list--");


        log.info("--start spider singer song list--");
        for (int i = 0; i < singerList.size(); i++) {
            String singermid = singerList.get(i).getSinger_mid();
            System.out.println("--开始第"+ (i+1) +"/"+singerList.size()+" singermid: "+ singermid +" 歌手的歌曲爬取--");
            spiderSingerSongList(singermid);
        }

        log.info("--处理歌手歌曲下的错误--");
        for (int pass = 0; pass < MAX_RETRY_PASSES && !errorSingerSongList.isEmpty(); pass++) {
            spiderErrorSingerSongList();
        }
        if (!errorSingerSongList.isEmpty()) {
            log.warn("{} singer song page(s) still failing after {} retry passes",
                    errorSingerSongList.size(), MAX_RETRY_PASSES);
        }
        log.info("--finish spider singer song list--");
    }

    /**
     * Parses one singer-list response, appends every singer to {@link #singerList} and persists it
     * through {@code hotSongDao.update}. While the known page count is still {@code <= 1}, the
     * response's "total" is used to compute the page count for the current index.
     *
     * <p>Any failure (including an unparsable response) is logged and the page is added to
     * {@link #errorSingerList}; this method does not propagate exceptions.</p>
     *
     * @param content raw response body of the singer-list endpoint
     * @param index   value of the {@code index} request parameter the page was fetched with
     * @param page    zero-based page number
     */
    public void parseSingerList(String content, int index, int page) {
        log.info("开始 index:{} page:{} 将singer的json数组转为singer的list集合", index, page+1);
        try {
            JSONObject data = JSONObject.parseObject(content).getJSONObject("singerList").getJSONObject("data");
            if (singerListTotalPage <= 1) {
                int total = data.getInteger("total");
                sum += total;
                System.out.println("singer list的index: "+index+" page: "+ page +" " + total + " sum " + sum);
                // Ceiling division: number of pages needed for `total` singers.
                singerListTotalPage = (total + SINGER_PAGE_SIZE - 1) / SINGER_PAGE_SIZE;
            }
            JSONArray singerListJsonArray = data.getJSONArray("singerlist");
            for (int i = 0; i < singerListJsonArray.size(); i++) {
                QQ2Singer qq2Singer = singerListJsonArray.getJSONObject(i).toJavaObject(QQ2Singer.class);
                singerList.add(qq2Singer);
                hotSongDao.update(qq2Singer);
            }
        } catch (Exception e) {
            log.error("转换singerlist,url标签位置{}, 所在页面{}, 出现错误", index, page, e);
            errorSingerList.add(singerError(index, page));
        }
        log.info("完成 index:{} page:{} 将singer的json数组转为singer的list集合", index, page+1);
    }

    /**
     * Makes one retry pass over {@link #errorSingerList}.
     *
     * <p>Only the entries queued when the pass starts are processed; pages that fail again are put
     * back on the queue for a later pass, so a persistently failing page cannot keep this method
     * looping forever.</p>
     */
    public void spiderErrorSingerList() {
        int pending = errorSingerList.size();
        for (int n = 0; n < pending; n++) {
            Map<String, Integer> errorMap = errorSingerList.poll();
            if (errorMap == null) {
                break;
            }
            int index = errorMap.get("index");
            int page = errorMap.get("page");
            try {
                String content = HTTPUtil.sendGet(buildSingerListUrl(index, page)).getContent();
                // parseSingerList re-queues the page itself if parsing fails again.
                parseSingerList(content, index, page);
            } catch (Exception e) {
                log.error("retry of singer list index:{} page:{} failed", index, page, e);
                errorSingerList.add(errorMap);
            }
        }
    }

    /**
     * Crawls every track-list page of one singer and persists the converted songs.
     *
     * <p>A page whose request fails is logged and added to {@link #errorSingerSongList}; the
     * remaining pages are still attempted.</p>
     *
     * @param singerMid the singer's mid as returned by the singer-list endpoint
     */
    public void spiderSingerSongList(String singerMid) {
        List<HotSong> hotSongs = new ArrayList<>();
        // Always start from a clean page count so a failure on a previous singer cannot leak in.
        singerSongListTotalPage = 1;
        for (int page = 0; page < singerSongListTotalPage; page++) {
            try {
                String content = HTTPUtil.sendGet(buildSingerSongListUrl(singerMid, page)).getContent();
                parseSingerSongList(content, hotSongs, singerMid, page);
            } catch (Exception e) {
                log.error("爬取 {} 歌手下面的歌曲出现错误", singerMid, e);
                errorSingerSongList.add(songError(singerMid, page));
            }
        }
        singerSongListTotalPage = 1;

        List<Song> songList = hotSongSpider.transToPO(hotSongs);
        hotSongDao.saveOrUpdate(songList);
    }

    /**
     * Parses one track-list response and appends the songs to {@code hotSongs}. While the known
     * page count is still {@code <= 1}, the response's "total" is used to compute the page count.
     * For every song the album page is fetched (best effort) to fill in the album image URL.
     *
     * <p>Any failure (including an unparsable response) is logged and the page is added to
     * {@link #errorSingerSongList}; this method does not propagate exceptions.</p>
     *
     * @param content   raw response body of the singer-track endpoint
     * @param hotSongs  output list the parsed songs are appended to
     * @param singerMid mid of the singer the page belongs to
     * @param page      zero-based page number
     */
    public void parseSingerSongList(String content, List<HotSong> hotSongs, String singerMid, int page) {
        try {
            JSONObject data = JSONObject.parseObject(content).getJSONObject("data");
            if (singerSongListTotalPage <= 1) {
                int total = data.getInteger("total");
                // Ceiling division: number of pages needed for `total` tracks.
                singerSongListTotalPage = (total + SONG_PAGE_SIZE - 1) / SONG_PAGE_SIZE;
            }
            JSONArray jsonSongArray = data.getJSONArray("list");
            for (int i = 0; i < jsonSongArray.size(); i++) {
                HotSong song = jsonSongArray.getJSONObject(i).getJSONObject("musicData").toJavaObject(HotSong.class);
                song.setAlbumImg(fetchAlbumImg(song.getAlbummid()));
                hotSongs.add(song);
            }
        } catch (Exception e) {
            errorSingerSongList.add(songError(singerMid, page));
            log.error("爬取 {} 歌手, page: {} 下面的歌曲出现错误", singerMid, page, e);
        }
    }

    /**
     * Makes one retry pass over {@link #errorSingerSongList}, persisting the songs of every page
     * that is fetched.
     *
     * <p>Only the entries queued when the pass starts are processed; pages that fail again are put
     * back on the queue for a later pass, so a persistently failing page cannot keep this method
     * looping forever.</p>
     */
    public void spiderErrorSingerSongList() {
        int pending = errorSingerSongList.size();
        for (int n = 0; n < pending; n++) {
            Map<String, String> errorMap = errorSingerSongList.poll();
            if (errorMap == null) {
                break;
            }
            List<HotSong> hotSongs = new ArrayList<>();
            String singerMid = errorMap.get("singerMid");
            int page = Integer.parseInt(errorMap.get("page"));
            try {
                String content = HTTPUtil.sendGet(buildSingerSongListUrl(singerMid, page)).getContent();
                // parseSingerSongList re-queues the page itself if parsing fails again.
                parseSingerSongList(content, hotSongs, singerMid, page);
                List<Song> songList = hotSongSpider.transToPO(hotSongs);
                hotSongDao.saveOrUpdate(songList);
            } catch (Exception e) {
                log.error("retry of singer {} song page {} failed", singerMid, page, e);
                errorSingerSongList.add(errorMap);
            }
        }
    }

    /** Builds the singer-list request URL for the given index and zero-based page. */
    private String buildSingerListUrl(int index, int page) {
        String sendParam = param.replace("{index}", String.valueOf(index))
                .replace("{sin}", String.valueOf(page * SINGER_PAGE_SIZE))
                .replace("{cur_page}", String.valueOf(page + 1));
        try {
            return singerListUrl + URLEncoder.encode(sendParam, StandardCharsets.UTF_8.name());
        } catch (java.io.UnsupportedEncodingException e) {
            // UTF-8 is a charset every Java platform is required to support.
            throw new IllegalStateException("UTF-8 encoding is not available", e);
        }
    }

    /** Builds the singer-track request URL for the given singer and zero-based page. */
    private String buildSingerSongListUrl(String singerMid, int page) {
        return singerSongListUrl.replace("{singermid}", singerMid)
                .replace("{begin}", String.valueOf(SONG_PAGE_SIZE * page))
                .replace("{num}", String.valueOf(SONG_PAGE_SIZE));
    }

    /**
     * Best-effort lookup of an album's cover image: fetches the album HTML page and reads the
     * {@code src} of the element with id "albumImg". Returns {@code null} when anything goes wrong.
     */
    private String fetchAlbumImg(String albumMid) {
        try {
            // Full URL is ALBUM_IMG_URI + album mid + ".html", e.g. https://y.qq.com/n/yqq/album/004PCOKh1RUAqZ.html
            String albumImgUri = MusicConstants.ALBUM_IMG_URI + albumMid + ".html";
            String html = HTTPUtil.sendGet(albumImgUri).getContent();
            return Jsoup.parse(html).getElementById("albumImg").attr("src");
        } catch (Exception e) {
            // The image is optional; record the reason instead of silently dropping it.
            log.warn("could not resolve album image for album {}: {}", albumMid, e.toString());
            return null;
        }
    }

    /** Creates an {@link #errorSingerList} entry for the given index and page. */
    private static Map<String, Integer> singerError(int index, int page) {
        Map<String, Integer> errorInfo = new HashMap<>();
        errorInfo.put("index", index);
        errorInfo.put("page", page);
        return errorInfo;
    }

    /** Creates an {@link #errorSingerSongList} entry for the given singer and page. */
    private static Map<String, String> songError(String singerMid, int page) {
        Map<String, String> errorInfo = new HashMap<>();
        errorInfo.put("singerMid", singerMid);
        errorInfo.put("page", String.valueOf(page));
        return errorInfo;
    }

}

 

非常感谢有小伙伴能够复现和测试我的代码,如果在复现过程中出现问题,或者有关键方法没有引入,请联系我,或者留言。

特别注意:由于水平有限代码肯定存在不足的地方,希望能够指正,那么我将做到更好。

email:chenrui@marsdl.com

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值