先在 pom.xml 中添加下面的 Maven 依赖,然后就可以直接使用后面的代码了。
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.12</version>
</dependency>
<dependency>
<groupId>com.kennycason</groupId>
<artifactId>kumo-core</artifactId>
<version>1.27</version>
</dependency>
<dependency>
<groupId>com.kennycason</groupId>
<artifactId>kumo-tokenizers</artifactId>
<version>1.27</version>
</dependency>
package com.yang;
import com.kennycason.kumo.CollisionMode;
import com.kennycason.kumo.WordCloud;
import com.kennycason.kumo.WordFrequency;
import com.kennycason.kumo.bg.CircleBackground;
import com.kennycason.kumo.font.KumoFont;
import com.kennycason.kumo.font.scale.SqrtFontScalar;
import com.kennycason.kumo.nlp.FrequencyAnalyzer;
import com.kennycason.kumo.nlp.tokenizers.ChineseWordTokenizer;
import com.kennycason.kumo.palette.LinearGradientColorPalette;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.awt.*;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Turns the danmaku (bullet comments) of a Bilibili video into a word-cloud image.
 *
 * <p>The flow is: resolve the video's {@code cid} from its av number
 * ({@link #getBofqi(String)}), download the danmaku XML for that cid and count how often
 * each comment occurs ({@link #ReaderBiBi(String)}), then render the counts as a circular
 * word cloud ({@link #GreaderImg(Map)}). {@link #ComeKna(String, String)} chains the three.
 */
public class PanBibi {

    /** Output file used by {@link #GreaderImg(Map)} when no path is given. */
    private static final String DEFAULT_OUTPUT_PATH = "d://3.png";

    /** Width and height, in pixels, of the generated square image. */
    private static final int IMAGE_SIZE = 900;

    /** Matches {@code cid=<digits>&aid=} in the video page; group 1 is the cid. */
    private static final Pattern CID_PATTERN = Pattern.compile("cid=(\\d+)&aid=");

    /** Matches one {@code <d ...>text</d>} element of the danmaku XML; group 1 is the text. */
    private static final Pattern DANMAKU_PATTERN = Pattern.compile("<d(?:\\s[^>]*)?>(.*?)</d>");

    /**
     * Looks up the cid of a video from its av number by scraping the video page.
     *
     * @param aid the av number, without the {@code av} prefix
     * @return the first cid found in the page, or an empty string if none was found
     * @throws Exception if the HTTP request fails
     */
    public static String getBofqi(String aid) throws Exception {
        RequestConfig requestConfig =
                RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).build();
        HttpGet request = new HttpGet("https://www.bilibili.com/video/av" + aid + "/");
        request.setConfig(requestConfig);

        String html;
        // Close both the client and the response so the connection is released.
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(request)) {
            // UTF-8 is only the fallback; a charset declared by the response still wins.
            html = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
        }

        // Digits-only capture: a greedy ".*" could run from the first "cid=" to the last
        // "&aid=" on the same line and return everything in between.
        Matcher matcher = CID_PATTERN.matcher(html);
        if (matcher.find()) {
            String cid = matcher.group(1);
            System.out.println(cid);
            return cid;
        }
        return "";
    }

    /**
     * Downloads the danmaku XML for a cid and counts the occurrences of each comment text.
     *
     * @param x the cid identifying the danmaku file
     * @return a map from comment text to the number of times it appears
     * @throws Exception if the HTTP request fails
     */
    public static Map<String, Integer> ReaderBiBi(String x) throws Exception {
        HttpGet request = new HttpGet("http://comment.bilibili.com/" + x + ".xml");

        String xml;
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(request)) {
            // Read the whole body first and decode once, so that only the bytes actually
            // received are used and no multi-byte character is split between two reads.
            byte[] body = EntityUtils.toByteArray(response.getEntity());
            // NOTE(review): assumes the danmaku feed is UTF-8 encoded -- confirm.
            xml = new String(body, StandardCharsets.UTF_8);
        }

        Map<String, Integer> counts = new HashMap<String, Integer>(1024);
        Matcher matcher = DANMAKU_PATTERN.matcher(xml);
        while (matcher.find()) {
            String text = matcher.group(1);
            Integer seen = counts.get(text);
            counts.put(text, seen == null ? 1 : seen + 1);
        }
        // The number printed is the count of distinct comment texts (the map size).
        System.out.println("共"+counts.size()+"条弹幕");
        return counts;
    }

    /**
     * Renders the given frequencies as a word cloud and writes it to the default path
     * {@code d://3.png}.
     *
     * @param strs map from text to its weight (occurrence count)
     * @throws Exception if rendering fails
     */
    public static void GreaderImg(Map<String, Integer> strs) throws Exception {
        GreaderImg(strs, DEFAULT_OUTPUT_PATH);
    }

    /**
     * Renders the given frequencies as a circular word cloud and writes it to a file.
     *
     * @param strs       map from text to its weight (occurrence count)
     * @param outputPath path of the image file to write
     * @throws Exception if rendering fails
     */
    public static void GreaderImg(Map<String, Integer> strs, String outputPath) throws Exception {
        // Each map entry is used as one "word"; the texts are not tokenized further.
        List<WordFrequency> wordFrequencies = new ArrayList<WordFrequency>(strs.size());
        for (Map.Entry<String, Integer> entry : strs.entrySet()) {
            wordFrequencies.add(new WordFrequency(entry.getKey(), entry.getValue()));
        }

        // A font must be set explicitly, otherwise Chinese text is rendered garbled.
        Font font = new Font("STSong-Light", Font.ITALIC, 18);

        Dimension dimension = new Dimension(IMAGE_SIZE, IMAGE_SIZE);
        WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
        wordCloud.setPadding(2);
        // The cloud is circular; the radius is half the canvas side so the whole circle
        // fits inside the image.
        // NOTE(review): the previous radius equalled the full canvas side (900) -- confirm
        // against the Kumo examples that half the side is the intended value.
        wordCloud.setBackground(new CircleBackground(IMAGE_SIZE / 2));
        wordCloud.setFontScalar(new SqrtFontScalar(12, 42));
        // Three gradient colors; colors listed earlier are used for more frequent words.
        wordCloud.setColorPalette(
                new LinearGradientColorPalette(Color.RED, Color.BLUE, Color.GREEN, 30, 30));
        wordCloud.setKumoFont(new KumoFont(font));
        wordCloud.setBackgroundColor(new Color(255, 255, 255));
        wordCloud.build(wordFrequencies);
        wordCloud.writeToFile(outputPath);
    }

    /**
     * Generates the word cloud for a video.
     *
     * @param av  the av number; only used to look up the cid when {@code cid} is null
     * @param cid the danmaku id; when non-null it is used directly and no lookup is done
     * @throws Exception if a request or the rendering fails
     */
    public static void ComeKna(String av, String cid) throws Exception {
        String danmakuId = (cid != null) ? cid : getBofqi(av);
        // getBofqi reports "not found" as an empty string, so check for that as well as
        // null; otherwise a failed lookup would go on to request "/.xml".
        if (danmakuId == null || danmakuId.isEmpty()) {
            return;
        }
        Map<String, Integer> counts = ReaderBiBi(danmakuId);
        GreaderImg(counts);
    }
}
调用示例:
//如果传了第二个参数(cid),就不用再根据 av 号查找 cid 了,直接获取弹幕并生成词云
PanBibi.ComeKna("626634287","221674886");
1. 通过 Java 获取 B 站视频的弹幕数据
aid(av 号)可以在浏览器控制台中直接输入 aid 获取。
cid 可以在浏览器开发者工具的网络(Network)窗口中找到,有了 cid 就可以定位存放弹幕的文件了。
然后访问这个地址获取数据,再根据数据的权重(每条弹幕出现的次数)生成词云。