首先需要引入相关的依赖包(本文代码用到了JFreeChart,即org.jfree.chart)。我用的是Eclipse加Maven,直接在百度上搜索Maven仓库,然后在仓库中搜索相应的依赖,把它添加到pom.xml文件中就行了。
要做的任务是给定关键词,统计文章中出现该关键词的次数。
代码的大致步骤为:
1.将要查找的关键词保存到source.txt中,每个关键词单独换行,即每输入一个关键词就换行继续输入;将要查找的文章保存为find.txt;
2.用InputStreamReader读取文本文件,编码设置为gb2312;若文件是其他编码,把代码中的编码名改成对应的编码即可。
3.用indexOf方法统计文章字符串中出现关键词的次数。
完整代码如下:
package com.xlh.bd.internal.service;import java.awt.Color;
import java.awt.Font;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartFrame;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.axis.CategoryAxis;
import org.jfree.chart.axis.CategoryLabelPositions;
import org.jfree.chart.axis.NumberAxis;
import org.jfree.chart.axis.NumberTickUnit;
import org.jfree.chart.axis.ValueAxis;
import org.jfree.chart.plot.CategoryPlot;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.chart.title.TextTitle;
import org.jfree.data.category.DefaultCategoryDataset;
public class CountKeywordsService {
/**
 * Reads text files line by line and counts how often each keyword occurs in an article.
 *
 * <p>Both the article file and the keyword file are decoded as gb2312.
 */
public class TextFileSearch {

    /**
     * Reads every line of a text file, decoding it as gb2312.
     *
     * <p>I/O failures are not propagated: the stack trace is printed and whatever lines were
     * read before the failure (possibly none) are returned.
     *
     * @param txt the file to read
     * @return the lines of the file in order, without line terminators; never {@code null}
     */
    public List<String> readTxt(File txt) {
        List<String> lines = new ArrayList<String>();
        FileInputStream rawStream = null;
        LineNumberReader reader = null;
        try {
            rawStream = new FileInputStream(txt);
            reader = new LineNumberReader(new InputStreamReader(rawStream, "gb2312"));
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                // Closing the reader also closes the underlying file stream.
                close(reader);
            } else {
                // The reader was never built (e.g. unsupported charset); release the raw stream.
                close(rawStream);
            }
        }
        return lines;
    }

    /**
     * Counts, for every keyword, how many times it occurs in the article.
     *
     * <p>Occurrences are counted per line and do not overlap. Each keyword is printed as it
     * is processed, and a summary line is printed for every keyword found at least once.
     * Empty lines in the keyword file are ignored.
     *
     * @param findTxt    the text to search (e.g. user comments)
     * @param keyWordTxt the keyword file, one keyword per line
     * @return keyword-to-count map in keyword-file order, containing only keywords that
     *         occur at least once
     */
    public LinkedHashMap<String, Integer> SearchKeyword(File findTxt, File keyWordTxt) {
        List<String> articleLines = readTxt(findTxt);
        List<String> keywords = readTxt(keyWordTxt);
        LinkedHashMap<String, Integer> counts = new LinkedHashMap<String, Integer>();
        for (String keyword : keywords) {
            if (keyword.length() == 0) {
                // A blank line is not a keyword. It must be skipped: indexOf("") matches
                // without advancing the search position, so counting it would never finish.
                continue;
            }
            System.out.println(keyword);
            int total = 0;
            for (String line : articleLines) {
                total += countOccurrences(line, keyword);
            }
            if (total > 0) {
                counts.put(keyword, total);
                System.out.println("文中出现关键词"+keyword+"达"+total+" 次");
            }
        }
        return counts;
    }

    /** Counts the non-overlapping occurrences of a non-empty keyword within one line. */
    private int countOccurrences(String line, String keyword) {
        int occurrences = 0;
        int from = 0;
        int hit;
        while ((hit = line.indexOf(keyword, from)) != -1) {
            // Resume after the match so overlapping matches are not counted twice.
            from = hit + keyword.length();
            occurrences++;
        }
        return occurrences;
    }

    /**
     * Closes a resource, printing the stack trace if closing fails.
     *
     * @param able the resource to close; {@code null} is ignored
     */
    private void close(Closeable able) {
        if (able == null) {
            return;
        }
        try {
            able.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/**
 * Displays the keyword counts as a 3D bar chart in a new window.
 *
 * <p>Each keyword is added to the dataset as both its own series and its own category, so
 * every keyword gets one bar and one legend entry.
 *
 * @param set keyword-to-count map; bars are added in the map's iteration order
 */
public void drawBarChart(LinkedHashMap<String,Integer> set){
    DefaultCategoryDataset dataset = new DefaultCategoryDataset();
    for (String keyword : set.keySet()) {
        dataset.addValue(set.get(keyword), keyword, keyword);
    }

    JFreeChart chart = ChartFactory.createBarChart3D("柱状图", // chart title
            "关键词", // category (domain) axis label
            "出现次数", // value (range) axis label
            dataset,
            PlotOrientation.VERTICAL,
            true, // show the legend
            false, // no tooltips
            false); // no URLs
    chart.setBackgroundPaint(Color.WHITE);

    // Plot area: black background with red grid lines on both axes.
    CategoryPlot plot = chart.getCategoryPlot();
    plot.setBackgroundPaint(Color.BLACK);
    plot.setDomainGridlinePaint(Color.RED);
    plot.setDomainGridlinesVisible(true);
    plot.setRangeGridlinePaint(Color.RED);

    // Horizontal (category) axis: fonts and rotated tick labels.
    CategoryAxis domainAxis = plot.getDomainAxis();
    domainAxis.setLabelFont(new Font("黑体", Font.BOLD, 14));
    domainAxis.setTickLabelFont(new Font("宋体", Font.BOLD, 12));
    domainAxis.setCategoryLabelPositions(CategoryLabelPositions.createUpRotationLabelPositions(0.4));

    // Vertical (value) axis: label font/colour, tick colours and tick spacing.
    NumberAxis numberAxis = (NumberAxis) plot.getRangeAxis();
    numberAxis.setLabelFont(new Font("黑体", Font.BOLD, 24));
    numberAxis.setLabelPaint(Color.BLACK);
    numberAxis.setTickLabelPaint(Color.RED);
    numberAxis.setTickMarkPaint(Color.BLUE);
    // NOTE(review): the tick spacing is fixed at 150, so when every count is below 150 the
    // axis presumably shows no tick above 0 - confirm this suits the expected data.
    numberAxis.setAutoTickUnitSelection(false);
    numberAxis.setTickUnit(new NumberTickUnit(150));

    // Per the original author's note, setting the legend font explicitly fixes garbled
    // Chinese characters at the bottom of the chart.
    chart.getLegend().setItemFont(new Font("黑体", Font.PLAIN, 16));

    // Replace the default title with a red one.
    TextTitle title = new TextTitle("柱状图");
    title.setFont(new Font("黑体", Font.PLAIN, 16));
    title.setPaint(Color.RED);
    chart.setTitle(title);

    ChartFrame frame = new ChartFrame("柱状图", chart);
    frame.pack();
    frame.setVisible(true);
}
/**
 * Counts keyword occurrences in an article and shows the counts as a bar chart.
 *
 * @param args optional; when at least two arguments are given, {@code args[0]} is the
 *             article file and {@code args[1]} the keyword file, otherwise the built-in
 *             default paths are used
 */
public static void main(String[] args) {
    // Defaults keep the original behaviour when no paths are passed on the command line.
    File articleFile = new File("D:\\xlh\\src\\find.txt");
    File keywordFile = new File("D:\\xlh\\src\\source.txt");
    if (args != null && args.length >= 2) {
        articleFile = new File(args[0]);
        keywordFile = new File(args[1]);
    }

    CountKeywordsService service = new CountKeywordsService();
    TextFileSearch search = service.new TextFileSearch();
    LinkedHashMap<String,Integer> counts = search.SearchKeyword(articleFile, keywordFile);
    service.drawBarChart(counts); // draw the keyword frequencies as a bar chart
}
}