package cn.yys;
import org.apache.hadoop.io.IntWritable;
import java.lang.reflect.Type;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small demo that splits a comma-separated sample text into tokens and prints how often
 * each token occurs.
 *
 * <p>Every field of the sample (the numeric ids, the letters and the trailing numbers) is
 * treated as a token and counted.
 */
public class 分词 {

    /**
     * Token delimiter: any run of non-word characters (commas, spaces, line breaks, ...).
     * Matching a whole run keeps sequences such as {@code ", "} from yielding empty tokens.
     */
    private static final Pattern DELIMITER = Pattern.compile("\\W+");

    /**
     * Tokenizes the built-in sample, prints its third token, then prints one
     * {@code "<token> <count>"} line per distinct token.
     *
     * <p>The counts are held in a {@link HashMap}, so the order of the count lines is the
     * map's iteration order rather than the order of first appearance.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String sample = "1,a, 28\n"
                + "2,b, 35\n"
                + "3,c,28\n"
                + "4,d,35\n"
                + "5,e,28\n"
                + "6,a,28\n"
                + "7,b,35\n"
                + "8,c,28\n"
                + "9,a,28";

        String[] tokens = DELIMITER.split(sample);

        // Show the third token; guarded so shorter input cannot throw.
        if (tokens.length > 2) {
            System.out.println(tokens[2]);
        }

        // Print every distinct token together with its number of occurrences.
        for (Map.Entry<String, Integer> entry : countTokens(tokens).entrySet()) {
            System.out.println(entry.getKey() + " " + entry.getValue());
        }
    }

    /**
     * Counts how many times each non-empty token occurs.
     *
     * @param tokens tokens to count; empty strings are skipped
     * @return a map from token to its number of occurrences
     */
    private static Map<String, Integer> countTokens(String[] tokens) {
        Map<String, Integer> counts = new HashMap<>();
        for (String token : tokens) {
            // A leading delimiter makes split() return an empty first element; it is not a word.
            if (token.isEmpty()) {
                continue;
            }
            counts.merge(token, 1, Integer::sum);
        }
        return counts;
    }
}
// Java word segmentation: counting word frequencies.
// (Page footer: latest recommended article published 2024-04-25 11:13:21.)