// Runnable, commented implementation based on the following two blog posts:
// https://blog.csdn.net/chengzheng_hit/article/details/54752673
// https://blog.csdn.net/xiaoyeyopulei/article/details/25194021
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Statistics and output of one segmentation run.
 *
 * <p>Fields are public and mutable; the segmentation methods fill them in
 * directly.
 */
class Result
{
    /** Total number of tokens produced. */
    public int c_all = 0;
    /** Number of single-character tokens that are dictionary words. */
    public int c_in_dict = 0;
    /** Number of single-character tokens that are not dictionary words. */
    public int c_out_dict = 0;
    /** Maps a token length to the number of tokens having that length. */
    public Map<Integer,Integer> count = new HashMap<Integer,Integer>();
    /** The segmented text; each token is followed by a "/" separator. */
    public String request;

    /**
     * Renders a multi-line, human-readable report of all counters, the
     * per-length histogram and the segmented text.
     *
     * <p>Histogram lines follow the iteration order of {@link #count}, which a
     * {@code HashMap} does not guarantee.
     *
     * @return the report, with every line terminated by {@code "\n"}
     */
    @Override
    public String toString() {
        // StringBuilder: the buffer never leaves this method, so the
        // synchronization of StringBuffer is pure overhead.
        StringBuilder sb = new StringBuilder();
        sb.append("总次数:").append(c_all).append("\n");
        sb.append("单字字典词数:").append(c_in_dict).append("\n");
        sb.append("单字非字典词数:").append(c_out_dict).append("\n");
        sb.append("字数分割统计:").append("\n");
        // entrySet() avoids a second lookup per key.
        for (Map.Entry<Integer,Integer> entry : count.entrySet()) {
            sb.append("字数为").append(entry.getKey()).append("的词有")
                .append(entry.getValue())
                .append("个").append("\n");
        }
        sb.append("最终切分结果:").append(request).append("\n");
        return sb.toString();
    }
}
/**
 * Dictionary-based word segmentation of a built-in Chinese sentence.
 *
 * <p>Three strategies are offered: forward maximum matching
 * ({@link #leftMax()}), reverse maximum matching ({@link #rightMax()}) and a
 * bidirectional combination of the two ({@link #biMax()}).
 * Call {@link #setDictionary()} first; with an empty dictionary every
 * character becomes its own token.
 */
public class Segmentation {
    /** Known words; populated by {@link #setDictionary()}. */
    private List<String> dictionary = new ArrayList<String>();
    /** The text that gets segmented. */
    private String request = "北京大学生前来应聘";

    /**
     * Loads the built-in demo vocabulary. Calling it more than once is
     * harmless: words already present are not added again.
     */
    public void setDictionary() {
        addWord("北京");
        addWord("北京大学");
        addWord("大学");
        addWord("大学生");
        addWord("生前");
        addWord("前来");
        addWord("应聘");
    }

    /**
     * Segments the request text with forward maximum matching: starting at
     * the left edge, the longest dictionary word beginning at the current
     * position is taken; if there is none, a single character is taken.
     *
     * @return the tokens (joined into {@code Result.request}, each followed
     *         by "/") together with their statistics
     */
    public Result leftMax() {
        return summarize(forwardTokens(request));
    }

    /**
     * Segments the request text with reverse maximum matching: starting at
     * the right edge, the longest dictionary word ending at the current
     * position is taken; if there is none, a single character is taken.
     * Tokens are reported in reading (left-to-right) order.
     *
     * @return the tokens (joined into {@code Result.request}, each followed
     *         by "/") together with their statistics
     */
    public Result rightMax()
    {
        return summarize(backwardTokens(request));
    }

    /**
     * Bidirectional maximum matching: runs both {@link #leftMax()} and
     * {@link #rightMax()} and picks one of the two segmentations.
     *
     * <p>Selection rules, in order: the segmentation with fewer tokens wins;
     * if both are identical that segmentation is returned; otherwise the one
     * with fewer single-character tokens wins, and on a remaining tie the
     * reverse segmentation is returned.
     *
     * @return the chosen segmentation, each token followed by "/"
     */
    public String biMax()
    {
        Result forward = leftMax();
        Result backward = rightMax();
        // Compare the token counts first.
        if (forward.c_all != backward.c_all) {
            return forward.c_all < backward.c_all ? forward.request : backward.request;
        }
        if (forward.request.equals(backward.request)) {
            return forward.request;
        }
        int forwardSingles = forward.c_in_dict + forward.c_out_dict;
        int backwardSingles = backward.c_in_dict + backward.c_out_dict;
        return forwardSingles < backwardSingles ? forward.request : backward.request;
    }

    /** Adds a word to the dictionary unless it is already present. */
    private void addWord(String word) {
        if (!isIn(word, dictionary)) {
            dictionary.add(word);
        }
    }

    /** Splits text left-to-right, always taking the longest dictionary word. */
    private List<String> forwardTokens(String text) {
        List<String> tokens = new ArrayList<String>();
        int window = longestWordLength();
        int start = 0;
        while (start < text.length()) {
            int end = Math.min(text.length(), start + window);
            // Shrink the window until it is a dictionary word or one character.
            while (end > start + 1 && !isIn(text.substring(start, end), dictionary)) {
                end--;
            }
            tokens.add(text.substring(start, end));
            start = end;
        }
        return tokens;
    }

    /** Splits text right-to-left, always taking the longest dictionary word. */
    private List<String> backwardTokens(String text) {
        List<String> tokens = new ArrayList<String>();
        int window = longestWordLength();
        int end = text.length();
        while (end > 0) {
            int start = Math.max(0, end - window);
            // Shrink the window until it is a dictionary word or one character.
            while (start < end - 1 && !isIn(text.substring(start, end), dictionary)) {
                start++;
            }
            // Prepend so the final list is in reading order.
            tokens.add(0, text.substring(start, end));
            end = start;
        }
        return tokens;
    }

    /**
     * Length of the longest dictionary word, but never less than 1 so that
     * the matching loops always consume at least one character.
     */
    private int longestWordLength() {
        int longest = 1;
        for (String word : dictionary) {
            longest = Math.max(longest, word.length());
        }
        return longest;
    }

    /** Joins the tokens with "/" and fills in the counters of a new Result. */
    private Result summarize(List<String> tokens) {
        Result r = new Result();
        StringBuilder joined = new StringBuilder();
        for (String token : tokens) {
            joined.append(token).append("/");
            if (token.length() == 1) {
                if (isIn(token, dictionary)) {
                    r.c_in_dict++;
                } else {
                    r.c_out_dict++;
                }
            }
            Integer seen = r.count.get(token.length());
            r.count.put(token.length(), seen == null ? 1 : seen + 1);
            r.c_all++;
        }
        r.request = joined.toString();
        return r;
    }

    /**
     * Tells whether a word is contained in the given word list.
     *
     * @param s    the candidate word
     * @param list the word list to search
     * @return {@code true} if some element of {@code list} equals {@code s}
     */
    private boolean isIn(String s, List<String> list) {
        return list.contains(s);
    }

    /** Prints the reverse and then the forward segmentation report. */
    public static void main(String[] args) {
        Segmentation seg = new Segmentation();
        seg.setDictionary();
        Result response1 = seg.rightMax();
        System.out.println(response1);
        Result response2 = seg.leftMax();
        System.out.println(response2);
    }
}