逆向最大匹配,顾名思义,就是根据字典中最长词的长度,设置一个存放待匹配文字的容器,从文本末尾取出相应长度的文字放入容器,匹配失败时每次把容器中的第一个字去掉,再继续匹配。举个简单的例子,比如,“我是中国人,来自中国人民解放军”,如果我的字典中最长词的长度是15,那么我先匹配“我是中国人,来自中国人民解放军”是否在字典中,没有,就把“我”去掉,然后匹配“是中国人,来自中国人民解放军”,也没有,以此类推,最后匹配到“中国人民解放军”。
先定义一个用来匹配数据的队列
/**
 * A shrinking window over a string, used for reverse maximum matching.
 *
 * <p>The window initially covers the whole string. Each successful
 * {@link #pop()} drops the first character of the window, so
 * {@link #getOutStr()} always returns the suffix of the stored string that
 * starts at {@link #getStratPos()}.
 */
public class Queue {
    /** The full string the window is taken from. */
    private String str = "";
    /** The current window: the suffix of {@code str} starting at {@code iStartPos}. */
    private String strOut = "";
    /** Index in {@code str} of the first character of the current window. */
    private int iStartPos = 0;

    /**
     * Appends one character to the end of the stored string.
     *
     * @param c the character to append
     */
    public void put(char c) {
        str += c;
        // Keep the window in sync so getOutStr() sees the appended character
        // immediately instead of only after the next pop().
        strOut = str.substring(iStartPos);
    }

    /**
     * Drops the first character of the current window.
     *
     * <p>The window never becomes empty: when it holds at most one character
     * (including when the stored string is empty) nothing changes.
     *
     * @return {@code true} if a character was dropped, {@code false} if the
     *         window was left unchanged
     */
    public boolean pop() {
        // ">=" rather than "==" so an empty string is handled without
        // running substring() past the end.
        if (iStartPos + 1 >= str.length()) {
            return false;
        }
        iStartPos++;
        strOut = str.substring(iStartPos);
        return true;
    }

    /**
     * @return the full stored string, regardless of the window position
     */
    public String getStr() {
        return str;
    }

    /**
     * Replaces the stored string and resets the window to cover all of it.
     *
     * @param str the new string; must not be {@code null}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public void setStr(String str) {
        if (str == null) {
            throw new NullPointerException("str");
        }
        this.str = str;
        this.strOut = str;
        // The window restarts at the beginning; a stale start index from a
        // previous string would make the next pop() inconsistent.
        this.iStartPos = 0;
    }

    /**
     * @return the current window
     */
    public String getOutStr() {
        return strOut;
    }

    /**
     * Returns the index in the stored string at which the current window
     * starts. (The spelling of the method name is kept for compatibility
     * with existing callers.)
     *
     * @return the start index of the current window
     */
    public int getStratPos() {
        return iStartPos;
    }
}
再定义一个类,用来读取文件,并判断某个词是否存在于文件中
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
//import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
//import java.io.InputStreamReader;
//import java.io.RandomAccessFile;
import java.io.Reader;
import java.util.HashSet;
import java.util.Set;
/**
 * Reads a text file into memory and answers whether a word appears as a
 * complete line of the most recently read file.
 *
 * <p>The file is decoded with the platform default charset.
 */
public class ReadFromFile {
    /** Content of the most recently read file; empty before the first read. */
    private String str = "";
    /** Non-empty lines of the most recently read file, used by {@link #matched}. */
    private Set<String> words = new HashSet<String>();

    /**
     * Reads the whole file and remembers its content for later
     * {@link #matched(String)} calls.
     *
     * <p>Reading is best-effort: if an I/O error occurs, the stack trace is
     * printed and whatever was read before the error (possibly nothing) is
     * kept and returned.
     *
     * @param fileName path of the file to read
     * @return the file content, or the part read before an I/O error
     */
    public String readFile(String fileName) {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        // try-with-resources closes the stream even when read() throws.
        try (FileInputStream fis = new FileInputStream(fileName)) {
            byte[] chunk = new byte[1024];
            int length;
            while ((length = fis.read(chunk)) != -1) {
                buffer.write(chunk, 0, length);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Decode once, after all bytes are collected: decoding each chunk on
        // its own corrupts multi-byte characters that straddle a chunk boundary.
        this.str = buffer.toString();

        // Index the lines so the first line, an unterminated last line and
        // files with LF or CR line endings are all matched the same way.
        Set<String> lines = new HashSet<String>();
        for (String line : this.str.split("\r\n|\r|\n")) {
            if (!line.isEmpty()) {
                lines.add(line);
            }
        }
        this.words = lines;
        return this.str;
    }

    /**
     * Tells whether the given word is exactly one line of the most recently
     * read file.
     *
     * @param strWords the word to look up
     * @return {@code true} if some line of the file equals {@code strWords};
     *         {@code false} otherwise, including for {@code null}, the empty
     *         string, or when no file has been read yet
     */
    public boolean matched(String strWords) {
        return words.contains(strWords);
    }
}
最后是主函数
import java.util.ArrayList;
/**
 * Command-line demo of reverse maximum matching word segmentation.
 *
 * <p>Loads a dictionary file and a text file, prints the text, then prints
 * the words found while scanning the text from its end towards its start
 * (so the words appear in reverse reading order).
 */
public class UseReverse {
    /** Size of the matching window in characters, i.e. the longest word that can be matched. */
    private static final int MAX_WORD_LENGTH = 15;
    /** Dictionary file used when no path is given on the command line. */
    private static final String DEFAULT_DICT_PATH = "F:\\微博相关\\字典\\123.TXT";
    /** Text file used when no path is given on the command line. */
    private static final String DEFAULT_TEXT_PATH = "F:\\test.txt";

    /**
     * Runs the segmentation demo.
     *
     * @param args optional: {@code args[0]} is the dictionary file path and
     *             {@code args[1]} the path of the text to segment; the
     *             built-in default paths are used for whichever is missing
     */
    public static void main(String[] args) {
        String dictPath = (args != null && args.length > 0) ? args[0] : DEFAULT_DICT_PATH;
        String textPath = (args != null && args.length > 1) ? args[1] : DEFAULT_TEXT_PATH;

        // The returned content is not needed here; the call loads the
        // dictionary into rf so that rf.matched() can consult it.
        ReadFromFile rf = new ReadFromFile();
        rf.readFile(dictPath);

        ReadFromFile rf1 = new ReadFromFile();
        String strNeedSep = rf1.readFile(textPath);
        System.out.println(strNeedSep);

        for (String word : segment(strNeedSep, rf, MAX_WORD_LENGTH)) {
            System.out.println(word);
        }
    }

    /**
     * Splits {@code text} into words by reverse maximum matching.
     *
     * <p>Starting at the end of the text, a window of at most
     * {@code maxWordLength} characters is tested against the dictionary;
     * while it does not match, its first character is dropped. The matched
     * window, or the last remaining single character when nothing matches,
     * becomes the next word, and the scan continues just before it.
     *
     * @param text          the text to segment
     * @param dictionary    reader whose {@code matched} decides whether a
     *                      candidate is a word
     * @param maxWordLength size of the matching window, at least 1
     * @return the words in the order they were found, i.e. from the end of
     *         the text to its start
     * @throws IllegalArgumentException if {@code maxWordLength} is less than 1
     */
    private static ArrayList<String> segment(String text, ReadFromFile dictionary, int maxWordLength) {
        if (maxWordLength < 1) {
            throw new IllegalArgumentException("maxWordLength must be >= 1: " + maxWordLength);
        }
        ArrayList<String> words = new ArrayList<String>();
        int end = text.length();
        while (end > 0) {
            int begin = Math.max(0, end - maxWordLength);
            Queue window = new Queue();
            window.setStr(text.substring(begin, end));
            while (!dictionary.matched(window.getOutStr())) {
                if (!window.pop()) {
                    // Down to a single character: emit it as a word of its own.
                    break;
                }
            }
            words.add(window.getOutStr());
            // Everything from the window's start position onwards is consumed.
            end = begin + window.getStratPos();
        }
        return words;
    }
}