方法调用流程图:
第一步:调用tokenStream方法
第二步:调用createComponents方法生成TokenStreamComponents
第三步:分词的具体逻辑处理,并把分好的词放在自定义的attribute中
代码实现:
package com.shidebin.lucence.lucence_quickStart;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
// 1. Define MyAnalysis, which extends Analyzer and implements createComponents.
// 2. In createComponents, build the TokenStreamComponents; its constructor accepts two arguments: source and sink.
// Tokenizer is a subclass of TokenStream; TokenStream uses the decorator pattern.
import org.apache.lucene.util.AttributeReflector;
public class MyAnalysis extends Analyzer{
@Override
protected TokenStreamComponents createComponents(String fieldName) {
MyTokenizer source = new MyTokenizer();
TokenStream sink = new MyTokenFilter(source);
return new TokenStreamComponents(source);
}
public static class MyTokenizer extends Tokenizer{
Myattribute addAttribute = this.addAttribute(Myattribute.class);
@Override
public boolean incrementToken() throws IOException {
// 清除所有的词项属性
clearAttributes();
char[] word = new char[255];
int position = 0;
while(true) {
int read = this.input.read();
//没读到末尾
if(read != -1) {
if(Character.isWhitespace((char)read)){
if(position > 0) {
addAttribute.genAttribute(word);
}
return true;
}else {
word[position++] = (char)read;
}
}else {
if(position > 0) {
addAttribute.genAttribute(word);
return true;
}
return false;
}
}
}
}
public static class MyTokenFilter extends TokenFilter{
protected MyTokenFilter(TokenStream input) {
super(input);
}
@Override
public boolean incrementToken() throws IOException {
// 对MyTokenizer分词之后的词再进行处理,例如大写转小写
return false;
}
}
public static interface Myattribute extends Attribute{
void genAttribute(char[] word);
String getAttribute();
}
public static class MyattributeImpl extends AttributeImpl implements Myattribute{
private String attribute;
@Override
public void clear() {
// TODO Auto-generated method stub
}
@Override
public void reflectWith(AttributeReflector reflector) {
// TODO Auto-generated method stub
}
@Override
public void copyTo(AttributeImpl target) {
// TODO Auto-generated method stub
}
@Override
public void genAttribute(char[] word) {
this.attribute = new String(word).trim();
}
@Override
public String getAttribute() {
return this.attribute;
}
}
public static void main(String[] args) {
MyAnalysis ana = new MyAnalysis();
String text = "fasfas ERIKNI fasf FASDFdfsd FASFJKL jfkadsjfakl";
TokenStream tokenStream = ana.tokenStream("abc", text);
try {
Myattribute attribute = tokenStream.getAttribute(Myattribute.class);
tokenStream.reset();
while(tokenStream.incrementToken()) {
System.out.print(attribute.getAttribute()+"|");
}
tokenStream.end();
System.out.println();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}