创建 Maven 项目的 pom.xml 文件（引入 JUnit、SLF4J 与 Lucene 4.10.2 依赖）
<!-- Maven POM for the Lucene demo project (groupId com.pactera, artifact pactera-lucene). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.pactera</groupId>
<artifactId>pactera-lucene</artifactId>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<!-- NOTE(review): junit is normally declared with <scope>test</scope> so it is
     excluded from the runtime classpath — confirm before adding it here. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
</dependency>
<!-- SLF4J binding routing to log4j 1.2. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.6.4</version>
</dependency>
<!-- Lucene core engine (indexing and search), version 4.10.2. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.10.2</version>
</dependency>
<!-- Common analyzers, including StandardAnalyzer used by the tests below. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.10.2</version>
</dependency>
<!-- Query parser for building queries from user-entered strings. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.10.2</version>
</dependency>
<!-- IK Analyzer: third-party Chinese word segmenter for Lucene 4.x.
     NOTE(review): this artifact is not in Maven Central — presumably it must be
     installed into a local/private repository; verify availability. -->
<dependency>
<groupId>cn.itcast.lucene.analyzer</groupId>
<artifactId>ik-analyzer</artifactId>
<version>2012-4.x</version>
</dependency>
</dependencies>
</project>
测试代码
/**
 * Tests index creation: writes one document with four field types
 * (IntField, TextField, LongField, StringField) into a disk-based index.
 *
 * @throws IOException if the index directory cannot be opened or written
 */
@Test
public void testIndexWriter() throws IOException {
    // Index directory on local disk.
    Directory directory = FSDirectory.open(new File("d:\\directory"));
    // Standard analyzer — splits Chinese text into single characters (see the
    // TokenStream test below).
    Analyzer analyzer = new StandardAnalyzer();
    // Writer configuration; OpenMode.CREATE overwrites any existing index.
    IndexWriterConfig indexWriterConfig =
            new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
    indexWriterConfig.setOpenMode(OpenMode.CREATE);
    // try-with-resources: the original leaked the writer (and its directory
    // write lock) if addDocument/commit threw before close() was reached.
    try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
        // Build the document to index.
        Document doc = new Document();
        doc.add(new IntField("id", 18, Store.YES));
        doc.add(new TextField("title", "我们都是党的接班人yes or no?", Store.YES));
        doc.add(new LongField("price", 6388L, Store.YES));
        doc.add(new StringField("pic", "www.baidu.com", Store.YES));
        // Add the document and flush the segment to disk.
        indexWriter.addDocument(doc);
        indexWriter.commit();
    }
}
在指定索引目录下查看索引
这些索引文件可以通过两种方式查看
第一种使用工具
用工具打开指定的目录就可以看到是怎么创建索引的
使用标准分词器汉字按单个字全部被拆分了
第二种：用 lucene 提供的 TokenStream 查看
/**
 * Tests the StandardAnalyzer token stream: prints each token of a mixed
 * Chinese/English title together with its start and end offsets.
 *
 * @throws IOException if the analyzer fails to produce the stream
 */
@Test
public void testTokenStream() throws IOException {
    // Standard analyzer — same one used for indexing above.
    Analyzer analyzer = new StandardAnalyzer();
    // try-with-resources closes the stream; the original never called end()
    // or close(), violating the TokenStream workflow contract
    // (addAttribute -> reset -> incrementToken* -> end -> close).
    try (TokenStream tokenStream =
            analyzer.tokenStream("title", "我们都是党的接班人yes or no?")) {
        // Attribute references must be obtained before consuming the stream.
        // Offset attribute: start/end character positions of each token.
        OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
        // Term attribute: the token text itself.
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        // Position the stream at the first token.
        tokenStream.reset();
        // Iterate over the token list.
        while (tokenStream.incrementToken()) {
            System.out.println("分词开始位置:" + offsetAttribute.startOffset());
            System.out.println("最小分词单元:" + charTermAttribute);
            System.out.println("分词结束位置:" + offsetAttribute.endOffset());
        }
        // Signal end-of-stream so end-of-input attributes are set correctly.
        tokenStream.end();
    }
}
结果