利用IK Analyzer分词（txt输入输出）

最新推荐文章于 2024-09-04 10:16:51 发布

weixin_30561177

最新推荐文章于 2024-09-04 10:16:51 发布

阅读量192

点赞数 1

文章标签： java

原文链接：http://www.cnblogs.com/qijunhui/p/8445558.html

版权

1 说明

利用IK包分词较为便捷，该分词方法为正向匹配，故选此方法。
使用程序时需要导入分词包。
本程序需要用到的分词包已上传至附件。
本程序的输入输出文件名，请在主函数中更改。

2 程序

 1 import java.io.*;
 2 import org.apache.lucene.analysis.Analyzer;
 3 import org.apache.lucene.analysis.TokenStream;
 4 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 5 import org.wltea.analyzer.lucene.IKAnalyzer;
 6 
 7 public class IK_FC {
 8     
 9     public static String Seg(String sentence) throws IOException {
10         String text="";
11         //创建分词对象
12         Analyzer anal=new IKAnalyzer(true);
13         StringReader reader=new StringReader(sentence);
14         //分词
15         TokenStream ts=anal.tokenStream("", reader);
16         CharTermAttribute term=ts.getAttribute(CharTermAttribute.class);
17         //遍历分词数据
18         while(ts.incrementToken()){
19             text+=term.toString()+"/";
20         }
21         reader.close();
22         anal.close();
23         return text.trim()+"\n";
24     }
25 
26     public static void main(String[] args) {
27         File file_ys = new File("E:/SimHash文档库/原始文档10.txt");//原始文件
28         File file_fch = new File("E:/SimHash文档库/对比文档10.txt");//分词后文件
29         try {
30             FileReader fr = new FileReader(file_ys);
31             BufferedReader bufr = new BufferedReader(fr);
32             FileWriter fw = new FileWriter(file_fch);
33             BufferedWriter bufw = new BufferedWriter(fw);
34             String s = null;
35             int i = 0;
36             while((s = bufr.readLine()) != null) {
37                 i++;
38                 String s_fch = IK_FC.Seg(s);
39                 System.out.println("第"+i+"行的原始数据：");
40                 System.out.println(s);
41                 System.out.println("第"+i+"行分词后的结果：");
42                 System.out.println(s_fch);
43                 bufw.write(s_fch);
44                 bufw.newLine();
45             }
46             bufr.close();
47             fr.close();
48             bufw.close();
49             fw.close();
50             System.out.println("文件处理完毕！");
51         } catch (Exception e) {
52             e.printStackTrace();
53         }
54     }
55 }