中科院分词调用代码，无词性标注

最新推荐文章于 2022-05-22 14:43:17 发布

程志刚。。。

最新推荐文章于 2022-05-22 14:43:17 发布

阅读量679

点赞数

文章标签：编码

package test;

import java.io.File;

import ICTCLAS.I3S.AC.ICTCLAS50;
/**
 * Calls the ICTCLAS50 native segmenter to split a Chinese text file into words
 * (no part-of-speech tags) and writes the result to a separate text file.
 *
 * <p>Original author: 郭喜跃 (Guo Xiyue), 2013-7-20.
 */
public class fenci{

    /** Input file used when the caller passes {@code null} (historical hard-coded value). */
    private static final String DEFAULT_SOURCE_PATH = "d:/1.txt";

    /** Output file used when the caller passes {@code null} (historical hard-coded value). */
    private static final String DEFAULT_TARGET_PATH = "d:/2.txt";

    /**
     * Segments the file at {@code inTxtPath} and writes the result to {@code outTxtPath}.
     *
     * <p>Prints {@code 11} to standard output on success and {@code 22} on failure;
     * on failure a diagnostic is also written to standard error. Exceptions are
     * reported, not propagated.
     *
     * @param inTxtPath  path of the text file to segment; {@code null} falls back to
     *                   {@code d:/1.txt}
     * @param outTxtPath path of the file that receives the segmented text; {@code null}
     *                   falls back to {@code d:/2.txt}
     */
    public void callICTCLAS(String inTxtPath, String outTxtPath) {
        // Source and target files for segmentation. Earlier versions ignored the
        // arguments and always used the defaults; null keeps that behavior.
        String sourceTxt = (inTxtPath != null) ? inTxtPath : DEFAULT_SOURCE_PATH;
        String targetTxt = (outTxtPath != null) ? outTxtPath : DEFAULT_TARGET_PATH;

        ICTCLAS50 segmenter = null;
        boolean initialized = false;
        try {
            // Create the segmenter instance.
            segmenter = new ICTCLAS50();

            // Directory that holds the segmenter's library/data files:
            // the current working directory.
            String libraryDir = new File("").getAbsolutePath();
            initialized = segmenter.ICTCLAS_Init(libraryDir.getBytes("UTF-8"));
            if (!initialized) {
                reportFailure("ICTCLAS_Init failed for directory " + libraryDir, null);
                return;
            }

            // A user dictionary could be loaded here with
            // ICTCLAS_ImportUserDictFile(pathBytes, encodingType), which returns the
            // number of imported words. It is intentionally not used.

            // Run the segmentation. Third argument 0: no part-of-speech tagging.
            // NOTE(review): second argument 0 is the encoding type, presumably
            // "auto-detect" per the ICTCLAS50 docs - confirm.
            // Paths use the platform charset because they are handed to native code.
            boolean processed = segmenter.ICTCLAS_FileProcess(
                    sourceTxt.getBytes(), 0, 0, targetTxt.getBytes());
            if (!processed) {
                reportFailure("ICTCLAS_FileProcess failed: " + sourceTxt + " -> " + targetTxt, null);
                return;
            }

            System.out.print("11");
        } catch (Exception e) {
            reportFailure("segmentation of " + sourceTxt + " failed", e);
        } finally {
            // Always release the native segmenter once it has been initialized,
            // including on the failure paths above.
            if (initialized) {
                segmenter.ICTCLAS_Exit();
            }
        }
    }

    /** Emits the legacy failure marker on stdout plus a diagnostic on stderr. */
    private static void reportFailure(String message, Exception cause) {
        System.out.print("22");
        System.err.println("fenci: " + message + (cause != null ? " (" + cause + ")" : ""));
    }

    /**
     * Command-line entry point.
     *
     * @param args optional {@code <inputPath> <outputPath>}; when fewer than two
     *             arguments are given the default paths are used
     */
    public static void main(String[] args)
    {
        fenci runner = new fenci();
        String inputPath = null;
        String outputPath = null;
        if (args != null && args.length >= 2) {
            inputPath = args[0];
            outputPath = args[1];
        }
        runner.callICTCLAS(inputPath, outputPath);
    }
}