网上一直找不到用 Java 调用 C++ 版 jieba 分词(cppjieba)的例子。最近因为项目需要,我做了一个简单的 JNI 封装,经本人实际测试可用。不同环境下的具体步骤可能略有差别,有问题欢迎留言,我会尽量解答。
1,编写java代码,java代码如下
import java.util.Arrays;
import java.util.Map;

/**
 * Java-side binding for the C++ jieba word segmenter (cppjieba), exposed
 * through JNI.
 *
 * <p>Every {@code native} method is implemented in the shared library that
 * {@link #main(String[])} loads with {@link System#load(String)}; the library
 * must be loaded before any of them is called.
 */
public class JiebaForJava {

    /**
     * Segments a sentence into words.
     *
     * @param content the text to segment
     * @param isHmm   {@code true} to request segmentation with the HMM model,
     *                {@code false} to request segmentation without it
     * @return the words of {@code content}
     */
    public native String[] cut(String content, boolean isHmm);

    /**
     * Segments a sentence in "cut all" mode.
     *
     * @param content the text to segment
     * @return the words produced by the native {@code CutAll} call
     */
    public native String[] cutAll(String content);

    /**
     * Segments a sentence in search-engine mode.
     *
     * @param content the text to segment
     * @return the words produced by the native {@code CutForSearch} call
     */
    public native String[] cutForSearch(String content);

    /**
     * Adds a single user-defined word to the segmenter.
     *
     * @param content the word to add
     */
    public native void insertUserWord(String content);

    /**
     * Tags the words of a sentence.
     *
     * <p>The result is a map keyed by word, so a word that occurs several
     * times in {@code content} appears only once in the result.
     *
     * @param content the text to tag
     * @return a map from word to the tag reported by the native {@code Tag} call
     */
    public native Map<String, String> tag(String content);

    /**
     * Loads a user dictionary file.
     *
     * @param path file system path of the dictionary file
     */
    public native void loadUserDict(String path);

    /**
     * Small demo: loads the native library and exercises every binding,
     * printing the results to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.load("/home/spark/Desktop/cppjieba-master/test/JiebaForJava.so");
        JiebaForJava demo = new JiebaForJava();

        System.out.println(Arrays.toString(demo.cut("他来到了网易杭研大厦", true)));
        System.out.println(Arrays.toString(demo.cutAll("我来到北京清华大学")));
        System.out.println(Arrays.toString(demo.cutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")));

        // Same sentence before and after registering it as a user word.
        System.out.println(Arrays.toString(demo.cut("男默女泪", true)));
        demo.insertUserWord("男默女泪");
        System.out.println(Arrays.toString(demo.cut("男默女泪", true)));

        Map<String, String> tag = demo.tag("我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。");
        for (Map.Entry<String, String> entry : tag.entrySet()) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }

        demo.loadUserDict("/home/spark/Desktop/cppjieba-master/test/dict.txt");
        System.out.println(Arrays.toString(demo.cut("小龙女说:我想过过过儿过的生活", true)));
    }
}
2,利用java生成这个类的头文件
javac JiebaForJava.java 生成class文件
javah JiebaForJava 生成JiebaForJava.h文件(JDK 10 及以上版本已移除 javah,可改用 javac -h . JiebaForJava.java 在编译的同时生成头文件)
JiebaForJava.h文件内容如下
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class JiebaForJava */
#ifndef _Included_JiebaForJava
#define _Included_JiebaForJava
#ifdef __cplusplus
extern "C" {
#endif
/*
* Class: JiebaForJava
* Method: cut
* Signature: (Ljava/lang/String;Z)[Ljava/lang/String;
*/
JNIEXPORT jobjectArray JNICALL Java_JiebaForJava_cut
(JNIEnv *, jobject, jstring, jboolean);
/*
* Class: JiebaForJava
* Method: cutAll
* Signature: (Ljava/lang/String;)[Ljava/lang/String;
*/
JNIEXPORT jobjectArray JNICALL Java_JiebaForJava_cutAll
(JNIEnv *, jobject, jstring);
/*
* Class: JiebaForJava
* Method: cutForSearch
* Signature: (Ljava/lang/String;)[Ljava/lang/String;
*/
JNIEXPORT jobjectArray JNICALL Java_JiebaForJava_cutForSearch
(JNIEnv *, jobject, jstring);
/*
* Class: JiebaForJava
* Method: insertUserWord
* Signature: (Ljava/lang/String;)V
*/
JNIEXPORT void JNICALL Java_JiebaForJava_insertUserWord
(JNIEnv *, jobject, jstring);
/*
* Class: JiebaForJava
* Method: tag
* Signature: (Ljava/lang/String;)Ljava/util/Map;
*/
JNIEXPORT jobject JNICALL Java_JiebaForJava_tag
(JNIEnv *, jobject, jstring);
/*
* Class: JiebaForJava
* Method: loadUserDict
* Signature: (Ljava/lang/String;)V
*/
JNIEXPORT void JNICALL Java_JiebaForJava_loadUserDict
(JNIEnv *, jobject, jstring);
#ifdef __cplusplus
}
#endif
#endif
3,下载jieba分词的c++源码,下载地址 https://github.com/yanyiwu/cppjieba
4,编写JiebaForJava.cpp,把JiebaForJava.h文件放入项目的test目录下面即可,在test目录下面新建JiebaForJava.cpp文件,我用的是vscode来编写c++的,对应上面的实现如下
#include "JiebaForJava.h"
#include "../include/cppjieba/Jieba.hpp"
#include "../include/cppjieba/DictTrie.hpp"
using namespace std;
// Dictionary and model files handed to cppjieba below.
// DICT_PATH is an absolute path; the remaining four are relative paths.
// NOTE(review): relative paths are presumably resolved against the working
// directory of the JVM process that loads this library — confirm.
const char* const DICT_PATH = "/home/spark/Desktop/cppjieba-master/test/dict.txt";
const char* const HMM_PATH = "../dict/hmm_model.utf8";
const char* const USER_DICT_PATH = "../dict/user.dict.utf8";
const char* const IDF_PATH = "../dict/idf.utf8";
const char* const STOP_WORD_PATH = "../dict/stop_words.utf8";
// Single segmenter instance shared by all JNI entry points in this file.
// It is a namespace-scope object, so it is constructed during static
// initialization rather than on first use.
cppjieba::Jieba jieba(DICT_PATH,
HMM_PATH,
USER_DICT_PATH,
IDF_PATH,
STOP_WORD_PATH);
// Stand-alone dictionary trie built from DICT_PATH.
// NOTE(review): this is a separate object from `jieba`; changes applied to it
// are presumably not visible to `jieba` — confirm.
cppjieba::DictTrie dict_trie(DICT_PATH);
/**
 * JNI implementation of JiebaForJava.cut(String, boolean).
 *
 * Segments `content` with jieba.Cut and returns the words as a Java String[].
 *
 * @param env     JNI environment of the calling thread
 * @param obj     the JiebaForJava instance (unused)
 * @param content sentence to segment
 * @param isHmm   forwarded to jieba.Cut as its HMM switch
 * @return a new String[] holding the words, or NULL when `content` is null or
 *         a JNI call failed (in the latter case a Java exception is pending)
 *
 * NOTE(review): JNI exchanges strings as *modified* UTF-8, which differs from
 * standard UTF-8 for characters outside the BMP — confirm this is acceptable.
 */
JNIEXPORT jobjectArray JNICALL Java_JiebaForJava_cut
(JNIEnv *env, jobject obj, jstring content, jboolean isHmm){
    if (content == NULL) {
        return NULL;
    }

    // Copy the Java string into a std::string and release the JNI buffer
    // right away so it cannot leak on any later return path.
    const char* utf = env->GetStringUTFChars(content, NULL);
    if (utf == NULL) {
        return NULL;  // OutOfMemoryError is already pending
    }
    const std::string sentence(utf);
    env->ReleaseStringUTFChars(content, utf);

    std::vector<std::string> words;
    // Honour the caller's choice instead of always enabling HMM.
    jieba.Cut(sentence, words, isHmm != JNI_FALSE);

    jclass stringClass = env->FindClass("java/lang/String");
    if (stringClass == NULL) {
        return NULL;
    }
    const jsize count = static_cast<jsize>(words.size());
    jobjectArray result = env->NewObjectArray(count, stringClass, NULL);
    if (result == NULL) {
        return NULL;
    }

    for (jsize i = 0; i < count; ++i) {
        jstring word = env->NewStringUTF(words[i].c_str());
        if (word == NULL) {
            return NULL;
        }
        env->SetObjectArrayElement(result, i, word);
        // The array now holds the string; drop the local reference so long
        // sentences do not exhaust the local reference table.
        env->DeleteLocalRef(word);
    }
    return result;
}
/**
 * JNI implementation of JiebaForJava.cutAll(String).
 *
 * Segments `content` with jieba.CutAll and returns the words as a Java
 * String[].
 *
 * @param env     JNI environment of the calling thread
 * @param obj     the JiebaForJava instance (unused)
 * @param content sentence to segment
 * @return a new String[] holding the words, or NULL when `content` is null or
 *         a JNI call failed (in the latter case a Java exception is pending)
 */
JNIEXPORT jobjectArray JNICALL Java_JiebaForJava_cutAll
(JNIEnv *env, jobject obj, jstring content){
    if (content == NULL) {
        return NULL;
    }

    // Copy the Java string into a std::string and release the JNI buffer
    // right away so it cannot leak on any later return path.
    const char* utf = env->GetStringUTFChars(content, NULL);
    if (utf == NULL) {
        return NULL;  // OutOfMemoryError is already pending
    }
    const std::string sentence(utf);
    env->ReleaseStringUTFChars(content, utf);

    std::vector<std::string> words;
    jieba.CutAll(sentence, words);

    jclass stringClass = env->FindClass("java/lang/String");
    if (stringClass == NULL) {
        return NULL;
    }
    const jsize count = static_cast<jsize>(words.size());
    jobjectArray result = env->NewObjectArray(count, stringClass, NULL);
    if (result == NULL) {
        return NULL;
    }

    for (jsize i = 0; i < count; ++i) {
        jstring word = env->NewStringUTF(words[i].c_str());
        if (word == NULL) {
            return NULL;
        }
        env->SetObjectArrayElement(result, i, word);
        // The array now holds the string; drop the local reference so long
        // sentences do not exhaust the local reference table.
        env->DeleteLocalRef(word);
    }
    return result;
}
/**
 * JNI implementation of JiebaForJava.cutForSearch(String).
 *
 * Segments `content` with jieba.CutForSearch and returns the words as a Java
 * String[].
 *
 * @param env     JNI environment of the calling thread
 * @param obj     the JiebaForJava instance (unused)
 * @param content sentence to segment
 * @return a new String[] holding the words, or NULL when `content` is null or
 *         a JNI call failed (in the latter case a Java exception is pending)
 */
JNIEXPORT jobjectArray JNICALL Java_JiebaForJava_cutForSearch
(JNIEnv *env, jobject obj, jstring content){
    if (content == NULL) {
        return NULL;
    }

    // Copy the Java string into a std::string and release the JNI buffer
    // right away so it cannot leak on any later return path.
    const char* utf = env->GetStringUTFChars(content, NULL);
    if (utf == NULL) {
        return NULL;  // OutOfMemoryError is already pending
    }
    const std::string sentence(utf);
    env->ReleaseStringUTFChars(content, utf);

    std::vector<std::string> words;
    jieba.CutForSearch(sentence, words);

    jclass stringClass = env->FindClass("java/lang/String");
    if (stringClass == NULL) {
        return NULL;
    }
    const jsize count = static_cast<jsize>(words.size());
    jobjectArray result = env->NewObjectArray(count, stringClass, NULL);
    if (result == NULL) {
        return NULL;
    }

    for (jsize i = 0; i < count; ++i) {
        jstring word = env->NewStringUTF(words[i].c_str());
        if (word == NULL) {
            return NULL;
        }
        env->SetObjectArrayElement(result, i, word);
        // The array now holds the string; drop the local reference so long
        // sentences do not exhaust the local reference table.
        env->DeleteLocalRef(word);
    }
    return result;
}
/**
 * JNI implementation of JiebaForJava.insertUserWord(String).
 *
 * Passes `word` to jieba.InsertUserWord. A null `word` is ignored.
 *
 * @param env  JNI environment of the calling thread
 * @param obj  the JiebaForJava instance (unused)
 * @param word the user word to register
 */
JNIEXPORT void JNICALL Java_JiebaForJava_insertUserWord
(JNIEnv *env, jobject obj, jstring word){
    if (word == NULL) {
        return;
    }

    const char* utf = env->GetStringUTFChars(word, NULL);
    if (utf == NULL) {
        return;  // OutOfMemoryError is already pending
    }
    // Copy, then release the JNI buffer so it does not leak.
    const std::string entry(utf);
    env->ReleaseStringUTFChars(word, utf);

    jieba.InsertUserWord(entry);
}
/**
 * JNI implementation of JiebaForJava.tag(String).
 *
 * Runs jieba.Tag on `content` and returns the (word, tag) pairs as a
 * java.util.HashMap<String, String> keyed by word. Because the result is a
 * map, a word occurring more than once keeps only the tag put last, and the
 * order of the words in the sentence is not preserved.
 *
 * @param env     JNI environment of the calling thread
 * @param obj     the JiebaForJava instance (unused)
 * @param content sentence to tag
 * @return the populated HashMap, or NULL when `content` is null or a JNI call
 *         failed (in the latter case a Java exception is pending)
 */
JNIEXPORT jobject JNICALL Java_JiebaForJava_tag
(JNIEnv *env, jobject obj, jstring content){
    if (content == NULL) {
        return NULL;
    }

    // Copy the Java string into a std::string and release the JNI buffer
    // right away so it cannot leak on any later return path.
    const char* utf = env->GetStringUTFChars(content, NULL);
    if (utf == NULL) {
        return NULL;  // OutOfMemoryError is already pending
    }
    const std::string sentence(utf);
    env->ReleaseStringUTFChars(content, utf);

    std::vector<std::pair<std::string, std::string> > tagged;
    jieba.Tag(sentence, tagged);

    // Look up HashMap, its no-arg constructor and put(); each lookup leaves a
    // pending exception on failure, so stop at the first one that fails.
    jclass mapClass = env->FindClass("java/util/HashMap");
    if (mapClass == NULL) {
        return NULL;
    }
    jmethodID ctor = env->GetMethodID(mapClass, "<init>", "()V");
    if (ctor == NULL) {
        return NULL;
    }
    jmethodID put = env->GetMethodID(mapClass, "put",
        "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;");
    if (put == NULL) {
        return NULL;
    }

    // The constructor signature is "()V": it takes no arguments.
    jobject map = env->NewObject(mapClass, ctor);
    if (map == NULL) {
        return NULL;
    }

    for (std::size_t i = 0; i < tagged.size(); ++i) {
        jstring word = env->NewStringUTF(tagged[i].first.c_str());
        if (word == NULL) {
            return NULL;
        }
        jstring label = env->NewStringUTF(tagged[i].second.c_str());
        if (label == NULL) {
            return NULL;
        }
        jobject previous = env->CallObjectMethod(map, put, word, label);
        if (env->ExceptionCheck()) {
            return NULL;
        }
        // Release per-iteration local references (including the previous
        // value returned by put) so long inputs do not exhaust the local
        // reference table.
        if (previous != NULL) {
            env->DeleteLocalRef(previous);
        }
        env->DeleteLocalRef(label);
        env->DeleteLocalRef(word);
    }
    return map;
}
/**
 * JNI implementation of JiebaForJava.loadUserDict(String).
 *
 * Loads the user dictionary file at `path` into the shared `jieba` segmenter,
 * i.e. the same object that the cut / tag entry points use, so that the
 * loaded words take part in later segmentation. A null `path` is ignored.
 *
 * NOTE(review): relies on cppjieba::Jieba::LoadUserDict(const std::string&),
 * which is present in current cppjieba — confirm the vendored version has it.
 *
 * @param env  JNI environment of the calling thread
 * @param obj  the JiebaForJava instance (unused)
 * @param path file system path of the user dictionary
 */
JNIEXPORT void JNICALL Java_JiebaForJava_loadUserDict
(JNIEnv *env, jobject obj, jstring path){
    if (path == NULL) {
        return;
    }

    const char* utf = env->GetStringUTFChars(path, NULL);
    if (utf == NULL) {
        return;  // OutOfMemoryError is already pending
    }
    // Copy, then release the JNI buffer so it does not leak.
    const std::string spath(utf);
    env->ReleaseStringUTFChars(path, utf);

    // Load into the segmenter itself rather than into an unrelated trie.
    jieba.LoadUserDict(spath);
}
5,编写完成后,在linux下面构建.so动态库
g++ -fPIC -c JiebaForJava.cpp -I /home/spark/jdk1.8.0_171/include -I /home/spark/jdk1.8.0_171/include/linux
(其中 JDK 的 include 路径等参数需要根据自己的环境调整)
随后运行 g++ -shared JiebaForJava.o -o JiebaForJava.so 至此生成了.so文件,就可以进行调用了