Using the HanLP Package in Spring Boot

HanLP is a natural language processing toolkit, so I won't describe it at length here. Because you may need to place the resource files (dictionaries) that HanLP ships with under Spring Boot's resources directory, I'm writing down the approach that worked for me.
I won't go into adding the dependency or unpacking the dictionary data in any detail, beyond the short sketch below.
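As a reference only, the library is usually pulled in from Maven Central with something like the following; the version shown is just an example, so pick whichever release you actually need:

<dependency>
    <groupId>com.hankcs</groupId>
    <artifactId>hanlp</artifactId>
    <version>portable-1.7.8</version>
</dependency>

With the dependency added and the dictionary data unpacked, let's look at the configuration file that HanLP provides.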

File name: hanlp.properties

# Bigram dictionary path
BiGramDictionaryPath=data/dictionary/CoreNatureDictionary.ngram.txt
# Stop word dictionary path
CoreStopWordDictionaryPath=data/dictionary/stopwords.txt
# Synonym dictionary path
CoreSynonymDictionaryDictionaryPath=data/dictionary/synonym/CoreSynonym.txt
# Person name dictionary path
PersonDictionaryPath=data/dictionary/person/nr.txt
# Person name dictionary transition matrix path
PersonDictionaryTrPath=data/dictionary/person/nr.tr.txt
# Traditional/Simplified Chinese dictionary root directory
tcDictionaryRoot=data/dictionary/tc
# Custom dictionary paths; separate multiple dictionaries with ';'. A leading space means the file sits in the same directory as the previous entry, and the form "filename nature" sets that dictionary's default part of speech. Priority decreases in the order listed.
# In addition, data/dictionary/custom/CustomDictionary.txt is a high-quality dictionary; please do not delete it. All dictionaries must be UTF-8 encoded.
CustomDictionaryPath=data/dictionary/custom/CustomDictionary.txt; 现代汉语补充词库.txt; 全国地名大全.txt ns; 人名词典.txt; 机构名词典.txt; user_dic.txt; 上海地名.txt ns;data/dictionary/person/nrf.txt nrf;
# CRF segmentation model path
CRFSegmentModelPath=data/model/segment/CRFSegmentModel.txt
# HMM segmentation model
HMMSegmentModelPath=data/model/segment/HMMSegmentModel.bin
# Whether segmentation results show part-of-speech tags
ShowTermNature=true
# IO adapter: implement the com.hankcs.hanlp.corpus.io.IIOAdapter interface to run HanLP on different platforms (Hadoop, Redis, etc.)
# The default IO adapter, shown below, is based on the ordinary file system.
#IOAdapter=com.hankcs.hanlp.corpus.io.FileIOAdapter
# Perceptron lexical analyzer
PerceptronCWSModelPath=data/model/perceptron/pku199801/cws.bin
PerceptronPOSModelPath=data/model/perceptron/pku199801/pos.bin
PerceptronNERModelPath=data/model/perceptron/pku199801/ner.bin
# CRF lexical analyzer
CRFCWSModelPath=data/model/crf/pku199801/cws.bin
CRFPOSModelPath=data/model/crf/pku199801/pos.bin
CRFNERModelPath=data/model/crf/pku199801/ner.bin
# For more configuration options, see https://github.com/hankcs/HanLP/blob/master/src/main/java/com/hankcs/hanlp/HanLP.java#L59 and add them yourself
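All of these paths are relative, so with this setup both hanlp.properties and the data directory end up on the classpath. A typical layout (the exact set of dictionaries and models depends on what you unpacked) looks roughly like this:

src/main/resources/
├── hanlp.properties
└── data/
    ├── dictionary/
    │   ├── CoreNatureDictionary.txt
    │   ├── CoreNatureDictionary.ngram.txt
    │   ├── stopwords.txt
    │   └── custom/
    │       └── CustomDictionary.txt
    └── model/
        └── ...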

Next, there is one place that deserves attention: the init method inside the HanLP class (it lives in the Config inner class):

public static void init(String rootPath) {
            Properties p = new Properties();

            String prePath;
            int i;
            int lastSplash;
            try {
                ClassLoader loader = Thread.currentThread().getContextClassLoader();
                if (loader == null) {
                    loader = HanLP.Config.class.getClassLoader();
                }

                p.load(new InputStreamReader((InputStream)(Predefine.HANLP_PROPERTIES_PATH == null ? loader.getResourceAsStream("hanlp.properties") : new FileInputStream(Predefine.HANLP_PROPERTIES_PATH)), "UTF-8"));
                String root = p.getProperty("root", "");
                if (StringUtil.isEmpty(root)) {
                    root = StringUtil.getString(rootPath);
                }

                System.out.println(root);
                root = root.replaceAll("\\\\", "/");
                if (root.length() > 0 && !root.endsWith("/")) {
                    root = root + "/";
                }

                CoreDictionaryPath = root + p.getProperty("CoreDictionaryPath", CoreDictionaryPath);
                CoreDictionaryTransformMatrixDictionaryPath = root + p.getProperty("CoreDictionaryTransformMatrixDictionaryPath", CoreDictionaryTransformMatrixDictionaryPath);
                BiGramDictionaryPath = root + p.getProperty("BiGramDictionaryPath", BiGramDictionaryPath);
                CoreStopWordDictionaryPath = root + p.getProperty("CoreStopWordDictionaryPath", CoreStopWordDictionaryPath);
                CoreSynonymDictionaryDictionaryPath = root + p.getProperty("CoreSynonymDictionaryDictionaryPath", CoreSynonymDictionaryDictionaryPath);
                PersonDictionaryPath = root + p.getProperty("PersonDictionaryPath", PersonDictionaryPath);
                PersonDictionaryTrPath = root + p.getProperty("PersonDictionaryTrPath", PersonDictionaryTrPath);
                String[] pathArray = p.getProperty("CustomDictionaryPath", "data/dictionary/custom/CustomDictionary.txt").split(";");
                prePath = root;

                for(i = 0; i < pathArray.length; ++i) {
                    if (pathArray[i].startsWith(" ")) {
                        pathArray[i] = prePath + pathArray[i].trim();
                    } else {
                        pathArray[i] = root + pathArray[i];
                        lastSplash = pathArray[i].lastIndexOf(47);
                        if (lastSplash != -1) {
                            prePath = pathArray[i].substring(0, lastSplash + 1);
                        }
                    }
                }

                CustomDictionaryPath = pathArray;
                tcDictionaryRoot = root + p.getProperty("tcDictionaryRoot", tcDictionaryRoot);
                if (!tcDictionaryRoot.endsWith("/")) {
                    tcDictionaryRoot = tcDictionaryRoot + '/';
                }

                PinyinDictionaryPath = root + p.getProperty("PinyinDictionaryPath", PinyinDictionaryPath);
                TranslatedPersonDictionaryPath = root + p.getProperty("TranslatedPersonDictionaryPath", TranslatedPersonDictionaryPath);
                JapanesePersonDictionaryPath = root + p.getProperty("JapanesePersonDictionaryPath", JapanesePersonDictionaryPath);
                PlaceDictionaryPath = root + p.getProperty("PlaceDictionaryPath", PlaceDictionaryPath);
                PlaceDictionaryTrPath = root + p.getProperty("PlaceDictionaryTrPath", PlaceDictionaryTrPath);
                OrganizationDictionaryPath = root + p.getProperty("OrganizationDictionaryPath", OrganizationDictionaryPath);
                OrganizationDictionaryTrPath = root + p.getProperty("OrganizationDictionaryTrPath", OrganizationDictionaryTrPath);
                CharTypePath = root + p.getProperty("CharTypePath", CharTypePath);
                CharTablePath = root + p.getProperty("CharTablePath", CharTablePath);
                PartOfSpeechTagDictionary = root + p.getProperty("PartOfSpeechTagDictionary", PartOfSpeechTagDictionary);
                WordNatureModelPath = root + p.getProperty("WordNatureModelPath", WordNatureModelPath);
                MaxEntModelPath = root + p.getProperty("MaxEntModelPath", MaxEntModelPath);
                NNParserModelPath = root + p.getProperty("NNParserModelPath", NNParserModelPath);
                CRFSegmentModelPath = root + p.getProperty("CRFSegmentModelPath", CRFSegmentModelPath);
                CRFDependencyModelPath = root + p.getProperty("CRFDependencyModelPath", CRFDependencyModelPath);
                HMMSegmentModelPath = root + p.getProperty("HMMSegmentModelPath", HMMSegmentModelPath);
                CRFCWSModelPath = root + p.getProperty("CRFCWSModelPath", CRFCWSModelPath);
                CRFPOSModelPath = root + p.getProperty("CRFPOSModelPath", CRFPOSModelPath);
                CRFNERModelPath = root + p.getProperty("CRFNERModelPath", CRFNERModelPath);
                PerceptronCWSModelPath = root + p.getProperty("PerceptronCWSModelPath", PerceptronCWSModelPath);
                PerceptronPOSModelPath = root + p.getProperty("PerceptronPOSModelPath", PerceptronPOSModelPath);
                PerceptronNERModelPath = root + p.getProperty("PerceptronNERModelPath", PerceptronNERModelPath);
                ShowTermNature = "true".equals(p.getProperty("ShowTermNature", "true"));
                Normalization = "true".equals(p.getProperty("Normalization", "false"));
                String ioAdapterClassName = p.getProperty("IOAdapter");
                if (ioAdapterClassName != null) {
                    try {
                        Class<?> clazz = Class.forName(ioAdapterClassName);
                        Constructor<?> ctor = clazz.getConstructor();
                        Object instance = ctor.newInstance();
                        if (instance != null) {
                            IOAdapter = (IIOAdapter)instance;
                        }
                    } catch (ClassNotFoundException var10) {
                        Predefine.logger.warning(String.format("找不到IO适配器类: %s ,请检查第三方插件jar包", ioAdapterClassName));
                    } catch (NoSuchMethodException var11) {
                        Predefine.logger.warning(String.format("工厂类[%s]没有默认构造方法,不符合要求", ioAdapterClassName));
                    } catch (SecurityException var12) {
                        Predefine.logger.warning(String.format("工厂类[%s]默认构造方法无法访问,不符合要求", ioAdapterClassName));
                    } catch (Exception var13) {
                        Predefine.logger.warning(String.format("工厂类[%s]构造失败:%s\n", ioAdapterClassName, TextUtility.exceptionToString(var13)));
                    }
                }
            } catch (Exception var14) {
                StringBuilder sbInfo = new StringBuilder("========Tips========\n请将hanlp.properties放在下列目录:\n");
                String classPath = (String)System.getProperties().get("java.class.path");
                if (classPath != null) {
                    String[] var8;
                    lastSplash = (var8 = classPath.split(File.pathSeparator)).length;

                    for(i = 0; i < lastSplash; ++i) {
                        prePath = var8[i];
                        if ((new File(prePath)).isDirectory()) {
                            sbInfo.append(prePath).append('\n');
                        }
                    }
                }

                sbInfo.append("Web项目则请放到下列目录:\nWebapp/WEB-INF/lib\nWebapp/WEB-INF/classes\nAppserver/lib\nJRE/lib\n");
                sbInfo.append("并且编辑root=PARENT/path/to/your/data\n");
                sbInfo.append("现在HanLP将尝试从").append(System.getProperties().get("user.dir")).append("读取data……");
                Predefine.logger.severe("没有找到hanlp.properties,可能会导致找不到data\n" + sbInfo);
            }

        }

This method takes a rootPath argument, i.e. the absolute path of the directory that contains the data folder. It first reads the root property from hanlp.properties and only falls back to the rootPath argument when that property is empty. Leaving both empty keeps every dictionary path relative, which is exactly what we need for files under resources.

The Config class inside HanLP holds the paths that get loaded:

 public static final class Config {
        public static boolean DEBUG = false;
        public static String CoreDictionaryPath = "data/dictionary/CoreNatureDictionary.txt";
        public static String CoreDictionaryTransformMatrixDictionaryPath = "data/dictionary/CoreNatureDictionary.tr.txt";
        public static String[] CustomDictionaryPath = new String[]{"data/dictionary/custom/CustomDictionary.txt"};
        public static String BiGramDictionaryPath = "data/dictionary/CoreNatureDictionary.ngram.txt";
        public static String CoreStopWordDictionaryPath = "data/dictionary/stopwords.txt";
        public static String CoreSynonymDictionaryDictionaryPath = "data/dictionary/synonym/CoreSynonym.txt";
        public static String PersonDictionaryPath = "data/dictionary/person/nr.txt";
        public static String PersonDictionaryTrPath = "data/dictionary/person/nr.tr.txt";
        public static String PlaceDictionaryPath = "data/dictionary/place/ns.txt";
        public static String PlaceDictionaryTrPath = "data/dictionary/place/ns.tr.txt";
        public static String OrganizationDictionaryPath = "data/dictionary/organization/nt.txt";
        public static String OrganizationDictionaryTrPath = "data/dictionary/organization/nt.tr.txt";
        public static String tcDictionaryRoot = "data/dictionary/tc/";
        public static String PinyinDictionaryPath = "data/dictionary/pinyin/pinyin.txt";
        public static String TranslatedPersonDictionaryPath = "data/dictionary/person/nrf.txt";
        public static String JapanesePersonDictionaryPath = "data/dictionary/person/nrj.txt";
        public static String CharTypePath = "data/dictionary/other/CharType.bin";
        public static String CharTablePath = "data/dictionary/other/CharTable.txt";
        public static String PartOfSpeechTagDictionary = "data/dictionary/other/TagPKU98.csv";
        public static String WordNatureModelPath = "data/model/dependency/WordNature.txt";
        public static String MaxEntModelPath = "data/model/dependency/MaxEntModel.txt";
        public static String NNParserModelPath = "data/model/dependency/NNParserModel.txt";
        public static String CRFSegmentModelPath = "data/model/segment/CRFSegmentModel.txt";
        public static String HMMSegmentModelPath = "data/model/segment/HMMSegmentModel.bin";
        /** @deprecated */
        public static String CRFDependencyModelPath = "data/model/dependency/CRFDependencyModelMini.txt";
        public static String CRFCWSModelPath = "data/model/crf/pku199801/cws.bin";
        public static String CRFPOSModelPath = "data/model/crf/pku199801/pos.bin";
        public static String CRFNERModelPath = "data/model/crf/pku199801/ner.bin";
        public static String PerceptronCWSModelPath = "data/model/perceptron/pku199801/cws.bin";
        public static String PerceptronPOSModelPath = "data/model/perceptron/pku199801/pos.bin";
        public static String PerceptronNERModelPath = "data/model/perceptron/pku199801/ner.bin";
        public static boolean ShowTermNature = true;
        public static boolean Normalization = false;
        public static IIOAdapter IOAdapter;

        public Config() {
        }

        // ... the static init(String rootPath) method shown above also belongs to this class
    }

With this understanding, we need to implement one interface and override its two methods:

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.springframework.core.io.ClassPathResource;

import com.hankcs.hanlp.corpus.io.IIOAdapter;

/**
 * IO adapter that lets HanLP read its static resources from the classpath.
 *
 * @author Amethyst
 * @date 2019-11-28 11:12
 */
public class HanLPResourcesAdapter implements IIOAdapter {

    @Override
    public InputStream open(String path) throws IOException {
        // Resolve the relative dictionary path against the classpath (src/main/resources)
        ClassPathResource resource = new ClassPathResource(path);
        return new FileInputStream(resource.getFile());
    }

    @Override
    public OutputStream create(String path) throws IOException {
        // Used by HanLP when it needs to write files, e.g. dictionary cache files
        ClassPathResource resource = new ClassPathResource(path);
        return new FileOutputStream(resource.getFile());
    }
}
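One caveat: ClassPathResource.getFile() only resolves when the resources are exploded on the file system (running from the IDE or an unpacked build); inside a packaged fat jar there is no real file behind the classpath entry and the call fails. If that bites you, a variant of open() that streams straight from the classpath (just a sketch using Spring's standard Resource API) would be:

    @Override
    public InputStream open(String path) throws IOException {
        // getInputStream() works both for exploded classes and for entries inside a jar
        return new ClassPathResource(path).getInputStream();
    }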

On top of overriding these two methods, we also need to make two changes to hanlp.properties:

# Set the root path to empty
root=
# Point the IO adapter at our own adapter class
IOAdapter=x.x.HanLPResourcesAdapter

Then, before using HanLP, we need to initialize it by calling the init function with an empty root path:

// Call this once when loading the package so the dictionaries are read and initialized
HanLP.Config.init("");
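Putting it all together, a minimal sketch of wiring this into a Spring Boot application (the class and method names below are my own, and it assumes Spring Boot 2.x with javax annotations) could look like this:

import java.util.List;

import javax.annotation.PostConstruct;

import org.springframework.stereotype.Component;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.seg.common.Term;

@Component
public class HanLPInitializer {

    @PostConstruct
    public void initHanLP() {
        // Empty root: all dictionary paths stay relative and are resolved
        // through the classpath IOAdapter configured in hanlp.properties
        HanLP.Config.init("");
    }

    public List<Term> segment(String text) {
        // Standard HanLP segmentation; each Term carries the word and its part of speech
        return HanLP.segment(text);
    }
}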

Reference: Spring Boot中对自然语言处理工具包hanlp的调用详解 (a detailed write-up on calling the HanLP NLP toolkit from Spring Boot)
