Elasticsearch 执行词频分析

/**
 * REST endpoint that triggers the keyword frequency analysis for one batch.
 *
 * <p>Delegates to {@code elasticSearchService.executeAnalyze} with the batch number,
 * keyword list and "satisfy all" flag taken from the request body, and wraps the
 * returned list in a success response.
 *
 * @param fileDTO validated request body carrying the batch number, keywords and flag
 * @return success response containing the analysis results
 */
@PostMapping("/executeAnalyze")
@ApiOperation("执行词频分析")
public WebResponse<List<FileUtilExecuteResult>> executeAnalyze(@RequestBody @Validated FileUtilDTO fileDTO) {
    final String batchNo = fileDTO.getBatchNo();
    final List<String> keywords = fileDTO.getKeywords();
    final String satisfyAll = fileDTO.getSatisfyAll();

    final List<FileUtilExecuteResult> analysisResults =
            elasticSearchService.executeAnalyze(batchNo, keywords, satisfyAll);
    return ResponseUtil.success(analysisResults, "执行成功");
}
/**
 * Runs a keyword frequency analysis over every stored file of a batch.
 *
 * <p>For each {@link FileUtilContent} row of the batch, the term vectors of the
 * {@code content} field are requested from Elasticsearch and the term frequency of
 * every requested keyword that occurs as an indexed term is collected. A result row
 * is saved (and returned) for each file with at least one matching keyword; when
 * {@code satisfyAll} is enabled, only files matching every distinct keyword are kept.
 *
 * <p>Keywords are compared to indexed terms by exact string equality, so a keyword
 * only matches when it is identical to a term as stored in the term vector.
 *
 * @param batchNo    batch number used to select the files to analyse
 * @param keywords   keywords to look for; {@code null} or empty yields an empty result
 *                   without querying Elasticsearch; {@code null} elements are ignored
 * @param satisfyAll {@code null} or {@code "0"} means "any keyword matches"; every other
 *                   value requires all distinct keywords to be present in a file
 * @return the result rows that were saved, in the order the files were processed
 * @throws java.io.UncheckedIOException if a term vectors request fails; results saved
 *         before the failure are not undone by this method
 */
@Override
public List<FileUtilExecuteResult> executeAnalyze(String batchNo, List<String> keywords, String satisfyAll) {
    List<FileUtilExecuteResult> fileUtilExecuteResults = new ArrayList<>();
    if (keywords == null || keywords.isEmpty()) {
        // Nothing can match, so skip the database and Elasticsearch round trips.
        return fileUtilExecuteResults;
    }

    boolean isSatisfyAll = satisfyAll != null && !"0".equals(satisfyAll);
    // Duplicates and nulls must not count, otherwise "satisfy all" could never be met.
    long distinctKeywordCount = keywords.stream().filter(k -> k != null).distinct().count();

    Date now = DateUtils.getNow();
    QueryWrapper<FileUtilContent> queryWrapper = new QueryWrapper<>();
    queryWrapper.eq("batch_no", batchNo);
    List<FileUtilContent> fileContentList = contractFileService.list(queryWrapper);
    String operateNo = IDUtil.generateNo("", redisTemplate);

    for (FileUtilContent fileUtilContent : fileContentList) {
        TermVectorsResponse termResponse = fetchTermVectors(batchNo, fileUtilContent.getEsId());
        if (!termResponse.getFound()) {
            // The document is missing from the index; there is nothing to analyse.
            continue;
        }

        Map<String, Integer> keyWordMap = collectKeywordFrequencies(termResponse, keywords);
        if (keyWordMap.isEmpty()) {
            continue;
        }
        if (isSatisfyAll && keyWordMap.size() != distinctKeywordCount) {
            continue;
        }

        FileUtilExecuteResult fileUtilExecuteResult =
                buildResult(fileUtilContent, batchNo, operateNo, now, keyWordMap);
        fileUtilExecuteResultService.save(fileUtilExecuteResult);
        fileUtilExecuteResults.add(fileUtilExecuteResult);
    }
    return fileUtilExecuteResults;
}

/**
 * Requests the term vectors of the {@code content} field for one indexed document.
 * An I/O failure is rethrown unchecked with the batch and document id attached.
 */
private TermVectorsResponse fetchTermVectors(String batchNo, String esId) {
    TermVectorsRequest request = new TermVectorsRequest(SystemConstant.ELASTICSEARCH_IDEX_NAME, esId);
    request.setFields("content");
    request.setFieldStatistics(true);
    request.setTermStatistics(true);
    request.setPositions(true);
    request.setOffsets(true);
    request.setPayloads(true);
    try {
        return highLevelClient.termvectors(request, RequestOptions.DEFAULT);
    } catch (IOException e) {
        // Fully qualified because the file's import block is not visible from here.
        throw new java.io.UncheckedIOException(
                "Term vectors request failed for batch " + batchNo + ", document " + esId, e);
    }
}

/**
 * Maps each requested keyword that appears as an indexed term to its term frequency
 * in the document described by {@code termResponse}.
 */
private Map<String, Integer> collectKeywordFrequencies(TermVectorsResponse termResponse, List<String> keywords) {
    Map<String, Integer> keyWordMap = new HashMap<>();
    for (TermVectorsResponse.TermVector termVector : termResponse.getTermVectorsList()) {
        for (TermVectorsResponse.TermVector.Term term : termVector.getTerms()) {
            String termText = term.getTerm();
            if (termText != null && keywords.contains(termText)) {
                keyWordMap.put(termText, term.getTermFreq());
            }
        }
    }
    return keyWordMap;
}

/**
 * Builds the result row for one file, copying the descriptive fields from the source
 * file and storing the keyword frequencies as the map's string form.
 */
private FileUtilExecuteResult buildResult(FileUtilContent fileUtilContent, String batchNo, String operateNo,
                                          Date now, Map<String, Integer> keyWordMap) {
    FileUtilExecuteResult fileUtilExecuteResult = new FileUtilExecuteResult();
    fileUtilExecuteResult.setId(IDUtil.genUUID());
    fileUtilExecuteResult.setBatchNo(batchNo);
    fileUtilExecuteResult.setCreateTime(now);
    fileUtilExecuteResult.setOperateNo(operateNo);
    fileUtilExecuteResult.setTitle(fileUtilContent.getTitle());
    fileUtilExecuteResult.setSerialNo(fileUtilContent.getSerialNo());
    fileUtilExecuteResult.setResult(keyWordMap.toString());
    fileUtilExecuteResult.setPackageName(fileUtilContent.getPackageName());
    fileUtilExecuteResult.setContractNo(fileUtilContent.getContractNo());
    fileUtilExecuteResult.setContractName(fileUtilContent.getContractName());
    return fileUtilExecuteResult;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值