判断是否重复试题

3 篇文章 0 订阅

场景:新增选择题时,先判断题库中是否已存在相同试题,不存在才新增。
判断是否有重复试题:先将库中所有试题加载进ES,题干截取30字,各选项各截取30字并用逗号拼接(截取是为了节省内存)。
新增试题时,对试题进行分词,取几个关键词到ES中查询即可(查到的是疑似重复题,最终由人工判断是否重复)。

Es的映射如下

GET /ques620/_mapping
PUT ques620
{
  "mappings": {
    "properties": {
      "id": {
        "type": "long",
        "index": false,
        "doc_values": false
      },
      "labelcode": {
        "type": "keyword"
      },
      "content": {
        "type": "text",
        "analyzer": "ik_smart"
      },
      "option": {
        "type": "text",
        "analyzer": "ik_smart"
      }
    }
  }
}

将所有试题(题干+选项)加载到ES中

 /**
     * Loads two pages of questions (page numbers 0 and 1) into Elasticsearch.
     * Each page is fetched via {@code questionService.getESList}, printed (size first, then the
     * JSON form) for inspection, and bulk-indexed via {@code elasticSaveService.saveProductAsIndices2}.
     *
     * @throws IOException propagated from the bulk indexing call
     */
    @Test
    public void uploadQuestionES() throws IOException {
        int pageNo = 0;
        while (pageNo < 2) {
            // Fetch one page of questions prepared for indexing.
            List<QuestionMapping> pageOfQuestions = questionService.getESList(pageNo);
            int fetched = pageOfQuestions.size();
            System.out.println(fetched);
            String asJson = JSONArray.toJSONString(pageOfQuestions);
            System.out.println(asJson);
            // Store this page in the target index.
            elasticSaveService.saveProductAsIndices2(pageOfQuestions);
            pageNo++;
        }
    }

 /**
     * Builds one page of questions in the reduced form used for duplicate detection.
     * For every question on the page this clears {@code subCount}, loads its options via
     * {@code questionDao.queryKeyById}, runs each option's content and the question stem through
     * {@code HtmlUtils.filterStr}, and stores the options as a single comma-joined string.
     * NOTE(review): the 30-character truncation of stem and options is not visible in this
     * method - presumably done inside {@code HtmlUtils.filterStr} or the page query; confirm.
     *
     * @param pageNo page number handed to {@code Page}; the page size is fixed at 50
     * @return the questions of that page, modified in place
     */
    public List<QuestionMapping> getESList(int pageNo){
        Page<QuestionMapping> pageRequest = new Page<>(pageNo, 50);
        List<QuestionMapping> questions = questionDao.queryPage(pageRequest);

        for (QuestionMapping question : questions) {
            question.setSubCount(null);

            // Collect the cleaned text of every option belonging to this question.
            List<QuestionKey> options = questionDao.queryKeyById(question.getId());
            List<String> cleanedOptions = new ArrayList<>();
            for (QuestionKey option : options) {
                cleanedOptions.add(HtmlUtils.filterStr(option.getContent()));
            }
            String joinedOptions = String.join(",", cleanedOptions);

            // Clean the stem the same way as the options.
            String cleanedStem = HtmlUtils.filterStr(question.getContent());
            question.setContent(cleanedStem);
            question.setOption(joinedOptions);
        }

        return questions;
    }

/**
     * Bulk-indexes the given questions into the {@code ques620} index. Each question's id is used
     * as the document id and its fastjson serialization as the document source.
     *
     * @param questionMappings questions to index; {@code null} or empty means nothing to do
     * @return {@code true} if no item of the bulk response failed (or there was nothing to index),
     *         {@code false} otherwise
     * @throws IOException if the bulk request cannot be executed
     */
    public boolean saveProductAsIndices2(List<QuestionMapping> questionMappings) throws IOException {
        // A BulkRequest without any action is rejected by request validation, so skip the call.
        if (questionMappings == null || questionMappings.isEmpty()) {
            return true;
        }

        BulkRequest bulkRequest = new BulkRequest();
        for (QuestionMapping qu : questionMappings) {
            IndexRequest indexRequest = new IndexRequest("ques620");
            indexRequest.id(String.valueOf(qu.getId()));
            // The document body is the JSON string of the question.
            indexRequest.source(JSON.toJSONString(qu), XContentType.JSON);
            bulkRequest.add(indexRequest);
        }

        BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, GulimallElasticSearchConfig.COMMON_OPTIONS);
        boolean hasFailures = bulkResponse.hasFailures();
        if (hasFailures) {
            // Surface per-item failures instead of only returning false.
            log.error("批量入库存在失败:{}", bulkResponse.buildFailureMessage());
        }

        List<String> collect = Arrays.stream(bulkResponse.getItems())
                .map(item -> item.getId())
                .collect(Collectors.toList());
        log.info("入库完成:{}",collect);

        return !hasFailures;
    }

在ES中查找疑似重复题

/**
     * Searches Elasticsearch for questions similar to a sample stem and option text.
     * Extracts keywords from the stem and from the options (3 requested for each), prints both
     * keyword sets, runs the keyword query and prints the hits as JSON.
     *
     * @throws Exception propagated from keyword extraction
     */
    @Test
    public void getQueryQuestio() throws Exception {
        String stemText="小华家的电灯丝断了,他把灯泡晃了晃使灯丝又搭上了,再用的时候会发现:";
        String optionText="A.灯比原来亮了 B.灯比原来暗了 C.跟原来一样";

        // Keywords of the question stem.
        Set<String> stemKeywords = elasticSaveService.getKeyWord(stemText, 3);
        System.out.println(stemKeywords.toString());

        // Keywords of the options.
        Set<String> optionKeywords = elasticSaveService.getKeyWord(optionText, 3);
        System.out.println(optionKeywords.toString());

        // Look up suspected duplicates.
        List<QuestionMapping> suspects = elasticSaveService.queryByKeyword(stemKeywords, optionKeywords);
        String suspectsJson = JSONArray.toJSONString(suspects);
        System.out.println("170---" + suspectsJson);
    }

/**
     * Tokenizes {@code text} with the {@code ik_smart} analyzer through the Elasticsearch analyze
     * API and selects keywords from the resulting terms via {@code HtmlUtils.filterNeedStr}.
     *
     * @param text      text to tokenize; for {@code null} or blank text no analyze request is sent
     *                  and the selection is made from an empty term list
     * @param maxKeyNum number of keywords requested, forwarded to {@code HtmlUtils.filterNeedStr}
     * @return the selected keywords
     * @throws Exception if the analyze request fails
     */
    public Set<String> getKeyWord(String text,int maxKeyNum)throws Exception{
        if (text == null || text.trim().isEmpty()) {
            // Nothing to tokenize: skip the round-trip to Elasticsearch.
            return HtmlUtils.filterNeedStr(new ArrayList<String>(), maxKeyNum);
        }

        AnalyzeRequest request = AnalyzeRequest.withGlobalAnalyzer("ik_smart", text);
        AnalyzeResponse response = restHighLevelClient.indices().analyze(request, RequestOptions.DEFAULT);

        // Only the term text of each token is needed for keyword selection.
        List<String> terms = response.getTokens().stream()
                .map(AnalyzeResponse.AnalyzeToken::getTerm)
                .collect(Collectors.toList());
        return HtmlUtils.filterNeedStr(terms, maxKeyNum);
    }

 /**
     * Picks up to {@code needNum} distinct keywords from a token list, preferring longer tokens.
     * Tokens longer than two characters are taken first, in order of appearance. If there are
     * not enough of them, two-character tokens fill the remainder, then one-character tokens.
     *
     * @param list    tokens to choose from; {@code null} is treated as empty, and {@code null}
     *                or empty entries are skipped
     * @param needNum number of keywords wanted; values {@code <= 0} fall back to 3
     * @return at most {@code needNum} keywords, iterating in selection order
     */
    public static Set<String> filterNeedStr(List<String> list, int needNum){
        int limit = needNum > 0 ? needNum : 3; // default: 3 keywords

        // Insertion-ordered sets keep the selection deterministic (first occurrence wins).
        Set<String> selected = new java.util.LinkedHashSet<>();
        Set<String> twoCharTokens = new java.util.LinkedHashSet<>();
        Set<String> oneCharTokens = new java.util.LinkedHashSet<>();
        if (list == null) {
            return selected;
        }

        for (String token : list) {
            if (token == null || token.isEmpty()) {
                continue;
            }
            if (token.length() > 2) {
                selected.add(token);
                // Stop as soon as enough long tokens were found.
                if (selected.size() >= limit) {
                    return selected;
                }
            } else if (token.length() == 2) {
                twoCharTokens.add(token);
            } else {
                oneCharTokens.add(token);
            }
        }

        // Not enough long tokens: fill up with shorter ones, longest class first.
        fillUpTo(selected, twoCharTokens, limit);
        fillUpTo(selected, oneCharTokens, limit);
        return selected;
    }

    /** Adds candidates to {@code target} in iteration order until it holds {@code limit} entries. */
    private static void fillUpTo(Set<String> target, Set<String> candidates, int limit) {
        for (String candidate : candidates) {
            if (target.size() >= limit) {
                return;
            }
            target.add(candidate);
        }
    }

 /**
     * Searches Elasticsearch for questions matching the given stem and option keywords.
     *
     * @param contentSet keywords taken from the question stem
     * @param optionSet keywords taken from the options
     * @return the matching questions; an empty list when no search request could be built;
     *         {@code null} when the search call fails with an {@link IOException} (the failure
     *         is logged), so callers can tell "no hits" from "lookup failed"
     */
    public List<QuestionMapping> queryByKeyword(Set<String> contentSet,Set<String> optionSet){
        SearchRequest request = bulidSearchRequest(contentSet,optionSet);
        if (request == null) {
            // No request was built (no keywords supplied): there is nothing to search for.
            return new ArrayList<>();
        }

        try {
            SearchResponse searchResponse = restHighLevelClient.search(request, GulimallElasticSearchConfig.COMMON_OPTIONS);
            return bulidSearchResult(searchResponse);
        } catch (IOException e) {
            // Record the failure in the application log rather than on stderr.
            log.error("查询疑似重复题失败", e);
            return null;
        }
    }

/**
 * Builds the search request for suspected duplicates: a bool query with one
 * {@code match_phrase} must-clause per stem keyword (field {@code content}) and per option
 * keyword (field {@code option}), targeting the indices {@code questions}, {@code quselect}
 * and {@code ques620}.
 *
 * @param contentSet keywords for the {@code content} field; may be {@code null} or empty
 * @param optionSet  keywords for the {@code option} field; may be {@code null} or empty
 * @return the search request, or {@code null} when both keyword sets are {@code null}
 */
private SearchRequest bulidSearchRequest(Set<String> contentSet,Set<String> optionSet){
        if(contentSet==null&&optionSet==null){
            return null;
        }

        // Every keyword must appear: each one becomes its own must-clause.
        BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
        if (contentSet != null) {
            for (String s : contentSet) {
                boolQueryBuilder.must(QueryBuilders.matchPhraseQuery("content", s));
            }
        }
        if (optionSet != null) {
            for (String p : optionSet) {
                boolQueryBuilder.must(QueryBuilders.matchPhraseQuery("option", p));
            }
        }
        // NOTE(review): if both sets are empty the bool query has no clauses and presumably
        // matches every document - confirm whether callers can pass two empty sets.

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.query(boolQueryBuilder);
        // The logger renders the builder lazily, only when debug logging is enabled.
        log.debug("构建的DSL语句 {}", searchSourceBuilder);

        return new SearchRequest(new String[]{"questions","quselect","ques620"}, searchSourceBuilder);
    }


/**
 * Converts a search response into question objects by parsing the JSON source of every hit
 * into a {@code QuestionMapping}.
 *
 * @param searchResponse response of the keyword search
 * @return one entry per hit, in hit order; empty when there are no hits
 */
private List<QuestionMapping> bulidSearchResult(SearchResponse searchResponse) {
        List<QuestionMapping> result = new ArrayList<>();
        SearchHit[] hitArray = searchResponse.getHits().getHits();
        if (hitArray == null) {
            return result;
        }
        for (SearchHit hit : hitArray) {
            // The document source is the JSON that was indexed for the question.
            result.add(JSON.parseObject(hit.getSourceAsString(), QuestionMapping.class));
        }
        return result;
    }
/**
     * Timed variant of the similar-question lookup: extracts keywords from a sample stem and
     * option text (3 requested for each), queries Elasticsearch with them, prints the keyword
     * sets and the hits, and finally prints the elapsed time in milliseconds.
     *
     * @throws Exception propagated from keyword extraction
     */
    @Test
    public void getQueryQuestio() throws Exception {
        String stemText="下列哪项不符合全胃肠外营养所用的营养液的要求";
        String optionText="维生素和微量元素";

        long startedAt = System.currentTimeMillis();

        // Keywords of the question stem.
        Set<String> stemKeywords = elasticSaveService.getKeyWord(stemText, 3);
        System.out.println("获取题干关键字---"+stemKeywords.toString());

        // Keywords of the options.
        Set<String> optionKeywords = elasticSaveService.getKeyWord(optionText, 3);
        System.out.println("获取选项关键字----"+optionKeywords.toString());

        // Look up suspected duplicates.
        List<QuestionMapping> suspects = elasticSaveService.queryByKeyword(stemKeywords, optionKeywords);
        String suspectsJson = JSONArray.toJSONString(suspects);
        System.out.println("170---"+suspectsJson);

        long finishedAt = System.currentTimeMillis();
        System.out.println("查找用时-------"+(finishedAt-startedAt));
    }
获取题干关键字---[要求, 所用, 营养液, 不符合]
获取选项关键字----[微量元素, 和, 维生素]
构建的DSL语句 {}{"query":{"bool":{"must":[{"match_phrase":{"content":{"query":"要求","slop":0,"zero_terms_query":"NONE","boost":1.0}}},{"match_phrase":{"content":{"query":"所用","slop":0,"zero_terms_query":"NONE","boost":1.0}}},{"match_phrase":{"content":{"query":"营养液","slop":0,"zero_terms_query":"NONE","boost":1.0}}},{"match_phrase":{"content":{"query":"不符合","slop":0,"zero_terms_query":"NONE","boost":1.0}}},{"match_phrase":{"option":{"query":"微量元素","slop":0,"zero_terms_query":"NONE","boost":1.0}}},{"match_phrase":{"option":{"query":"和","slop":0,"zero_terms_query":"NONE","boost":1.0}}},{"match_phrase":{"option":{"query":"维生素","slop":0,"zero_terms_query":"NONE","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}}
170---[{"content":"下列哪项不符合全胃肠外营养所用的营养液的要求","id":19032,"labelCode":"A1","option":"{\"A\":\"每日供氮应达0.2~0.24 g/kg体重\",\"B\":\"适量补充胰岛素和脂肪乳剂\",\"C\":\"含有适量的电解质、维生素和微量元素\",\"D\":\"氮和热量之比为1:100kcal\",\"E\":\"所补充的必需氨基酸和非必需氨基酸的含量一般应为1:2\"}","parentId":0,"right":"D","rigthCode":8,"sourceOrgId":-100,"status":1,"subCount":0}]
查找用时-------382

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值