langchain4j实战:向量模型EmbeddingModel落地实践

背景说明:
项目面临多合作方集成挑战,涉及复杂的业务场景和接口字段间的数据映射,需手动配置映射表达式,效率低下,接口字段非常多时很费时费力。为优化此过程,项目采用向量模型技术,通过对现有合作伙伴的配置数据学习,自动识别并推荐最合适的字段映射表达式。以授信场景为例,尽管身份证号映射目标(X.idNo)固定,合作方所用字段名多样,如身份证号、证件号码、证件id等。通过运用大模型的自然语言处理能力,能精准识别用户查询的“证件号码”意图,确保映射至正确的X.idNo字段,而非误配至如手机号等其他字段,极大地提升了对接新合作方时的自动化程度与准确性。
最终实现效果:如下图,在接口字段配置页面,无需再手动填充,点击“一键生成”,自动适配出表达式内容。
在这里插入图片描述

pom文件添加langchain4j的依赖
此处使用0.31.0,是因为该版本的向量检索(EmbeddingSearchRequest)支持按元数据(metadata)过滤,后面的代码会用到元数据过滤

    <!-- Keep all LangChain4j artifacts on the same version: mixing releases
         (previously langchain4j-open-ai was 0.27.1 against core 0.31.0) risks
         binary incompatibilities between the modules at runtime. -->
    <dependencies>
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j</artifactId>
            <version>0.31.0</version>
        </dependency>
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-core</artifactId>
            <version>0.31.0</version>
        </dependency>
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-open-ai</artifactId>
            <version>0.31.0</version>
        </dependency>
    </dependencies>

yml增加配置,配置openAi地址(或代理地址),apikey

# LangChain4j settings, bound through the "langchain4j" configuration-properties prefix.
langchain4j:
  openAi:
    # Base URL of the OpenAI API (or of a proxy in front of it).
    baseUrl: https://xxxx
    # API key sent to the endpoint above (placeholder value).
    apiKey: xxxxx

自定义langchain4j配置类,并将聊天模型、向量模型、向量存储定义为bean,交由spring容器管理

/**
 * Binds the {@code langchain4j.*} section of the application configuration.
 *
 * <p>This class is intentionally NOT a {@code @Configuration}/{@code @Component}: it is registered
 * as a bean by {@code @EnableConfigurationProperties(LangChainProperties.class)}. Annotating it as
 * a component as well would register a second bean of the same type.
 */
@Data
@ConfigurationProperties("langchain4j")
public class LangChainProperties {

    /** Connection settings for the OpenAI-compatible endpoint ({@code langchain4j.openAi.*}). */
    private LanguageModelProperties openAi;

    /** Endpoint address and credential for one language-model provider. */
    @Data
    public static class LanguageModelProperties {
        /** Base URL of the API (or of a proxy in front of it). */
        private String baseUrl;
        /** API key used to authenticate against {@link #baseUrl}. */
        private String apiKey;
    }
}

/**
 * Spring configuration that exposes the OpenAI chat model, the OpenAI embedding model and an
 * in-memory embedding store as container-managed beans.
 */
@Configuration
@EnableConfigurationProperties(LangChainProperties.class)
public class LangChainConfig {

    /**
     * Creates the OpenAI chat model.
     *
     * @param langChainProperties bound {@code langchain4j.*} settings
     * @return chat model configured with the base URL and API key from the properties
     */
    @Bean
    @ConditionalOnClass(OpenAiChatModel.class)
    public OpenAiChatModel openAiChatModel(LangChainProperties langChainProperties) {
        String baseUrl = langChainProperties.getOpenAi().getBaseUrl();
        String apiKey = langChainProperties.getOpenAi().getApiKey();
        return OpenAiChatModel.builder()
                .baseUrl(baseUrl)
                .apiKey(apiKey)
                .build();
    }

    /**
     * Creates the OpenAI embedding model.
     *
     * @param langChainProperties bound {@code langchain4j.*} settings
     * @return embedding model configured with the base URL and API key from the properties
     */
    @Bean
    @ConditionalOnClass(OpenAiEmbeddingModel.class)
    public OpenAiEmbeddingModel openAiEmbeddingModel(LangChainProperties langChainProperties) {
        String baseUrl = langChainProperties.getOpenAi().getBaseUrl();
        String apiKey = langChainProperties.getOpenAi().getApiKey();
        return OpenAiEmbeddingModel.builder()
                .baseUrl(baseUrl)
                .apiKey(apiKey)
                .build();
    }

    /**
     * Creates the embedding store that keeps vectors in local memory.
     *
     * @return a new, empty in-memory store
     */
    @Bean
    public InMemoryEmbeddingStore<TextSegment> inMemoryEmbeddingStore() {
        InMemoryEmbeddingStore<TextSegment> store = new InMemoryEmbeddingStore<>();
        return store;
    }
}


/**
 * Interface types known to the expression recommender.
 *
 * <p>Each constant carries a stable code, a short description and the list of interface
 * descriptions that map to it.
 */
public enum InterfaceTypeEnum {
    CREDIT("creditApply", "预审", List.of("授信申请", "授信查询", "用信申请")),
    LOAN("loanApply", "放款", List.of("借款申请", "借款结果查询", "放款申请"));

    /**
     * Read-only lookup from {@link #getCode() code} to enum constant.
     *
     * <p>Built once during class initialization; any attempt to modify it throws
     * {@link UnsupportedOperationException}, so callers cannot corrupt the shared index.
     */
    public static final Map<String, InterfaceTypeEnum> map;

    static {
        Map<String, InterfaceTypeEnum> byCode = new HashMap<>();
        for (InterfaceTypeEnum type : values()) {
            byCode.put(type.code, type);
        }
        map = java.util.Collections.unmodifiableMap(byCode);
    }

    private final String code;
    private final String desc;
    private final List<String> suptDesc;

    InterfaceTypeEnum(String code, String desc, List<String> suptDesc) {
        this.code = code;
        this.desc = desc;
        this.suptDesc = suptDesc;
    }

    /** @return the stable code of this interface type, also used as the key of {@link #map} */
    public String getCode() {
        return code;
    }

    /** @return the short description of this interface type */
    public String getDesc() {
        return desc;
    }

    /** @return the interface descriptions that resolve to this type */
    public List<String> getSuptDesc() {
        return suptDesc;
    }
}

文本分割器,解析表达式文本,包含接口类型、字段名称、表达式;其中字段名称为向量文本内容,接口类型、表达式存储在metadata元数据中

@Slf4j
public class FundDocumentSplitter implements DocumentSplitter {
    @Override
    public List<TextSegment> split(Document document) {
        List<TextSegment> segments = new ArrayList<>();
        String[] parts = document.text().split("\n");
        for (String part : parts) {
            try {
                String[] splits = part.split("\\@@");
                if (splits.length != 3) {
                    continue;
                }
                if (StringUtils.isBlank(splits[2])) {
                    continue;
                }
                TextSegment textSegment = TextSegment.from(splits[1]);
                packageMetadata(textSegment, splits[0], splits[2]);
                segments.add(textSegment);
            } catch (Exception e) {
                log.error("数据分割异常part:{}, msg:{}",part, e.getMessage());
            }
        }
        return segments;
    }

    private void packageMetadata(TextSegment textSegment, String interfaceDesc, String expression) {
        textSegment.metadata().put("interfaceType", getInterfaceType(interfaceDesc)); // 接口类型
        textSegment.metadata().put("expression", expression); // 表达式
    }

    private String getInterfaceType(String interfaceDesc) {
        Map<String, InterfaceTypeEnum> map = InterfaceTypeEnum.map;
        for (Map.Entry<String, InterfaceTypeEnum> entry : map.entrySet()) {
            if (entry.getValue().getSuptDesc().contains(interfaceDesc)) {
                return entry.getKey();
            }
        }
        return "";
    }
}

表达式模板示例
在这里插入图片描述

利用Spring的ApplicationContextAware回调,在该Bean初始化阶段(即容器启动过程中,而非启动完成之后)将文本内容向量化,并加载到内存向量存储(InMemoryEmbeddingStore)中

/**
 * Loads the expression template and the configured interface descriptions into the in-memory
 * embedding store.
 *
 * <p>The load is triggered from {@link #setApplicationContext(ApplicationContext)}, i.e. while
 * Spring is initializing this bean. Failures are logged and swallowed so that a missing template
 * or an unreachable embedding endpoint does not prevent the application from starting; in that
 * case the store simply stays empty.
 */
@Service
@Slf4j
public class DocumentLoader implements ApplicationContextAware {

    /** Classpath location of the expression template. */
    private static final String EXPRESSION_RESOURCE = "classpath:fund_expression.txt";

    private final OpenAiEmbeddingModel openAiEmbeddingModel;
    private final InMemoryEmbeddingStore<TextSegment> inMemoryEmbeddingStore;
    private final ResourceLoader resourceLoader;

    public DocumentLoader(OpenAiEmbeddingModel openAiEmbeddingModel, InMemoryEmbeddingStore<TextSegment> inMemoryEmbeddingStore, ResourceLoader resourceLoader) {
        this.openAiEmbeddingModel = openAiEmbeddingModel;
        this.inMemoryEmbeddingStore = inMemoryEmbeddingStore;
        this.resourceLoader = resourceLoader;
    }

    /**
     * Embeds all interface descriptions and all field segments and stores them.
     *
     * @param applicationContext the context that owns this bean (not used)
     */
    @Override
    public void setApplicationContext(ApplicationContext applicationContext) {
        try {
            log.info("开始加载向量数据");
            // Interface-description segments first, then the field segments from the template.
            List<TextSegment> allSegments = buildInterfaceSegments();
            allSegments.addAll(new FundDocumentSplitter().split(getDocument()));
            // Embed into a list we own instead of appending to the list returned by the model,
            // whose mutability is not guaranteed.
            List<Embedding> allEmbeddings =
                    new ArrayList<>(openAiEmbeddingModel.embedAll(allSegments).content());
            inMemoryEmbeddingStore.addAll(allEmbeddings, allSegments);
            log.info("向量数据加载完毕");
        } catch (Exception e) {
            log.error("向量数据初始化加载异常", e);
        }
    }

    /**
     * Builds one segment per configured interface description. Each segment is tagged with the
     * owning interface type code and with {@code type=interface} so it can be told apart from
     * field segments at query time.
     */
    private List<TextSegment> buildInterfaceSegments() {
        List<TextSegment> interSegments = new ArrayList<>();
        for (InterfaceTypeEnum type : InterfaceTypeEnum.values()) {
            for (String desc : type.getSuptDesc()) {
                TextSegment textSegment = TextSegment.textSegment(desc);
                textSegment.metadata().put("interfaceType", type.getCode());
                textSegment.metadata().put("type", "interface");
                interSegments.add(textSegment);
            }
        }
        return interSegments;
    }

    /**
     * Reads the expression template straight from the classpath resource stream.
     *
     * <p>Parsing the stream works both from an exploded classpath and from inside a packaged jar,
     * and needs no writable directory on the host file system.
     *
     * @return the parsed template document
     * @throws IOException if the resource cannot be opened or read
     */
    private Document getDocument() throws IOException {
        DocumentParser documentParser = new TextDocumentParser();
        try (InputStream inputStream = resourceLoader.getResource(EXPRESSION_RESOURCE).getInputStream()) {
            return documentParser.parse(inputStream);
        }
    }

}

service查询业务逻辑:先通过接口描述匹配出接口类型,再用字段名称在该接口类型下检索最相近的向量数据,并从匹配结果的元数据中取出表达式

@Service
@Slf4j
public class ExpressionService {
    private final OpenAiEmbeddingModel openAiEmbeddingModel;
    private final InMemoryEmbeddingStore<TextSegment> inMemoryEmbeddingStore;

    public ExpressionService(OpenAiEmbeddingModel openAiEmbeddingModel, InMemoryEmbeddingStore<TextSegment> inMemoryEmbeddingStore) {
        this.openAiEmbeddingModel = openAiEmbeddingModel;
        this.inMemoryEmbeddingStore = inMemoryEmbeddingStore;
    }

    public String queryExpression(String interMsg, String fieldMsg) {
        Response<Embedding> embed = openAiEmbeddingModel.embed(fieldMsg);
        String interfaceType = getInterfaceType(interMsg);
        EmbeddingSearchRequest searchRequest = new EmbeddingSearchRequest(embed.content(), 1,
                0.90, new IsIn("interfaceType", List.of(interfaceType)));
        EmbeddingSearchResult<TextSegment> searchResult = inMemoryEmbeddingStore.search(searchRequest);
        for (EmbeddingMatch<TextSegment> embeddingMatch : searchResult.matches()) {
            String expression = embeddingMatch.embedded().metadata().getString("expression");
            log.info("接口描述:{}, 字段描述:{}, 匹配的表达式:{}", interMsg, fieldMsg, expression);
            return expression;
        }
        return "";
    }

    private String getInterfaceType(String msg) {
        Response<Embedding> embed = openAiEmbeddingModel.embed(msg);
        EmbeddingSearchRequest searchRequest = new EmbeddingSearchRequest(embed.content(), 1,
                0.99, new IsIn("type", List.of("interface")));
        EmbeddingSearchResult<TextSegment> searchResult = inMemoryEmbeddingStore.search(searchRequest);
        List<EmbeddingMatch<TextSegment>> result = searchResult.matches();
        if (CollectionUtils.isEmpty(searchResult.matches())) {
            return "";
        }
        for (EmbeddingMatch<TextSegment> embeddingMatch : result) {
            String interfaceType = embeddingMatch.embedded().metadata().getString("interfaceType");
            log.info("接口描述:{}, 匹配的接口类型为:{}", embeddingMatch.embedded().text(), interfaceType);
            return interfaceType;
        }
        return "";
    }
}

controller层代码

/**
 * REST entry point for expression recommendation, served under {@code /api/expression}.
 */
@RestController
@RequestMapping("/api/expression")
@RequiredArgsConstructor
public class ExpressionController {

    private final ExpressionService expressionService;

    /**
     * Looks up the expression for the interface and field descriptions in the request body.
     *
     * @param requestDTO request carrying the interface description and the field description
     * @return HTTP 200 with a success response whose data is the expression returned by the service
     */
    @PostMapping("/query")
    public ResponseEntity<BaseResponseDTO> queryExpression(@RequestBody ExpressionQueryRequestDTO requestDTO) {
        ResponseCodeEnum success = ResponseCodeEnum.SUCCESS;
        ExpressionQueryResponseDTO body =
                new ExpressionQueryResponseDTO(success.getCode(), success.getDescription());
        String expression =
                expressionService.queryExpression(requestDTO.getInterMsg(), requestDTO.getFieldMsg());
        body.setData(expression);
        return ResponseEntity.ok(body);
    }
}

postman调用结果
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值