背景说明:
项目面临多合作方集成挑战,涉及复杂的业务场景和接口字段间的数据映射,需手动配置映射表达式,效率低下,接口字段非常多时很费时费力。为优化此过程,项目采用向量模型技术,通过对现有合作伙伴的配置数据学习,自动识别并推荐最合适的字段映射表达式。以授信场景为例,尽管身份证号映射目标(X.idNo)固定,合作方所用字段名多样,如身份证号、证件号码、证件id等。通过运用大模型的自然语言处理能力,能精准识别用户查询的“证件号码”意图,确保映射至正确的X.idNo字段,而非误配至如手机号等其他字段,极大地提升了对接新合作方时的自动化程度与准确性。
最终实现效果:如下图,在接口字段配置页面,无需再手动填充,点击“一键生成”,自动适配出表达式内容。
pom文件添加langchain4j的依赖
此处用0.31.0是因为该版本向量查询支持元数据匹配,后面代码会使用到元数据
<!-- All LangChain4j modules are kept on the same version (0.31.0) so that the core, -->
<!-- main and OpenAI artifacts are built against the same API. -->
<dependencies>
    <dependency>
        <groupId>dev.langchain4j</groupId>
        <artifactId>langchain4j</artifactId>
        <version>0.31.0</version>
    </dependency>
    <dependency>
        <groupId>dev.langchain4j</groupId>
        <artifactId>langchain4j-core</artifactId>
        <version>0.31.0</version>
    </dependency>
    <dependency>
        <groupId>dev.langchain4j</groupId>
        <artifactId>langchain4j-open-ai</artifactId>
        <version>0.31.0</version>
    </dependency>
</dependencies>
yml增加配置:配置OpenAI地址(或代理地址)和apiKey
# LangChain4j settings bound by the "langchain4j" configuration properties class.
langchain4j:
  openAi:
    # Base URL of the OpenAI API, or of a proxy in front of it.
    baseUrl: https://xxxx
    # API key used for every request; keep the real value out of version control.
    apiKey: xxxxx
自定义langchain4j配置类,并将聊天模型、向量模型、向量存储定义为bean,交由spring容器管理
/**
 * Holder for the {@code langchain4j.*} configuration properties.
 *
 * <p>This class only carries bound values, so it is not a {@code @Configuration} class.
 * It is meant to be registered exactly once, by a configuration class that lists it in
 * {@code @EnableConfigurationProperties}; also component-scanning it would create a
 * second bean of the same type.
 */
@Data
@ConfigurationProperties("langchain4j")
public class LangChainProperties {

    /** Values bound from {@code langchain4j.openAi.*}. */
    private LanguageModelProperties openAi;

    /** Connection settings for a single language-model endpoint. */
    @Data
    public static class LanguageModelProperties {

        /** Base URL of the model API (or of a proxy in front of it). */
        private String baseUrl;

        /** API key sent to the model API. */
        private String apiKey;
    }
}
// 配置类,将openAi定义为bean,交由容器管理
@Configuration
@EnableConfigurationProperties(LangChainProperties.class)
public class LangChainConfig {
@Bean
@ConditionalOnClass(OpenAiChatModel.class)
public OpenAiChatModel openAiChatModel(LangChainProperties langChainProperties) {
// openAi聊天模型
return OpenAiChatModel.builder()
.baseUrl(langChainProperties.getOpenAi().getBaseUrl())
.apiKey(langChainProperties.getOpenAi().getApiKey())
.build();
}
@Bean
@ConditionalOnClass(OpenAiEmbeddingModel.class)
public OpenAiEmbeddingModel openAiEmbeddingModel(LangChainProperties langChainProperties) {
// openAi向量模型
return OpenAiEmbeddingModel.builder()
.baseUrl(langChainProperties.getOpenAi().getBaseUrl())
.apiKey(langChainProperties.getOpenAi().getApiKey()).build();
}
@Bean
public InMemoryEmbeddingStore<TextSegment> inMemoryEmbeddingStore() {
// 本地向量存储
return new InMemoryEmbeddingStore<>();
}
}
// 枚举类
@Getter
public enum InterfaceTypeEnum {
CREDIT("creditApply", "预审", List.of("授信申请", "授信查询", "用信申请")),
LOAN("loanApply", "放款", List.of("借款申请", "借款结果查询", "放款申请"));
private String code;
private String desc;
private List<String> suptDesc;
InterfaceTypeEnum(String code, String desc, List<String> suptDesc) {
this.code = code;
this.desc = desc;
this.suptDesc = suptDesc;
}
public static Map<String, InterfaceTypeEnum> map = new HashMap<>();
static {
Stream.of(values()).forEach(v -> map.put(v.getCode(), v));
}
}
文本分割器,解析表达式文本,包含接口类型、字段名称、表达式;其中字段名称为向量文本内容,接口类型、表达式存储在metadata元数据中
@Slf4j
public class FundDocumentSplitter implements DocumentSplitter {
@Override
public List<TextSegment> split(Document document) {
List<TextSegment> segments = new ArrayList<>();
String[] parts = document.text().split("\n");
for (String part : parts) {
try {
String[] splits = part.split("\\@@");
if (splits.length != 3) {
continue;
}
if (StringUtils.isBlank(splits[2])) {
continue;
}
TextSegment textSegment = TextSegment.from(splits[1]);
packageMetadata(textSegment, splits[0], splits[2]);
segments.add(textSegment);
} catch (Exception e) {
log.error("数据分割异常part:{}, msg:{}",part, e.getMessage());
}
}
return segments;
}
private void packageMetadata(TextSegment textSegment, String interfaceDesc, String expression) {
textSegment.metadata().put("interfaceType", getInterfaceType(interfaceDesc)); // 接口类型
textSegment.metadata().put("expression", expression); // 表达式
}
private String getInterfaceType(String interfaceDesc) {
Map<String, InterfaceTypeEnum> map = InterfaceTypeEnum.map;
for (Map.Entry<String, InterfaceTypeEnum> entry : map.entrySet()) {
if (entry.getValue().getSuptDesc().contains(interfaceDesc)) {
return entry.getKey();
}
}
return "";
}
}
表达式模板示例
利用Spring的ApplicationContextAware回调,在容器初始化该bean时(即服务启动过程中),将文本内容向量化并加载到内存向量存储中
@Service
@Slf4j
public class DocumentLoader implements ApplicationContextAware {
private final OpenAiEmbeddingModel openAiEmbeddingModel;
private final InMemoryEmbeddingStore<TextSegment> inMemoryEmbeddingStore;
private final ResourceLoader resourceLoader;
public DocumentLoader(OpenAiEmbeddingModel openAiEmbeddingModel, InMemoryEmbeddingStore<TextSegment> inMemoryEmbeddingStore, ResourceLoader resourceLoader) {
this.openAiEmbeddingModel = openAiEmbeddingModel;
this.inMemoryEmbeddingStore = inMemoryEmbeddingStore;
this.resourceLoader = resourceLoader;
}
@Override
public void setApplicationContext(ApplicationContext applicationContext) {
try {
// 1、将所有接口字段描述进行向量化
// 导入文本数据
log.info("开始加载向量数据");
Document document = getDocument();
DocumentSplitter splitter = new FundDocumentSplitter();
// 对数据进行切分
List<TextSegment> segments = splitter.split(document);
// 根据向量模型获取向量数据
List<Embedding> embeddings = openAiEmbeddingModel.embedAll(segments).content();
// 2、对配置的接口描述进行向量化
List<TextSegment> interSegments = new ArrayList<>();
Map<String, InterfaceTypeEnum> map = InterfaceTypeEnum.map;
for (Map.Entry<String, InterfaceTypeEnum> entry : map.entrySet()) {
InterfaceTypeEnum value = entry.getValue();
value.getSuptDesc().forEach(desc -> {
TextSegment textSegment = TextSegment.textSegment(desc);
textSegment.metadata().put("interfaceType", value.getCode()); // 接口类型
textSegment.metadata().put("type", "interface"); // 数据类型
interSegments.add(textSegment);
});
}
List<Embedding> interEmbeddings = openAiEmbeddingModel.embedAll(interSegments).content();
interSegments.addAll(segments);
interEmbeddings.addAll(embeddings);
// 向量数据存储到InMemoryEmbeddingStore内存中
inMemoryEmbeddingStore.addAll(interEmbeddings, interSegments);
log.info("向量数据加载完毕");
} catch (Exception e) {
log.error("向量数据初始化加载异常", e);
}
}
// 加载文本内容
private Document getDocument() throws URISyntaxException, IOException {
// 加载并解析文件
// Path documentPath = Paths.get(DocumentLoader.class.getClassLoader().getResource("fund_expression.txt").toURI());
DocumentParser documentParser = new TextDocumentParser();
String documentPath = "/documentPath/fund_expression.txt";
fileWrite();
return FileSystemDocumentLoader.loadDocument(documentPath, documentParser);
}
private void fileWrite() throws IOException {
File outputFile = new File("/documentPath");
if (!outputFile.exists()) {
outputFile.mkdirs();
}
File fileName = new File(outputFile, "fund_expression.txt");
if (!fileName.exists()) {
fileName.createNewFile();
}
try (InputStream inputStream = resourceLoader.getResource("classpath:fund_expression.txt").getInputStream();
FileOutputStream outputStream = new FileOutputStream(fileName)) {
// 创建一个缓冲区来提高读写效率
byte[] buffer = new byte[1024]; // 缓冲区大小可以根据实际情况调整
int length;
// 读取输入流并写入到输出流中
while ((length = inputStream.read(buffer)) != -1) {
outputStream.write(buffer, 0, length);
}
} catch (IOException e) {
log.error("写入文件异常", e);
}
}
}
service查询业务逻辑:先根据接口描述匹配出接口类型,再在该接口类型下按字段名称做向量相似度检索,并从匹配结果的向量元数据中获取表达式
@Service
@Slf4j
public class ExpressionService {
private final OpenAiEmbeddingModel openAiEmbeddingModel;
private final InMemoryEmbeddingStore<TextSegment> inMemoryEmbeddingStore;
public ExpressionService(OpenAiEmbeddingModel openAiEmbeddingModel, InMemoryEmbeddingStore<TextSegment> inMemoryEmbeddingStore) {
this.openAiEmbeddingModel = openAiEmbeddingModel;
this.inMemoryEmbeddingStore = inMemoryEmbeddingStore;
}
public String queryExpression(String interMsg, String fieldMsg) {
Response<Embedding> embed = openAiEmbeddingModel.embed(fieldMsg);
String interfaceType = getInterfaceType(interMsg);
EmbeddingSearchRequest searchRequest = new EmbeddingSearchRequest(embed.content(), 1,
0.90, new IsIn("interfaceType", List.of(interfaceType)));
EmbeddingSearchResult<TextSegment> searchResult = inMemoryEmbeddingStore.search(searchRequest);
for (EmbeddingMatch<TextSegment> embeddingMatch : searchResult.matches()) {
String expression = embeddingMatch.embedded().metadata().getString("expression");
log.info("接口描述:{}, 字段描述:{}, 匹配的表达式:{}", interMsg, fieldMsg, expression);
return expression;
}
return "";
}
private String getInterfaceType(String msg) {
Response<Embedding> embed = openAiEmbeddingModel.embed(msg);
EmbeddingSearchRequest searchRequest = new EmbeddingSearchRequest(embed.content(), 1,
0.99, new IsIn("type", List.of("interface")));
EmbeddingSearchResult<TextSegment> searchResult = inMemoryEmbeddingStore.search(searchRequest);
List<EmbeddingMatch<TextSegment>> result = searchResult.matches();
if (CollectionUtils.isEmpty(searchResult.matches())) {
return "";
}
for (EmbeddingMatch<TextSegment> embeddingMatch : result) {
String interfaceType = embeddingMatch.embedded().metadata().getString("interfaceType");
log.info("接口描述:{}, 匹配的接口类型为:{}", embeddingMatch.embedded().text(), interfaceType);
return interfaceType;
}
return "";
}
}
controller层代码
/**
 * REST entry point for expression recommendation, mounted under {@code /api/expression}.
 */
@RestController
@RequestMapping("/api/expression")
@RequiredArgsConstructor
public class ExpressionController {

    private final ExpressionService expressionService;

    /**
     * Looks up the expression for the interface and field descriptions in the request
     * body and wraps it in a response carrying the success code and description.
     *
     * @param requestDTO request body providing the interface and field descriptions
     * @return HTTP 200 with the lookup result set as the response data
     */
    @PostMapping(value = {"/query"})
    public ResponseEntity<BaseResponseDTO> queryExpression(@RequestBody ExpressionQueryRequestDTO requestDTO) {
        ResponseCodeEnum success = ResponseCodeEnum.SUCCESS;
        ExpressionQueryResponseDTO body =
                new ExpressionQueryResponseDTO(success.getCode(), success.getDescription());

        String expression =
                expressionService.queryExpression(requestDTO.getInterMsg(), requestDTO.getFieldMsg());
        body.setData(expression);

        return ResponseEntity.ok(body);
    }
}
postman调用结果