1.添加依赖
<dependency>
<groupId>com.github.plexpt</groupId>
<artifactId>chatgpt</artifactId>
<version>5.1.0</version>
</dependency>
<!-- chatgpt 5.1.0 依赖 okhttp 4.12.0 版本 -->
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.12.0</version>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp-sse</artifactId>
<version>4.12.0</version>
</dependency>
2.使用ollama部署本地模型
3.问答
后端代码
流式输出使用 SSE 长连接和前端进行交互,非流式就直接返回结果
import com.plexpt.chatgpt.ChatGPT;
import com.plexpt.chatgpt.ChatGPTStream;
import com.plexpt.chatgpt.entity.chat.ChatCompletion;
import com.plexpt.chatgpt.entity.chat.ChatCompletionResponse;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
import javax.servlet.http.HttpServletResponse;
import java.util.List;
@RestController
@RequestMapping("/chat")
@Slf4j
public class ChatController {

    // Streaming client pointed at the local Ollama OpenAI-compatible endpoint.
    // Ollama ignores the api key, but the client library requires one.
    private final ChatGPTStream chatGPTStream = ChatGPTStream.builder()
            .apiHost("http://localhost:11434/")
            .apiKeyList(ListUtils.of("sk-xxx"))
            .build()
            .init();

    // Blocking (non-streaming) client against the same host.
    private final ChatGPT chatGPT = ChatGPT.builder()
            .apiHost("http://localhost:11434/")
            .apiKeyList(ListUtils.of("sk-xxx"))
            .build()
            .init();

    /**
     * OpenAI-compatible chat completion endpoint.
     * <p>
     * When {@code stream} is true the answer is pushed chunk-by-chunk over an
     * SSE connection (an {@link SseEmitter} is returned); otherwise the full
     * completion is computed synchronously and returned as JSON.
     *
     * @param chatCompletion the request body in OpenAI chat-completion format
     * @return an {@link SseEmitter} for streaming requests, otherwise the
     *         complete {@code ChatCompletionResponse}
     */
    @PostMapping("/completions")
    public Object completions(@RequestBody ChatCompletion chatCompletion) {
        HttpServletResponse response = ServletUtils.getResponse();
        // getStream() may return a boxed Boolean; guard against null so a
        // request that omits the "stream" field does not NPE on unboxing.
        if (Boolean.TRUE.equals(chatCompletion.getStream())) {
            // Streaming: hand chunks to the browser over SSE.
            response.setContentType("text/event-stream");
            response.setCharacterEncoding("UTF-8");
            // A timeout <= 0 means the async request never times out.
            SseEmitter sseEmitter = new SseEmitter(-1L);
            SseStreamListener listener = new SseStreamListener(sseEmitter);
            listener.setOnComplete(msg -> {
                log.info("msg:{}", msg);
            });
            chatGPTStream.streamChatCompletion(chatCompletion, listener);
            return sseEmitter;
        } else {
            // Non-streaming: block until the model finishes, return JSON.
            response.setContentType("application/json");
            response.setCharacterEncoding("UTF-8");
            ChatCompletionResponse completion = chatGPT.chatCompletion(chatCompletion);
            String msg = completion.getChoices().get(0).getMessage().getContent();
            log.info("msg:{}", msg);
            return completion;
        }
    }
}
import com.plexpt.chatgpt.entity.chat.ChatCompletionResponse;
import com.plexpt.chatgpt.util.SseHelper;
import lombok.Getter;
import lombok.Setter;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import okhttp3.Response;
import okhttp3.ResponseBody;
import okhttp3.sse.EventSource;
import okhttp3.sse.EventSourceListener;
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
import java.util.Objects;
import java.util.function.Consumer;
@Slf4j
public class SseStreamListener extends EventSourceListener {

    // Accumulated assistant message, built up from the streamed deltas.
    protected String lastMessage = "";

    // Downstream SSE connection to the browser.
    final SseEmitter sseEmitter;

    public SseStreamListener(SseEmitter sseEmitter) {
        this.sseEmitter = sseEmitter;
    }

    /**
     * Pushes one raw data chunk to the browser over SSE.
     *
     * @param message raw payload received from the model
     */
    public void onMsg(String message) {
        SseHelper.send(sseEmitter, message);
    }

    /**
     * Closes the downstream SSE connection. Not an okhttp callback;
     * intended for callers that detect failures themselves.
     */
    public void onError(Throwable throwable, String response) {
        SseHelper.complete(sseEmitter);
    }

    /**
     * Invoked once the stream ends (normally or on failure) with the full
     * accumulated message.
     */
    @Setter
    @Getter
    protected Consumer<String> onComplete = s -> {
    };

    /**
     * Triggered when the upstream SSE connection is established.
     */
    @Override
    public void onOpen(EventSource eventSource, Response response) {
        log.info("OpenAI建立sse连接...");
    }

    /**
     * Triggered when the upstream closes; propagates the close downstream.
     */
    @Override
    public void onClosed(EventSource eventSource) {
        log.info("OpenAI关闭sse连接...");
        // 后端主动断开sse
        sseEmitter.complete();
    }

    /**
     * Triggered for every upstream event: forwards the chunk to the browser
     * and accumulates the delta text.
     */
    @Override
    public void onEvent(EventSource eventSource, String id, String type, String data) {
        log.info("OpenAI返回数据:{}", data);
        // Forward the payload verbatim, including the trailing "[DONE]"
        // sentinel, which OpenAI-compatible clients rely on to stop reading.
        onMsg(data);
        if (data.equals("[DONE]")) {
            log.info("OpenAI返回数据结束了");
            onComplete.accept(lastMessage);
            return;
        }
        ChatCompletionResponse response = JsonUtils.toBean(data, ChatCompletionResponse.class);
        String content = response.getChoices().get(0).getDelta().getContent();
        // A null delta means "no text in this chunk"; an empty string or a
        // lone space is still valid content, so only null is filtered out.
        if (content != null) {
            lastMessage += content;
        }
    }

    /**
     * Triggered on upstream failure. Notifies the completion callback and
     * always releases both the upstream event source and the downstream
     * emitter — previously a null {@code response} returned early and left
     * the browser's SSE connection open forever.
     */
    @SneakyThrows
    @Override
    public void onFailure(EventSource eventSource, Throwable t, Response response) {
        try {
            if (Objects.isNull(response)) {
                // No HTTP response at all (e.g. connect failure): still
                // surface whatever was accumulated so far.
                onComplete.accept(lastMessage);
                return;
            }
            ResponseBody body = response.body();
            if (Objects.nonNull(body)) {
                log.error("OpenAI sse连接异常data:【{}】,异常:{}", body.string(), t.getMessage());
            } else {
                log.error("OpenAI sse连接异常data:【{}】,异常:{}", response, t.getMessage());
            }
            onComplete.accept(lastMessage);
        } catch (Exception e) {
            log.warn("onFailure error:{}", e.getMessage());
        } finally {
            // Release upstream and close downstream so neither side leaks
            // the connection.
            eventSource.cancel();
            SseHelper.complete(sseEmitter);
        }
    }
}
前端代码
前端直接使用openai客户端
<template>
<!-- Minimal chat page: finished turns, the in-flight partial answer, and an input row. -->
<div class="Chat">
<!-- Completed conversation history -->
<div v-for="(message,index) in messages" :key="index">
{{ message.content }}
<br>
</div>
<!-- Partial answer shown while the stream is still running -->
<div v-if="generateIng">
<br>
{{ generateText }}
</div>
<el-input v-model="input"></el-input>
<el-button @click="handleSubmit">提交</el-button>
</div>
</template>
<script>
import OpenAI from 'openai';
export default {
name: 'Chat',
data() {
return {
// full conversation history; sent with every request
messages: [],
// current text of the input box
input: '',
// OpenAI SDK client, created in mounted()
openaiClient: null,
// true while a streamed answer is being generated
generateIng: false,
// accumulated partial answer for the in-flight request
generateText: ""
}
},
mounted() {
// Point the OpenAI SDK at the Spring backend instead of api.openai.com.
// dangerouslyAllowBrowser is required to run the SDK in a browser; the
// api key is a placeholder — presumably the backend ignores it (TODO confirm).
this.openaiClient = new OpenAI({
baseURL: "http://localhost:8501/demo",
apiKey: "sk-xxx",
dangerouslyAllowBrowser: true
})
},
methods: {
// Sends the user's input and streams the assistant's reply.
// NOTE(review): no try/catch — a network or API error leaves
// generateIng stuck at true; consider wrapping in try/finally.
async handleSubmit() {
this.messages.push({
"role": "user",
"content": this.input
})
this.input = ""
this.generateIng = true
// Request a streamed completion through the SDK's streaming helper.
const stream = await this.openaiClient.beta.chat.completions.stream({
"model": "deepseek-r1:1.5b",
"messages": this.messages,
"stream": true,
})
// Append each delta chunk to the visible partial answer.
for await (const chunk of stream) {
console.log("chunk", chunk);
this.generateText += chunk.choices[0]?.delta?.content || '';
}
this.generateIng = false
// Move the finished answer into the permanent history.
this.messages.push({
"role": "assistant",
"content": this.generateText
})
this.generateText = ""
}
}
}
</script>
<style lang='scss' scoped>
.Chat {
}
</style>
4.向量
// Example 1: embed a single input string.
EmbeddingRequest embeddingRequest = EmbeddingRequest.builder()
.input(ListUtils.of(prompt)) // the text to embed, wrapped in a one-element list
.model("bge-m3")
.build();
// For a single input, the vector is the first (only) element of getData().
List<Double> embeddings = chatGPT.createEmbeddings(embeddingRequest)
.getData().get(0).getEmbedding();
// Example 2: embed multiple inputs in one call.
// NOTE(review): this redeclares embeddingRequest — the two examples are
// alternatives and will not compile together in the same scope.
EmbeddingRequest embeddingRequest = EmbeddingRequest.builder()
.model("bge-m3")
.input(chunks) // list of text chunks to embed
.build();
EmbeddingResult embeddingResult = chatGPT.createEmbeddings(embeddingRequest);
List<EmbeddingData> embeddings = embeddingResult.getData();