OpenAI chat built on node/egg.js/langchain.js: core code for plugin support, covering web search, local knowledge-base Q&A, and uploading/chunking documents into a vector database

Supports web search and chat against a remote knowledge base / vector database (Pinecone). A quick rant first: the web is full of Node + langchain "local knowledge base chat" tutorials that pile on jargon (mostly lifted from the English docs through machine translation, which makes the Chinese write-ups painful to read). Follow them blindly and you end up with something that technically works but has no conversational context, a half-useless chatbot.

In this project, langchain is really only used to split data into chunks.

In plain terms, the core logic of local knowledge-base chat is: convert the user's question into a vector (OpenAI provides an embeddings API for this),

text (string) => vector (an array of floats, e.g. [0.0123, -0.0456, ...])
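
A minimal sketch of that conversion (not part of main.js; it uses the same openai v3 SDK imported below, and assumes the API key comes from your own config):

const { Configuration, OpenAIApi } = require('openai');

async function embedQuestion(question, apiKey) {
    const openai = new OpenAIApi(new Configuration({ apiKey }));
    const res = await openai.createEmbedding({
        model: 'text-embedding-ada-002',
        input: question,
    });
    // text-embedding-ada-002 returns a 1536-dimensional array of floats
    return res.data.data[0].embedding;
}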

then query the remote vector database with cosine similarity to retrieve entries above a certain similarity, i.e. the related data (Pinecone's docs cover this; configure it and call the API, no need to implement the algorithm yourself),

and finally tidy up those retrieved snippets and inject them into the OpenAI chat context as a system message (role: 'system' is OpenAI's role parameter), e.g. { role: 'system', content: `Here is the result of querying the remote vector database \n content: \n ${promptContent.text}` }.

The remote vector database is populated the same way in reverse: take the data the user provides (PDF, TXT and so on), split it into chunks first (there is a size limit per embedding), convert the chunks into vectors, and store them in the database.
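
A minimal sketch of that upload path (same langchain / Pinecone packages imported in main.js below; the index name 'first-database' matches the code below, while the namespace and keys are placeholders):

const { PineconeClient } = require('@pinecone-database/pinecone');
const { PineconeStore } = require('langchain/vectorstores/pinecone');
const { OpenAIEmbeddings } = require('langchain/embeddings/openai');
const { RecursiveCharacterTextSplitter } = require('langchain/text_splitter');
const { PDFLoader } = require('langchain/document_loaders/fs/pdf');

async function uploadPdf(filepath, { openaiKey, pineconeKey, environment }) {
    // 1. load the file and slice it into chunks (embedding input has a size limit)
    const docs = await new PDFLoader(filepath).load();
    const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
    const splitterDocs = await splitter.splitDocuments(docs);
    // 2. embed each chunk and upsert it into the Pinecone index
    const client = new PineconeClient();
    await client.init({ apiKey: pineconeKey, environment });
    await PineconeStore.fromDocuments(
        splitterDocs,
        new OpenAIEmbeddings({ openAIApiKey: openaiKey }),
        { pineconeIndex: client.Index('first-database'), namespace: 'my-namespace' }
    );
}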

Web search follows the same pattern.

That is the basic flow for this kind of Q&A (langchain's biggest job here is chunking documents when they are uploaded to the database):

user question => convert to a vector => query the vector database => inject the top few matches into the conversation prompt => send the request to OpenAI
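
Put together, the whole flow looks roughly like this (a sketch only, using the same openai v3 SDK and Pinecone client as main.js below; index name, namespace and keys are placeholders):

const { Configuration, OpenAIApi } = require('openai');
const { PineconeClient } = require('@pinecone-database/pinecone');

async function answerFromKnowledgeBase(question, { openaiKey, pineconeKey, environment }) {
    const openai = new OpenAIApi(new Configuration({ apiKey: openaiKey }));
    // 1. question -> vector
    const emb = await openai.createEmbedding({ model: 'text-embedding-ada-002', input: question });
    const vector = emb.data.data[0].embedding;
    // 2. query the vector database (Pinecone handles the cosine-similarity search)
    const client = new PineconeClient();
    await client.init({ apiKey: pineconeKey, environment });
    const queryRes = await client.Index('first-database').query({
        queryRequest: { vector, topK: 3, includeMetadata: true, namespace: 'my-namespace' },
    });
    // 3. collect the top matches as context
    const context = queryRes.matches.map(m => m.metadata.text).join('\n');
    // 4. inject the context as a system message and ask OpenAI
    const chat = await openai.createChatCompletion({
        model: 'gpt-3.5-turbo',
        messages: [
            { role: 'system', content: `Here is the result of querying the remote vector database \n content: \n ${context}` },
            { role: 'user', content: question },
        ],
    });
    return chat.data.choices[0].message.content;
}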

Calling main.js:

//askQuestionData holds the official OpenAI chat-completion request parameters; see the OpenAI docs for details
//Register the plugins (web search / local knowledge-base Q&A); each maps to a method in main.js that OpenAI can call
askQuestionData['functions'] = [
                {
                "name": "getNetworkPrompt",
                "description": "This is your Search engine. Calling this function will connect to the Internet to search and return relevant data",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "q": {
                            "type": "string",
                            "description": "User raised questions"
                        }
                    },
                    "required": ["q"]
                }
            },
            {
                "name": "getLocalPrompt",
                "description": "Query the remote vector library and obtain relevant content based on user questions",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "content": {
                            "type": "string",
                            "description": "User raised questions"
                        },
                        "namespace": {
                            "type": "string",
                            "description": "The name of the user's query database"
                        }
                    },
                    "required": ["content","namespace"]
                }
            }];
      service.openai.main.askQuestion(askQuestionData, {
                callbackFn: (res) => {
                    if (res.catch) { // network error
                        console.log(res.catch, 'network error')
                    } else if (res.error) { // request error
                        data.status = 'success'
                        data.content = res.error
                    } else {
                        const content = res.choices[0].delta.content !== undefined ? res.choices[0].delta.content : '';
                        console.log(content, 'streamed response chunk')
                    }
                },
                readStreamCallback: (readStream) => {
                    console.log(readStream, 'readStream: the readable response stream; can be destroyed to stop the request')
                }
            });  

main.js
 

const Service = require('egg').Service;
const { Configuration, OpenAIApi } = require("openai");
const { PineconeClient } = require("@pinecone-database/pinecone");
const axios = require("axios");
const { htmlToText } = require('html-to-text');
const { encodingForModel } = require("js-tiktoken");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { EPubLoader } = require("langchain/document_loaders/fs/epub");
const { PineconeStore } = require("langchain/vectorstores/pinecone");
const { OpenAIEmbeddings } = require("langchain/embeddings/openai");
const { PDFLoader } = require("langchain/document_loaders/fs/pdf");
const { TextLoader } = require("langchain/document_loaders/fs/text");
const { DocxLoader } = require("langchain/document_loaders/fs/docx");
const fs = require('fs');
class OpenaiService extends Service {
    questionConfiguration = {
        stream: true,
        model: "gpt-3.5-turbo"
    }
    askQuestionCurlParameter = {
        headers: {
            "Authorization": `Bearer ${this.ctx.app.config.openai.apiKey}`,  //openai apikey
            'Content-Type': 'application/json', // json 
        },
        timeout: 10000, //超时时间
        dataType: 'json', //数据类型 json
        streaming: true, //开始curl流响应 
        method: 'post',  //请求方式 post 
    }
    trySplicing(data) {
        try {
            return JSON.parse(data)
        } catch (error) {
            console.log(data, 'failed to splice broken stream chunk')
            return null
        }
    }
    dataProcessing(dataStream, BrokenData = []) {
        return String(dataStream).split("data:").filter((e) => e).flat(Infinity).map(item => {
            try {
                return JSON.parse(item)
            } catch (error) {
                if (item.split(/[\t\r\f\n\s]*/g).join('') == '[DONE]') {
                    return false
                } else {
                    let result = null;
                    if (BrokenData.length >= 1) {
                        result = this.trySplicing(`${BrokenData[BrokenData.length - 1]}${item}`)
                    }
                    if (result) {
                        BrokenData.pop();
                        return result;
                    } else {
                        BrokenData.push(item)
                        return false
                    }

                }
            }
        }).filter((e) => e)
    }
    routingParameterGeneration(parameter) {
        if (parameter == null || parameter == undefined) return;
        const parameterType = typeof parameter;
        if (parameterType == 'string') {
            let obj = {};
            let splitParameter = parameter.split('?');
            if (splitParameter.length <= 1) {
                console.warn("暂无路径参数");
                return
            }
            splitParameter[1].split('&').forEach(item => {
                if (item.indexOf('=') !== -1) {
                    let splitItem = item.split('=')
                    obj[splitItem[0]] = splitItem[1]
                } else {
                    console.warn("error"); 
                }
            })
            return obj
        } else if (parameterType == 'object') {
            let stringParameter = '?'
            let parameterEntries = Object.entries(parameter);
            parameterEntries.forEach((key, index) => {
                let itemString = `${key[0]}=${key[1]}${parameterEntries.length - 1 == index ? '' : '&'}`;
                stringParameter += itemString
            })
            return stringParameter
        }
    }
    /**
     * @description Fetch web search results
     * @param { string } BingSearchParameter - query string built from the search config and the question
     * @returns { Promise } - the search response
     */
    async findNetworkdata(BingSearchParameter) {
        // console.log(BingSearchParameter, 'BingSearchParameter');
        return new Promise((resolve, reject) => {
            // NOTE: the original code interpolated the whole config object here; the search endpoint
            // base URL is assumed to live somewhere in config (the key below is a placeholder, adjust it)
            axios.get(`${this.ctx.app.config.searchBaseUrl}${BingSearchParameter}`).then(res => resolve(res)).catch(err => reject(err))
        })
    }
    /**
     * @description Parse HTML into plain text
     * @param {Object|Array} html - HTML data
     * @returns { Array } - parsed text
     */
    parsingHTML(html) {
        if (!html) throw new Error('missing html parameter');
        const htmlValues = Array.isArray(html) ? html : [html];
        return htmlValues.map(item => htmlToText(item));
    }
    // Query the vector database for similar entries
    findSimilarData(pineconeParameter) {
        const { apiKey, environment } = this.ctx.app.config.pinecone;
        return new Promise((resolve, reject) => {
            // instantiate the Pinecone client
            const client = new PineconeClient();
            // initialize the client (index host: https://first-database-a81708f.svc.us-east-1-aws.pinecone.io)
            client.init({
                apiKey,
                environment,
            }).then(() => {
                const pineconeIndex = client.Index('first-database');
                const queryRequest = Object.assign({
                    vector: [],
                    topK: 10,
                    includeValues: true,
                    includeMetadata: true,
                    namespace: "",
                }, pineconeParameter);
                pineconeIndex.query({ queryRequest }).then(res => {
                    resolve(res)
                }).catch(err => {
                    reject(err)
                })
            })
        })
    }
    // Convert text into an embedding vector via OpenAI
    getEmbeddings(input, model = 'text-embedding-ada-002') {
        return new Promise((resolve, reject) => {
            const configuration = new Configuration({
                apiKey: this.ctx.app.config.openai.apiKey,
                basePath: this.ctx.app.config.openai.basePath
            });
            const openai = new OpenAIApi(configuration);
            openai.createEmbedding({
                model,
                input
            }).then(res => {
                resolve(res)
            }).catch(err => {
                reject(err)
            })
        })
    }
    /**
     * @description Build a system prompt from web-search results
     * @param { Object } parameter - search parameters
     */
    async getNetworkPrompt(parameter = {}) {
        if (!parameter.q) return;
        let webSearchParameter = this.routingParameterGeneration(Object.assign({ ...this.ctx.app.config.GoogleSearch }, parameter, { q: encodeURI(parameter.q) }));
        let searchResults = await this.findNetworkdata(webSearchParameter); // fetch related URLs
        let webDatas = await this.parsingWebPageContent(searchResults.data.items, { question: parameter.q, top: 1 });
        return {
            role: 'system',
            content: ` You can refer to the following online search results for ${decodeURIComponent(parameter.q)} to improve your answer. If it is not helpful to you, please ignore this prompt. Network search data \n ${webDatas.map((item, index) => ` ${index + 1} \n content:${item.value} \n Network source:${item.link} \n `).join('\n')}`
        }
    }
    async webCorrelationAnalysis({ question, websites }) {
        websites = websites.map(item => { let { title, link, snippet } = item; return { title, link, snippet } });
        if (websites.length > 5) websites.length = 5; // keep at most the top 5 candidates
        const configuration = new Configuration({
            apiKey: this.ctx.app.config.openai.apiKey,
            basePath: this.ctx.app.config.openai.basePath
        });
        const openai = new OpenAIApi(configuration);
        let chatCompletion = [];
        await openai.createChatCompletion({
            model: "gpt-3.5-turbo-0613",
            messages: [
                { role: "system", content: `Query a question related to the user's question in the provided JSON list and pass the JSON as a parameter to call the parseWebPageContent method` },
                {
                    role: "system",
                    content: `JSON data: \n ${JSON.stringify(websites)}`
                },
                { role: "user", content: question }
            ],
            functions: [{
                "name": "parseWebPageContent",
                "description": "This is a method of obtaining web page content by parsing JSON data",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "link": {
                            "type": "string",
                            "description": `This is the 'link' field in JSON data`,
                        },
                        "title": {
                            "type": "string",
                            "description": `This is the 'title' field in JSON data`,
                        },
                        "snippet": {
                            "type": "string",
                            "description": `This is the 'snippet' field in JSON data`,
                        }
                    },
                    "required": ["link", "title", "snippet"],
                }
            }]
        }).then(res => {
            if (res.data.choices[0].message?.function_call) {
                let { arguments: parameters } = res.data.choices[0].message?.function_call;
                chatCompletion.push(JSON.parse(parameters));
            }
        }).catch(err => {
            console.log(err)
        });
        return chatCompletion;
    }
    /**
     * @description Fetch pages by URL, parse their content and generate summaries
     * @param {Array} websites - search result items (each with a 'link' field)
     * @param { Number } top - how many results to process
     */
    async parsingWebPageContent(websites, { question, top = 1 }) {
        websites.forEach(item => console.log(item.snippet));
        if (websites.length > top) websites.length = top;
        let websiteValues = await Promise.all(websites.map(async item => {
            let { link } = item;
            let html;
            await axios.get(link).then(res => { html = res }).catch(err => { html = null; console.log(err) });
            let value = '';
            if (html) {
                value = await this.generateSummary(question, this.parsingHTML(html.data));
            }
            return { value, link }
        }));
        return websiteValues
    }
    /**
     * @description Use OpenAI to summarize the fetched content based on the user's question
     */
    async generateSummary(question, value) {
        let enc = encodingForModel('gpt-3.5-turbo');
        let valueTokens = enc.encode(value.join(","));
        let residueTokensLength = 3096 - enc.encode(question).length;
        if (valueTokens.length > residueTokensLength) valueTokens.length = residueTokensLength;
        const configuration = new Configuration({
            apiKey: this.ctx.app.config.openai.apiKey,
            basePath: this.ctx.app.config.openai.basePath
        });
        const openai = new OpenAIApi(configuration);
        let chatCompletion;
        await openai.createChatCompletion({
            model: "gpt-3.5-turbo",
            messages: [{ role: "system", content: `Summarize the provided network search content based on user questions` }, {
                role: "system",
                content: `Network search results: \n ${enc.decode(valueTokens)}`
            }, { role: "user", content: question }],
        }).then(res => {
            chatCompletion = res.data.choices[0].message.content
        }).catch(err => {
            console.log(err)
            chatCompletion = '';
        });
        return chatCompletion;
    }

    functionCallSplicing(FunctionConfiguration, item) {
        let { function_call = {} } = item.choices[0].delta;
        return Object.assign(FunctionConfiguration, function_call, { arguments: FunctionConfiguration.arguments + (function_call?.arguments || '') });
    }
    /**
     * @description Send a question to OpenAI
     * @param {object} questionConfiguration - OpenAI chat-completion parameters
     * @param {Object} callBackConfiguration - callback object
     * @param {Function} callBackConfiguration.callbackFn - invoked with each parsed response chunk
     * @param {Function} callBackConfiguration.readStreamCallback - invoked with the readable stream (only when questionConfiguration.stream is true), so the caller can stop or destroy it
     */
    async askQuestion(questionConfiguration = this.questionConfiguration, callBackConfiguration) {
        const data = Object.assign({}, this.questionConfiguration, questionConfiguration);
        const askQuestionCurlParameter = Object.assign({}, this.askQuestionCurlParameter, { data });
        this.ctx.curl(`${this.ctx.app.config.openai.basePath}/chat/completions`, askQuestionCurlParameter).then(res => {
            let BrokenData = [];
            let FunctionConfiguration = { is: false, name: "", arguments: "" };
            if (askQuestionCurlParameter.data.stream) {
                if (callBackConfiguration.hasOwnProperty('readStreamCallback')) callBackConfiguration.readStreamCallback(res);
                res.res.on('data', dataStream => {
                    // parse the SSE chunk(s); incomplete JSON fragments are buffered in BrokenData and spliced later
                    const chunks = this.dataProcessing(dataStream, BrokenData);
                    // once the model starts a function_call, accumulate its arguments instead of streaming content to the caller
                    if (chunks.length && chunks[0].choices[0].delta.hasOwnProperty('function_call')) FunctionConfiguration.is = true;
                    chunks.forEach(item => FunctionConfiguration.is ? this.functionCallSplicing(FunctionConfiguration, item) : callBackConfiguration.callbackFn(item));
                });
                res.res.on('close', () => {
                    if (FunctionConfiguration.is) this.functionCall(FunctionConfiguration, { messages: questionConfiguration.messages, callBackConfiguration });
                })
            } else {
                callBackConfiguration.callbackFn(res.data)
            }
        }).catch(error => {
            callBackConfiguration.callbackFn({ catch: error })
        })
    }
    getLocalPrompt({ content,namespace, pineconeParameter = {} }) {
        let enc = encodingForModel('gpt-3.5-turbo');
        let contentLength = enc.encode(content).length;
        return new Promise((resolve, reject) => {
            this.getEmbeddings(content).then(res => {
                let vector = res.data.data[0].embedding;
                this.findSimilarData({ ...pineconeParameter, vector, namespace }).then(res => {
                    // Concatenate the matched chunks, capping the total so it fits alongside the question
                    let promptContent = res.matches.reduce((total, currentValue) => {
                        let calLength = 3596 - contentLength - total.textLength;
                        let remainingLength = calLength > 0 ? calLength : 0;
                        if (!remainingLength) return total;
                        let currentValueCodes = enc.encode(currentValue.metadata.text);
                        if (currentValueCodes.length > remainingLength) currentValueCodes.length = remainingLength;
                        let text = total.text + enc.decode(currentValueCodes);
                        let textLength = total.textLength + currentValueCodes.length;
                        return { text, textLength };
                    }, { text: '', textLength: 0 });
                    resolve({
                        role: 'system',
                        content: `Here is the result of querying the remote vector database \n content: \n ${promptContent.text} `
                    })
                }).catch(err => {
                    reject(err)
                })
            }).catch(err => reject(err))
        })
    }
    async documentReading() {
        let { filepath, filename } = this.ctx.request.files[0];
        // take the last segment after '.' as the file extension (e.g. 'pdf', 'txt')
        let mimeType = filepath.split('.').pop();
        console.log('file type', mimeType)
        // splitter: chunk the document before embedding (there is a size limit per embedding)
        const splitter = new RecursiveCharacterTextSplitter({
            chunkSize: 1000
        });
        // pick a loader for the file type
        const loader = this.documentClassification(filepath, mimeType);
        console.log('reading document')
        const docs = await loader.load();
        // split into chunks
        const splitterDocs = await splitter.splitDocuments(docs);
        console.log('document split complete');
        // rough token estimate: number of chunks * chunkSize
        return { splitterDocs, useTokens: splitterDocs.length * 1000 }
    }
    async findNamespace() {
        let namespaces = await this.ctx.model.Namespace.findAll({
            where: {
                user_id: this.ctx.userinfo.id
            }
        });
        return namespaces
    }
    async createdOrUpdateNamespace() {
        let result = null;
        await this.ctx.model.Namespace.findOrCreate(
            {
                where: {
                    name:this.ctx.request.body.namespace, 
                    user_id: this.ctx.userinfo.id
                },
                defaults: {
                    name: this.ctx.request.body.namespace, 
                    user_id: this.ctx.userinfo.id,
                    upload_status: 'Ready',
                    details:this.ctx.request.body.describe,
                    title:this.ctx.request.body.title||''
                }
            }
        ).then(async (res) => {
            let namespace = res[0];
            if (namespace.upload_status == 'Loading' || namespace.upload_status == 'Adding') this.ctx.throw(500, 'This namespace is currently in use');
            result = await namespace.update({
                upload_status: namespace.upload_status == 'Ready' ? 'Loading' : 'Adding'
            });
        }); 
        return result 
    }
    async fileUpload({ splitterDocs, useTokens }) {
        let upload_status ;
        const { apiKey, environment } = this.ctx.app.config.pinecone;
        const client = new PineconeClient();
        await client.init({ apiKey, environment });
        const pineconeIndex = client.Index('first-database');
        let namespace =  `${this.ctx.userinfo.id}-${this.ctx.request.body.namespace}`; 
        await PineconeStore.fromDocuments(splitterDocs, new OpenAIEmbeddings({
            openAIApiKey: this.ctx.app.config.openai.apiKey
        }, {
            basePath: this.ctx.app.config.openai.basePath
        }), {
            pineconeIndex,
            namespace,
        }).then(res => {
            console.log('upload succeeded');
            upload_status = 'Success'
        }).catch(err => {
            console.log(err, 'err');
            upload_status = 'Fail'
        }).finally(() => {
            // remove the temporary upload file whether the upsert succeeded or failed
            let { filepath } = this.ctx.request.files[0];
            fs.unlink(filepath, (err) => {
                if (err) console.log(err, 'failed to delete file'); else console.log('File deleted!');
            });
        });
        return upload_status
    }
    documentClassification(filepath, mimeType) {
        if (!filepath) throw new Error(`Invalid file path: ${filepath}`)
        let loader = null;
        switch (mimeType) {
            case 'pdf':
                loader = new PDFLoader(filepath)
                break; 
            case 'epub':
                loader = new EPubLoader(filepath)
                break;
            case 'txt':
                loader = new TextLoader(filepath)
                break;
            case 'docx':
                loader = new DocxLoader(filepath)
                break;
            default:
                break;
        }
        if (!loader) throw new Error(`Unsupported file type: ${mimeType}`)
        return loader;
    }
    /**
     * @description Dispatch the function_call requested by the model, then continue the conversation with its result
     * @param {Object} FunctionConfiguration - the accumulated function_call object (name + arguments)
     * @param {Object} askQuestionParameter - askQuestion parameters, used to resume the conversation after the call completes
     */
    functionCall(FunctionConfiguration, askQuestionParameter) { 
        console.log(FunctionConfiguration,'FunctionConfiguration')
        let { messages, callBackConfiguration } = askQuestionParameter;
        this[FunctionConfiguration.name](JSON.parse(FunctionConfiguration.arguments)).then(res => {
            // inject the plugin's result as a system message and re-ask together with the user's last message
            this.askQuestion({ messages: [res, messages[messages.length - 1]], model: 'gpt-3.5-turbo-0613' }, callBackConfiguration)
        }).catch(err => callBackConfiguration.callbackFn({ catch: err }))
    }

}
module.exports = OpenaiService 
