Vanna.AI 使用记录

提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档


前言

记录Vanna.AI,通过 openAI + Chroma + [Mysql、Oracle、Microsoft SQL Server、SQLite] 的组合进行本地部署使用,参考官网的执行示例

前提:openAI账号属于付费版,没有接口的访问速率限制,出现速率限制可参考https://platform.openai.com/docs/guides/rate-limits/error-mitigation

框架代码:https://github.com/vanna-ai/vanna-flask


一、openAI + Chroma + Mysql

"""
使用组合:OpenAI + ChromaDB + MySQL
库包要求:pip install 'vanna[chromadb,openai,mysql]'
需要准备 openai的api_key,使用的模型是gpt-3.5-turbo

其中
from vanna.openai import OpenAI_Chat
from vanna.chromadb import ChromaDB_VectorStore


class BasicVannaService(ChromaDB_VectorStore, OpenAI_Chat):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        OpenAI_Chat.__init__(self, config=config)

"""

from vanna.exceptions import ValidationError

from services.basic_vanna_service import BasicVannaService

from entity.decorators import retry_with_exponential_backoff


class VannaMySQLService:

    def __init__(self, openai_api_key: str, openai_model_name: str, **kwargs):
        self.openai_api_key = openai_api_key

        self.openai_model_name = openai_model_name

        self.vn = BasicVannaService(config={'api_key': self.openai_api_key, 'model': self.openai_model_name})
        print(" ------------------ vanna_mysql_service: 初始化成功 -----------------------")

        self.vanna_connect_mysql(**kwargs)
        print(" ------------------ vanna_mysql_service: 链接数据库成功 -----------------------")

        self.vanna_mysql_training()
        print(" ------------------ vanna_mysql_service: 训练数据完成 -----------------------")

    def vanna_connect_mysql(self, **kwargs):
        """连接MySQL数据库"""
        try:
            self.vn.connect_to_mysql(
                host=kwargs.get('host'),
                dbname=kwargs.get('dbname'),
                user=kwargs.get('user'),
                password=kwargs.get('password'),
                port=kwargs.get('port')
            )
        except Exception as e:
            print(e)

    def run_sql(self, sql):
        return self.vn.run_sql(sql)

    def get_training_plan_generic(self, schema):
        return self.vn.get_training_plan_generic(schema)

    @retry_with_exponential_backoff
    def train(self, **kwargs):
        self.vn.train(**kwargs)

    def get_training_data(self):
        return self.vn.get_training_data()

    def vanna_mysql_training(self):
        """训练数据"""
        # ----------------------------计划训练-----------------------------------------------------
        df_information_schema = self.run_sql("SELECT * FROM INFORMATION_SCHEMA.COLUMNS")
        print(df_information_schema)
        print(" ------------------ vanna_mysql_service: 获取训练计划的通用方法 -----------------------")
        plan = self.get_training_plan_generic(df_information_schema)
        print(plan)
        self.train(plan=plan)

        # ----------------------------DDL训练-----------------------------------------------------
        self.train(ddl="""
            CREATE TABLE IF NOT EXISTS `user` (
                id INT PRIMARY KEY,
                name VARCHAR(100),
                age INT
            )
        """)

        # ----------------------------documentation训练-----------------------------------------------------
        print(" ------------------ vanna_mysql_service: 通过给定文档训练模型 -----------------------")
        self.train(
            documentation="Our business defines OTIF score as the percentage of orders that are delivered on time and "
                          "in full"
        )

        # ----------------------------sql训练-----------------------------------------------------
        try:
            print(" ------------------ vanna_mysql_service: 通过sql来训练模型 -----------------------")
            self.train(sql="SELECT * FROM `test.user` WHERE `age`>5;")
        except ValidationError as e:
            print(e)

        print(" ------------------ vanna_mysql_service: 获取训练数据 -----------------------")
        training_data = self.get_training_data()
        print(training_data)

    @retry_with_exponential_backoff
    def vanna_mysql_ask(self, **kwargs):
        return self.vn.ask(**kwargs)

    def get_vn(self):
        return self.vn


if __name__ == '__main__':
    key = ""  # openai 的key https://platform.openai.com/api-keys
    model_name = "gpt-3.5-turbo"
    host = "localhost"
    dbname = "test"
    user = "root"
    password = "123456"
    port = 3306
    vanna_server = VannaMySQLService(
        openai_api_key=key,
        openai_model_name=model_name,
        host=host,
        dbname=dbname,
        user=user,
        password=password, port=port
    )
    vn = vanna_server.get_vn()
    from vanna.flask import VannaFlaskApp

    app = VannaFlaskApp(vn)
    app.run()

代码中,Vanna的方法拆分是因为遇到了 openAI的接口访问速率限制的问题,依照openAI的解决方案添加滞后重试即装饰器retry_with_exponential_backoff,没有效果。。。

二、openAI + Chroma + Oracle

"""
使用组合:OpenAI + ChromaDB + Oracle
库包要求:
    pip install 'vanna[chromadb,openai,mssql]'
    pip install cx_Oracle
需要准备 openai的api_key,使用的模型是gpt-3.5-turbo

其中
from vanna.openai import OpenAI_Chat
from vanna.chromadb import ChromaDB_VectorStore


class BasicVannaService(ChromaDB_VectorStore, OpenAI_Chat):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        OpenAI_Chat.__init__(self, config=config)

"""
import cx_Oracle

from services.basic_vanna_service import BasicVannaService

from entity.decorators import retry_with_exponential_backoff


class VannaOracleService:

    def __init__(self, openai_api_key: str, openai_model_name: str, **kwargs):
        self.openai_api_key = openai_api_key

        self.openai_model_name = openai_model_name

        self.vn = BasicVannaService(config={'api_key': self.openai_api_key, 'model': self.openai_model_name})
        print(" ------------------ vanna_oracle_service: 初始化成功 -----------------------")

        self.vanna_connect_oracle(**kwargs)
        print(" ------------------ vanna_oracle_service: 链接数据库成功 -----------------------")

        self.vanna_oracle_training()
        print(" ------------------ vanna_oracle_service: 训练数据完成 -----------------------")

    def vanna_connect_oracle(self, **kwargs):
        """连接oracle数据库"""
        try:
            dsn = cx_Oracle.makedsn(kwargs.get('host'), kwargs.get('port'), kwargs.get("service_name"))
            self.vn.connect_to_oracle(user=kwargs.get('user'), password=kwargs.get('password'), dsn=dsn)
        except Exception as e:
            print(e)

    def run_sql(self, sql):
        return self.vn.run_sql(sql)

    def get_training_plan_generic(self, schema):
        return self.vn.get_training_plan_generic(schema)

    @retry_with_exponential_backoff
    def train(self, **kwargs):
        self.vn.train(**kwargs)

    def get_training_data(self):
        return self.vn.get_training_data()

    def vanna_oracle_training(self):
        """训练数据"""
        # ----------------------------计划训练-----------------------------------------------------
        try:
            df_information_schema = self.run_sql(
                """
               SELECT main.OWNER AS table_catalog,
                      main.OWNER AS table_schema,
                      main.*
               FROM all_tab_cols main
               WHERE main.OWNER = 'IQMS'
                 AND main.TABLE_NAME LIKE 'ARINVT%'
               """)
            print(df_information_schema)
            print(" ------------------ vanna_oracle_service: 获取训练计划的通用方法 -----------------------")
            plan = self.get_training_plan_generic(df_information_schema)
            print(plan)
            self.train(plan=plan)
        except Exception as e:
            print("error in plan training:{}".format(e))

        # ----------------------------DDL训练-----------------------------------------------------
        # self.train(ddl="""
        #     CREATE TABLE IF NOT EXISTS `user` (
        #         id INT PRIMARY KEY,
        #         name VARCHAR(100),
        #         age INT
        #     )
        # """)

        # ----------------------------documentation训练-----------------------------------------------------
        # print(" ------------------ vanna_oracle_service: 通过给定文档训练模型 -----------------------")
        # self.train(
        #     documentation="Our business defines OTIF score as the percentage of orders that are delivered on time and "
        #                   "in full"
        # )

        # ----------------------------sql训练-----------------------------------------------------
        # try:
        #     print(" ------------------ vanna_oracle_service: 通过sql来训练模型 -----------------------")
        #     self.train(sql="SELECT * FROM `test.user` WHERE `age`>5;")
        # except ValidationError as e:
        #     print(e)

        print(" ------------------ vanna_oracle_service: 获取训练数据 -----------------------")
        training_data = self.get_training_data()
        print(training_data)

    @retry_with_exponential_backoff
    def vanna_oracle_ask(self, **kwargs):
        return self.vn.ask(**kwargs)

    def get_vn(self):
        return self.vn


if __name__ == '__main__':
    key = ""  # openai 的key https://platform.openai.com/api-keys

    model_name = "gpt-3.5-turbo"
    password = ''
    user = ''
    host = ""
    service_name = ""
    port = 1521
    vanna_server = VannaOracleService(
        openai_api_key=key,
        openai_model_name=model_name,
        user=user,
        password=password,
        host=host,
        port=port,
        service_name=service_name
    )
    vn = vanna_server.get_vn()
    from vanna.flask import VannaFlaskApp

    app = VannaFlaskApp(vn)
    app.run()

三、openAI + Chroma + Microsoft SQL Server

"""
使用组合:OpenAI + ChromaDB + SqlServer
库包要求:
    pip install 'vanna[chromadb,openai]'
需要准备 openai的api_key,使用的模型是gpt-3.5-turbo

其中
from vanna.openai import OpenAI_Chat
from vanna.chromadb import ChromaDB_VectorStore


class BasicVannaService(ChromaDB_VectorStore, OpenAI_Chat):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        OpenAI_Chat.__init__(self, config=config)

"""

from services.basic_vanna_service import BasicVannaService

from entity.decorators import retry_with_exponential_backoff


class VannaSqlServerService:

    def __init__(self, openai_api_key: str, openai_model_name: str):
        self.openai_api_key = openai_api_key

        self.openai_model_name = openai_model_name

        self.vn = BasicVannaService(config={'api_key': self.openai_api_key, 'model': self.openai_model_name})
        print(" ------------------ vanna_sqlserver_service: 初始化成功 -----------------------")

        self.vanna_connect_sqlserver()
        print(" ------------------ vanna_sqlserver_service: 链接数据库成功 -----------------------")

        self.vanna_sqlserver_training()
        print(" ------------------ vanna_sqlserver_service: 训练数据完成 -----------------------")

    def vanna_connect_sqlserver(self):
        """连接sqlserver数据库"""
        try:
            vn.connect_to_mssql(
                odbc_conn_str='DRIVER={ODBC Driver 17 for SQL Server};SERVER=localhost;DATABASE=testplm;UID=sa;PWD=123456')  # You can use the ODBC connection string here
        except Exception as e:
            print(e)

    def run_sql(self, sql):
        return self.vn.run_sql(sql)

    def get_training_plan_generic(self, schema):
        return self.vn.get_training_plan_generic(schema)

    @retry_with_exponential_backoff
    def train(self, **kwargs):
        self.vn.train(**kwargs)

    def get_training_data(self):
        return self.vn.get_training_data()

    def vanna_sqlserver_training(self):
        """训练数据"""
        # ----------------------------计划训练-----------------------------------------------------
        try:
            df_information_schema = self.run_sql("select * from [guest].[students]")
            print(df_information_schema)
            print(" ------------------ vanna_sqlserver_service: 获取训练计划的通用方法 -----------------------")
            plan = self.get_training_plan_generic(df_information_schema)
            print(plan)
            self.train(plan=plan)
        except Exception as e:
            print("error in plan training:{}".format(e))

        # ----------------------------DDL训练-----------------------------------------------------
        self.train(ddl="""
            CREATE TABLE [guest].[students] (
              [id] int  NOT NULL,
              [name] varchar(255) COLLATE Chinese_PRC_CI_AS  NULL,
              [age] int  NULL
            )
        """)

        # ----------------------------documentation训练-----------------------------------------------------
        # print(" ------------------ vanna_sqlserver_service: 通过给定文档训练模型 -----------------------")
        # self.train(
        #     documentation="Our business defines OTIF score as the percentage of orders that are delivered on time and "
        #                   "in full"
        # )

        # ----------------------------sql训练-----------------------------------------------------
        # try:
        #     print(" ------------------ vanna_sqlserver_service: 通过sql来训练模型 -----------------------")
        #     self.train(sql="SELECT * FROM `test.user` WHERE `age`>5;")
        # except ValidationError as e:
        #     print(e)

        print(" ------------------ vanna_sqlserver_service: 获取训练数据 -----------------------")
        training_data = self.get_training_data()
        print(training_data)

    @retry_with_exponential_backoff
    def vanna_sqlserver_ask(self, **kwargs):
        return self.vn.ask(**kwargs)

    def get_vn(self):
        return self.vn


if __name__ == '__main__':
    key = ""  # openai 的key https://platform.openai.com/api-keys

    model_name = "gpt-3.5-turbo"

    vanna_server = VannaSqlServerService(
        openai_api_key=key,
        openai_model_name=model_name
    )
    vn = vanna_server.get_vn()
    from vanna.flask import VannaFlaskApp

    app = VannaFlaskApp(vn)
    app.run()

四、openAI + Chroma + SQLite

"""
使用组合:OpenAI + ChromaDB + sqlite
库包要求:pip install 'vanna[chromadb,openai]'
需要准备 openai的api_key,使用的模型是gpt-3.5-turbo

其中
from vanna.openai import OpenAI_Chat
from vanna.chromadb import ChromaDB_VectorStore


class BasicVannaService(ChromaDB_VectorStore, OpenAI_Chat):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        OpenAI_Chat.__init__(self, config=config)

"""

from vanna.exceptions import ValidationError

from services.basic_vanna_service import BasicVannaService

from entity.decorators import retry_with_exponential_backoff


class VannaSQLiteService:

    def __init__(self, openai_api_key: str, openai_model_name: str, sqlite: str):
        self.openai_api_key = openai_api_key

        self.openai_model_name = openai_model_name

        self.vn = BasicVannaService(config={'api_key': self.openai_api_key, 'model': self.openai_model_name})
        print(" ------------------ vanna_sqlite_service: 初始化成功 -----------------------")

        self.vanna_connect_sqlite(sqlite)
        print(" ------------------ vanna_sqlite_service: 链接数据库成功 -----------------------")

        self.vanna_mysql_training()
        print(" ------------------ vanna_sqlite_service: 训练数据完成 -----------------------")

    def vanna_connect_sqlite(self, sqlite):
        """连接数据库"""
        try:
            self.vn.connect_to_sqlite(sqlite)
        except Exception as e:
            print(e)

    def run_sql(self, sql):
        return self.vn.run_sql(sql)

    def get_training_plan_generic(self, schema):
        return self.vn.get_training_plan_generic(schema)

    @retry_with_exponential_backoff
    def train(self, **kwargs):
        self.vn.train(**kwargs)

    def get_training_data(self):
        return self.vn.get_training_data()

    def vanna_mysql_training(self):
        # ----------------------------计划训练-----------------------------------------------------
        df_information_schema = self.run_sql("PRAGMA table_info(students);")
        print(df_information_schema)
        print(" ------------------ vanna_mysql_service: 获取训练计划的通用方法 -----------------------")
        plan = self.get_training_plan_generic(df_information_schema)
        self.train(plan=plan)

        # ----------------------------DDL训练-----------------------------------------------------
        self.train(ddl="""
            CREATE TABLE "students" (
              "id" INTEGER NOT NULL,
              "name" TEXT,
              "age" INTEGER,
              PRIMARY KEY ("id")
            )
        """)

        # ----------------------------documentation训练-----------------------------------------------------
        print(" ------------------ vanna_sqlite_service: 通过给定文档训练模型 -----------------------")
        self.train(
            documentation="Our business defines OTIF score as the percentage of orders that are delivered on time and "
                          "in full"
        )

        # ----------------------------sql训练-----------------------------------------------------
        try:
            self.train(sql="SELECT * FROM `students` WHERE `age`>5;")
            print(" ------------------ vanna_sqlite_service: 通过sql来训练模型 -----------------------")
        except ValidationError as e:
            print(e)

        training_data = self.get_training_data()
        print(" ------------------ vanna_sqlite_service: 获取训练数据 -----------------------")
        print(training_data)

    def vanna_mysql_ask(self, **kwargs):
        return self.vn.ask(**kwargs)

    def get_vn(self):
        return self.vn


if __name__ == '__main__':
    key = ""  # openai 的key https://platform.openai.com/api-keys
    model_name = "gpt-3.5-turbo"
    sqlite_url = "./test.sqlite3"
    vanna_server = VannaSQLiteService(
        openai_api_key=key,
        openai_model_name=model_name,
        sqlite=sqlite_url
    )
    vn = vanna_server.get_vn()
    from vanna.flask import VannaFlaskApp

    app = VannaFlaskApp(vn)
    app.run()


总结

基础的四种数据库示例执行,可以将官方的flask框架的源码进行改造,结合自己的数据库和LLM做搜索加强,使用成本不高。

下班找女朋友去。

记录一个问题

在使用windows系统+chroma进行数据训练时会触发的特有问题、
Vanna训练时进程结束

”ORA-24550: signal received: Unhandled exception: Code=c0000005 Flags=0

00000251140251DE<-00000251140253D0<-0000025114024429<-00000251141BD333<-00007FFA8FAE6227<-00007FFA932CE431<-00007FFA932B6506<-00007FFA932CA49D<-00007FFA9325FD43<-00007FFA932C960A<-00007FFA4B192EB0<-00007FFA38073B2C<-00007FFA380768A9<-00007FFA38093D6B<-00007FFA38093E04<-00007FFA3806BBA0<-00007FFA405345D6<-00007FFA4053BCEF<-00007FFA4063A469<-00007FFA404CCD4F<-00007FFA404CA4B7<-00007FFA4050061C<-00007FFA405004F3<-00007FFA404D1305<-00007FFA404C9165“

另外两种种表现形式是
1、进程结束
2、python意外退出
这种情况出现的原因是chroma的版本过高,将chromadb的版本回退到0.5.3就可以解决

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值