提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
文章目录
前言
记录Vanna.AI,通过 openAI + Chroma + [Mysql、Oracle、Microsoft SQL Server、SQLite] 的组合进行本地部署使用,参考官网的执行示例
前提:openAI账号属于付费版,没有接口的访问速率限制,出现速率限制可参考https://platform.openai.com/docs/guides/rate-limits/error-mitigation
框架代码:https://github.com/vanna-ai/vanna-flask
一、openAI + Chroma + Mysql
"""
使用组合:OpenAI + ChromaDB + MySQL
库包要求:pip install 'vanna[chromadb,openai,mysql]'
需要准备 openai的api_key,使用的模型是gpt-3.5-turbo
其中
from vanna.openai import OpenAI_Chat
from vanna.chromadb import ChromaDB_VectorStore
class BasicVannaService(ChromaDB_VectorStore, OpenAI_Chat):
def __init__(self, config=None):
ChromaDB_VectorStore.__init__(self, config=config)
OpenAI_Chat.__init__(self, config=config)
"""
from vanna.exceptions import ValidationError
from services.basic_vanna_service import BasicVannaService
from entity.decorators import retry_with_exponential_backoff
class VannaMySQLService:
def __init__(self, openai_api_key: str, openai_model_name: str, **kwargs):
self.openai_api_key = openai_api_key
self.openai_model_name = openai_model_name
self.vn = BasicVannaService(config={'api_key': self.openai_api_key, 'model': self.openai_model_name})
print(" ------------------ vanna_mysql_service: 初始化成功 -----------------------")
self.vanna_connect_mysql(**kwargs)
print(" ------------------ vanna_mysql_service: 链接数据库成功 -----------------------")
self.vanna_mysql_training()
print(" ------------------ vanna_mysql_service: 训练数据完成 -----------------------")
def vanna_connect_mysql(self, **kwargs):
"""连接MySQL数据库"""
try:
self.vn.connect_to_mysql(
host=kwargs.get('host'),
dbname=kwargs.get('dbname'),
user=kwargs.get('user'),
password=kwargs.get('password'),
port=kwargs.get('port')
)
except Exception as e:
print(e)
def run_sql(self, sql):
return self.vn.run_sql(sql)
def get_training_plan_generic(self, schema):
return self.vn.get_training_plan_generic(schema)
@retry_with_exponential_backoff
def train(self, **kwargs):
self.vn.train(**kwargs)
def get_training_data(self):
return self.vn.get_training_data()
def vanna_mysql_training(self):
"""训练数据"""
# ----------------------------计划训练-----------------------------------------------------
df_information_schema = self.run_sql("SELECT * FROM INFORMATION_SCHEMA.COLUMNS")
print(df_information_schema)
print(" ------------------ vanna_mysql_service: 获取训练计划的通用方法 -----------------------")
plan = self.get_training_plan_generic(df_information_schema)
print(plan)
self.train(plan=plan)
# ----------------------------DDL训练-----------------------------------------------------
self.train(ddl="""
CREATE TABLE IF NOT EXISTS `user` (
id INT PRIMARY KEY,
name VARCHAR(100),
age INT
)
""")
# ----------------------------documentation训练-----------------------------------------------------
print(" ------------------ vanna_mysql_service: 通过给定文档训练模型 -----------------------")
self.train(
documentation="Our business defines OTIF score as the percentage of orders that are delivered on time and "
"in full"
)
# ----------------------------sql训练-----------------------------------------------------
try:
print(" ------------------ vanna_mysql_service: 通过sql来训练模型 -----------------------")
self.train(sql="SELECT * FROM `test.user` WHERE `age`>5;")
except ValidationError as e:
print(e)
print(" ------------------ vanna_mysql_service: 获取训练数据 -----------------------")
training_data = self.get_training_data()
print(training_data)
@retry_with_exponential_backoff
def vanna_mysql_ask(self, **kwargs):
return self.vn.ask(**kwargs)
def get_vn(self):
return self.vn
if __name__ == '__main__':
key = "" # openai 的key https://platform.openai.com/api-keys
model_name = "gpt-3.5-turbo"
host = "localhost"
dbname = "test"
user = "root"
password = "123456"
port = 3306
vanna_server = VannaMySQLService(
openai_api_key=key,
openai_model_name=model_name,
host=host,
dbname=dbname,
user=user,
password=password, port=port
)
vn = vanna_server.get_vn()
from vanna.flask import VannaFlaskApp
app = VannaFlaskApp(vn)
app.run()
代码中,Vanna的方法拆分是因为遇到了 openAI的接口访问速率限制的问题,依照openAI的解决方案添加滞后重试即装饰器retry_with_exponential_backoff,没有效果。。。
二、openAI + Chroma + Oracle
"""
使用组合:OpenAI + ChromaDB + Oracle
库包要求:
pip install 'vanna[chromadb,openai,mssql]'
pip install cx_Oracle
需要准备 openai的api_key,使用的模型是gpt-3.5-turbo
其中
from vanna.openai import OpenAI_Chat
from vanna.chromadb import ChromaDB_VectorStore
class BasicVannaService(ChromaDB_VectorStore, OpenAI_Chat):
def __init__(self, config=None):
ChromaDB_VectorStore.__init__(self, config=config)
OpenAI_Chat.__init__(self, config=config)
"""
import cx_Oracle
from services.basic_vanna_service import BasicVannaService
from entity.decorators import retry_with_exponential_backoff
class VannaOracleService:
def __init__(self, openai_api_key: str, openai_model_name: str, **kwargs):
self.openai_api_key = openai_api_key
self.openai_model_name = openai_model_name
self.vn = BasicVannaService(config={'api_key': self.openai_api_key, 'model': self.openai_model_name})
print(" ------------------ vanna_oracle_service: 初始化成功 -----------------------")
self.vanna_connect_oracle(**kwargs)
print(" ------------------ vanna_oracle_service: 链接数据库成功 -----------------------")
self.vanna_oracle_training()
print(" ------------------ vanna_oracle_service: 训练数据完成 -----------------------")
def vanna_connect_oracle(self, **kwargs):
"""连接oracle数据库"""
try:
dsn = cx_Oracle.makedsn(kwargs.get('host'), kwargs.get('port'), kwargs.get("service_name"))
self.vn.connect_to_oracle(user=kwargs.get('user'), password=kwargs.get('password'), dsn=dsn)
except Exception as e:
print(e)
def run_sql(self, sql):
return self.vn.run_sql(sql)
def get_training_plan_generic(self, schema):
return self.vn.get_training_plan_generic(schema)
@retry_with_exponential_backoff
def train(self, **kwargs):
self.vn.train(**kwargs)
def get_training_data(self):
return self.vn.get_training_data()
def vanna_oracle_training(self):
"""训练数据"""
# ----------------------------计划训练-----------------------------------------------------
try:
df_information_schema = self.run_sql(
"""
SELECT main.OWNER AS table_catalog,
main.OWNER AS table_schema,
main.*
FROM all_tab_cols main
WHERE main.OWNER = 'IQMS'
AND main.TABLE_NAME LIKE 'ARINVT%'
""")
print(df_information_schema)
print(" ------------------ vanna_oracle_service: 获取训练计划的通用方法 -----------------------")
plan = self.get_training_plan_generic(df_information_schema)
print(plan)
self.train(plan=plan)
except Exception as e:
print("error in plan training:{}".format(e))
# ----------------------------DDL训练-----------------------------------------------------
# self.train(ddl="""
# CREATE TABLE IF NOT EXISTS `user` (
# id INT PRIMARY KEY,
# name VARCHAR(100),
# age INT
# )
# """)
# ----------------------------documentation训练-----------------------------------------------------
# print(" ------------------ vanna_oracle_service: 通过给定文档训练模型 -----------------------")
# self.train(
# documentation="Our business defines OTIF score as the percentage of orders that are delivered on time and "
# "in full"
# )
# ----------------------------sql训练-----------------------------------------------------
# try:
# print(" ------------------ vanna_oracle_service: 通过sql来训练模型 -----------------------")
# self.train(sql="SELECT * FROM `test.user` WHERE `age`>5;")
# except ValidationError as e:
# print(e)
print(" ------------------ vanna_oracle_service: 获取训练数据 -----------------------")
training_data = self.get_training_data()
print(training_data)
@retry_with_exponential_backoff
def vanna_oracle_ask(self, **kwargs):
return self.vn.ask(**kwargs)
def get_vn(self):
return self.vn
if __name__ == '__main__':
key = "" # openai 的key https://platform.openai.com/api-keys
model_name = "gpt-3.5-turbo"
password = ''
user = ''
host = ""
service_name = ""
port = 1521
vanna_server = VannaOracleService(
openai_api_key=key,
openai_model_name=model_name,
user=user,
password=password,
host=host,
port=port,
service_name=service_name
)
vn = vanna_server.get_vn()
from vanna.flask import VannaFlaskApp
app = VannaFlaskApp(vn)
app.run()
三、openAI + Chroma + Microsoft SQL Server
"""
使用组合:OpenAI + ChromaDB + SqlServer
库包要求:
pip install 'vanna[chromadb,openai]'
需要准备 openai的api_key,使用的模型是gpt-3.5-turbo
其中
from vanna.openai import OpenAI_Chat
from vanna.chromadb import ChromaDB_VectorStore
class BasicVannaService(ChromaDB_VectorStore, OpenAI_Chat):
def __init__(self, config=None):
ChromaDB_VectorStore.__init__(self, config=config)
OpenAI_Chat.__init__(self, config=config)
"""
from services.basic_vanna_service import BasicVannaService
from entity.decorators import retry_with_exponential_backoff
class VannaSqlServerService:
def __init__(self, openai_api_key: str, openai_model_name: str):
self.openai_api_key = openai_api_key
self.openai_model_name = openai_model_name
self.vn = BasicVannaService(config={'api_key': self.openai_api_key, 'model': self.openai_model_name})
print(" ------------------ vanna_sqlserver_service: 初始化成功 -----------------------")
self.vanna_connect_sqlserver()
print(" ------------------ vanna_sqlserver_service: 链接数据库成功 -----------------------")
self.vanna_sqlserver_training()
print(" ------------------ vanna_sqlserver_service: 训练数据完成 -----------------------")
def vanna_connect_sqlserver(self):
"""连接sqlserver数据库"""
try:
vn.connect_to_mssql(
odbc_conn_str='DRIVER={ODBC Driver 17 for SQL Server};SERVER=localhost;DATABASE=testplm;UID=sa;PWD=123456') # You can use the ODBC connection string here
except Exception as e:
print(e)
def run_sql(self, sql):
return self.vn.run_sql(sql)
def get_training_plan_generic(self, schema):
return self.vn.get_training_plan_generic(schema)
@retry_with_exponential_backoff
def train(self, **kwargs):
self.vn.train(**kwargs)
def get_training_data(self):
return self.vn.get_training_data()
def vanna_sqlserver_training(self):
"""训练数据"""
# ----------------------------计划训练-----------------------------------------------------
try:
df_information_schema = self.run_sql("select * from [guest].[students]")
print(df_information_schema)
print(" ------------------ vanna_sqlserver_service: 获取训练计划的通用方法 -----------------------")
plan = self.get_training_plan_generic(df_information_schema)
print(plan)
self.train(plan=plan)
except Exception as e:
print("error in plan training:{}".format(e))
# ----------------------------DDL训练-----------------------------------------------------
self.train(ddl="""
CREATE TABLE [guest].[students] (
[id] int NOT NULL,
[name] varchar(255) COLLATE Chinese_PRC_CI_AS NULL,
[age] int NULL
)
""")
# ----------------------------documentation训练-----------------------------------------------------
# print(" ------------------ vanna_sqlserver_service: 通过给定文档训练模型 -----------------------")
# self.train(
# documentation="Our business defines OTIF score as the percentage of orders that are delivered on time and "
# "in full"
# )
# ----------------------------sql训练-----------------------------------------------------
# try:
# print(" ------------------ vanna_sqlserver_service: 通过sql来训练模型 -----------------------")
# self.train(sql="SELECT * FROM `test.user` WHERE `age`>5;")
# except ValidationError as e:
# print(e)
print(" ------------------ vanna_sqlserver_service: 获取训练数据 -----------------------")
training_data = self.get_training_data()
print(training_data)
@retry_with_exponential_backoff
def vanna_sqlserver_ask(self, **kwargs):
return self.vn.ask(**kwargs)
def get_vn(self):
return self.vn
if __name__ == '__main__':
key = "" # openai 的key https://platform.openai.com/api-keys
model_name = "gpt-3.5-turbo"
vanna_server = VannaSqlServerService(
openai_api_key=key,
openai_model_name=model_name
)
vn = vanna_server.get_vn()
from vanna.flask import VannaFlaskApp
app = VannaFlaskApp(vn)
app.run()
四、openAI + Chroma + SQLite
"""
使用组合:OpenAI + ChromaDB + sqlite
库包要求:pip install 'vanna[chromadb,openai]'
需要准备 openai的api_key,使用的模型是gpt-3.5-turbo
其中
from vanna.openai import OpenAI_Chat
from vanna.chromadb import ChromaDB_VectorStore
class BasicVannaService(ChromaDB_VectorStore, OpenAI_Chat):
def __init__(self, config=None):
ChromaDB_VectorStore.__init__(self, config=config)
OpenAI_Chat.__init__(self, config=config)
"""
from vanna.exceptions import ValidationError
from services.basic_vanna_service import BasicVannaService
from entity.decorators import retry_with_exponential_backoff
class VannaSQLiteService:
def __init__(self, openai_api_key: str, openai_model_name: str, sqlite: str):
self.openai_api_key = openai_api_key
self.openai_model_name = openai_model_name
self.vn = BasicVannaService(config={'api_key': self.openai_api_key, 'model': self.openai_model_name})
print(" ------------------ vanna_sqlite_service: 初始化成功 -----------------------")
self.vanna_connect_sqlite(sqlite)
print(" ------------------ vanna_sqlite_service: 链接数据库成功 -----------------------")
self.vanna_mysql_training()
print(" ------------------ vanna_sqlite_service: 训练数据完成 -----------------------")
def vanna_connect_sqlite(self, sqlite):
"""连接数据库"""
try:
self.vn.connect_to_sqlite(sqlite)
except Exception as e:
print(e)
def run_sql(self, sql):
return self.vn.run_sql(sql)
def get_training_plan_generic(self, schema):
return self.vn.get_training_plan_generic(schema)
@retry_with_exponential_backoff
def train(self, **kwargs):
self.vn.train(**kwargs)
def get_training_data(self):
return self.vn.get_training_data()
def vanna_mysql_training(self):
# ----------------------------计划训练-----------------------------------------------------
df_information_schema = self.run_sql("PRAGMA table_info(students);")
print(df_information_schema)
print(" ------------------ vanna_mysql_service: 获取训练计划的通用方法 -----------------------")
plan = self.get_training_plan_generic(df_information_schema)
self.train(plan=plan)
# ----------------------------DDL训练-----------------------------------------------------
self.train(ddl="""
CREATE TABLE "students" (
"id" INTEGER NOT NULL,
"name" TEXT,
"age" INTEGER,
PRIMARY KEY ("id")
)
""")
# ----------------------------documentation训练-----------------------------------------------------
print(" ------------------ vanna_sqlite_service: 通过给定文档训练模型 -----------------------")
self.train(
documentation="Our business defines OTIF score as the percentage of orders that are delivered on time and "
"in full"
)
# ----------------------------sql训练-----------------------------------------------------
try:
self.train(sql="SELECT * FROM `students` WHERE `age`>5;")
print(" ------------------ vanna_sqlite_service: 通过sql来训练模型 -----------------------")
except ValidationError as e:
print(e)
training_data = self.get_training_data()
print(" ------------------ vanna_sqlite_service: 获取训练数据 -----------------------")
print(training_data)
def vanna_mysql_ask(self, **kwargs):
return self.vn.ask(**kwargs)
def get_vn(self):
return self.vn
if __name__ == '__main__':
key = "" # openai 的key https://platform.openai.com/api-keys
model_name = "gpt-3.5-turbo"
sqlite_url = "./test.sqlite3"
vanna_server = VannaSQLiteService(
openai_api_key=key,
openai_model_name=model_name,
sqlite=sqlite_url
)
vn = vanna_server.get_vn()
from vanna.flask import VannaFlaskApp
app = VannaFlaskApp(vn)
app.run()
总结
基础的四种数据库示例执行,可以将官方的flask框架的源码进行改造,结合自己的数据库和LLM做搜索加强,使用成本不高。
下班找女朋友去。
记录一个问题
在使用windows系统+chroma进行数据训练时会触发的特有问题、
Vanna训练时进程结束
”ORA-24550: signal received: Unhandled exception: Code=c0000005 Flags=0
00000251140251DE<-00000251140253D0<-0000025114024429<-00000251141BD333<-00007FFA8FAE6227<-00007FFA932CE431<-00007FFA932B6506<-00007FFA932CA49D<-00007FFA9325FD43<-00007FFA932C960A<-00007FFA4B192EB0<-00007FFA38073B2C<-00007FFA380768A9<-00007FFA38093D6B<-00007FFA38093E04<-00007FFA3806BBA0<-00007FFA405345D6<-00007FFA4053BCEF<-00007FFA4063A469<-00007FFA404CCD4F<-00007FFA404CA4B7<-00007FFA4050061C<-00007FFA405004F3<-00007FFA404D1305<-00007FFA404C9165“
另外两种种表现形式是
1、进程结束
2、python意外退出
这种情况出现的原因是chroma的版本过高,将chromadb的版本回退到0.5.3就可以解决