答案非常明确,使用不断增长的人工智能代理集合的最佳方式是以个人人工智能助理的形式。
不仅仅是“个人”人工智能助理,还是您的个人人工智能助理。
想象一下一个如此强大的工具,感觉就像是你思想的延伸。 在本视频中,我们将深入探讨我们可以构建和使用的最重要的代理应用程序的创建:您的个人人工智能助理。 该工具仅受您的想象力以及您跳入 Python 或打字稿代码并构建出色的代理工作流程、AI 代理、提示链和单独提示的能力的限制。 您的私人助理可以为您编码、为您研究并组织您的数字生活。 但为了实现这一愿景,我们必须采取小而渐进的步骤。 在这里,我们通过 JAN 来了解未来个人 AI 助理(下一级 VA)的早期原型。 Jan 是我的私人人工智能助理的名字。 它是一个原型,展示了这项技术能够为您做什么。
为了以个人人工智能助理的形式使用您的人工智能代理和提示链,我们需要一个提示您的代理的框架。 在本视频中,我们介绍了构建个人 AI 助理的两个关键框架:PAR ( Prompt: [ 语音转文字 ] -> Agent: [ AI代理路由 ] -> Response: [ 文字转语音 ] ) 框架和简单关键字 AI 代理路由器(LLM 路由器)。 PAR 框架为您和您的个人人工智能助理设置了一个干净的循环。 首先,您用自然语言说话,我们运行语音转文本 (STT) 来捕获您的语音并将其转换为文本,从而成为您的 nlp/提示(自然语言提示)。
接下来,我们使用名为简单关键字 AI 代理路由器的 LLM 路由器,它会根据您的提示决定运行哪些 AI 代理。 您的代理运行各自独立的工作流程,最后您的个人人工智能助理 (ai va) 使用文本转语音 (TTS) 完成 PAR 框架来响应您。
这个框架的优点在于,它不会对您的提示、提示链或代理做出任何假设,所有这些都根据您的提示的激活关键字从 llm 路由器运行。 您可以运行 langchain、crewai、autogen 或任何其他代理框架来构建和运行您的代理工作流程。 在未来的视频中,我们将利用 AgentOS 微架构来构建可重用、可组合的 AI 代理。 然后,我们的 LLM 路由器将路由到我们的各个代理以运行专用功能。
这不是完整的工作代码。
这严格来说是个人 AI 助手第一个版本的 v0、粗糙的(scrappy)概念验证,仅用约 322 行代码(LOC)即可端到端工作。
这只是一个参考框架,供您了解如何构建个人 AI 助理 POC 的核心思想。
main7_jan_personal_ai_assistant_v0.py
import sys
import assemblyai as aai
from pydantic import BaseModel
import sounddevice as sd
import wave
from datetime import datetime
import os
from dotenv import load_dotenv
from elevenlabs import play
from elevenlabs.client import ElevenLabs
import subprocess
import pyperclip
from modules import llm
load_dotenv()  # load API keys used below (ELEVEN_API_KEY, ASSEMBLYAI_API_KEY) from .env
ACTIVATION_KEYWORD = "jan"  # wake word that must appear in a transcript chunk to trigger routing
PERSONAL_AI_ASSISTANT_NAME = "jan"  # name the assistant uses for itself inside LLM prompts
HUMAN_COMPANION_NAME = "man"  # how the assistant addresses its user inside LLM prompts
channels = 1  # mono audio recording
iteration_start_time = None  # set after each recording chunk; used to time one interaction round-trip
def speak(text: str):
    """Synthesize *text* with ElevenLabs and play it through the speakers."""
    tts_client = ElevenLabs(
        api_key=os.getenv("ELEVEN_API_KEY"),  # loaded from .env by load_dotenv()
    )
    audio_stream = tts_client.generate(
        text=text,
        voice="WejK3H1m7MI9CHnIjW9K",
        model="eleven_turbo_v2",
    )
    play(audio_stream)
def run_bash_command(prompt: str):
    """Ask the LLM to pick a bash command for *prompt*, run it, and speak a confirmation.

    The LLM is constrained (by prompt text only) to a small whitelist of shell
    helpers and must answer with JSON matching ``BashCommandResponse``.

    NOTE(security): the returned command is executed with ``shell=True`` —
    an LLM-injected command runs with full user privileges. Acceptable for a
    personal POC, not for anything multi-user.
    """
    run_bash_prompt = f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
You've been asked to run the following bash command: '{prompt}'
Here are available bash commands you can run
# chrome browser
browser() {{
open -a 'Google Chrome' $1
}}
# typescript playground
playt() {{
cursor "/Users/ravix/Documents/projects/experimental/playt"
}}
Based on the command - RESPOND WITH THE COMMAND to run in this JSON format: {{bash_command_to_run: ''}}.
Exclude any new lines or code blocks from the command. Respond with exclusively JSON.
Your command will be immediately run and the output will be returned to the user.
"""

    class BashCommandResponse(BaseModel):
        # Single JSON field the model must return.
        bash_command_to_run: str

    response: BashCommandResponse = llm.gpt4t_w_vision_prompt(
        run_bash_prompt, pydantic_model=BashCommandResponse
    )
    print("👧 Raw response: ", response)  # was an f-string with no placeholders
    command = response.bash_command_to_run
    print(f"💻 {PERSONAL_AI_ASSISTANT_NAME} is running this command: ", command)
    try:
        # Source the profile so the user-defined helpers (browser, playt) exist.
        command = "source ~/.bash_profile && " + command
        # Bug fix: without check=True the except branch could never fire, and
        # without capture_output/text, result.stdout was always None.
        result = subprocess.run(
            command,
            shell=True,
            check=True,
            capture_output=True,
            text=True,
        )
        print(f"💻 Command executed successfully: {command}")
        print(f"💻 Output: {result.stdout}")
    except subprocess.CalledProcessError as e:
        print(f"💻 Error executing command: {command}\n💻 Error: {e}")
        return
    soft_talk_prompt = f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
We both like short, concise, back-and-forth conversations.
We don't like small talk so we always steer our conversation back toward creating, building, product development, designing, and coding.
You've just helped your human companion run this bash command: {command}
Let your human companion know you've finished running the command and what you can do next."""
    response = llm.gpro_1_5_prompt(soft_talk_prompt)
    speak(response)
def question_answer(prompt: str):
    """Speak a concise answer to *prompt*, staying in the assistant persona."""
    qa_prompt = f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
We both like short, concise, back-and-forth conversations, no longer than 2 sentences.
We don't like small talk so we always steer our conversation back toward creating, building, product development, designing, and coding.
We like to discuss in high level details without getting too technical.
Respond to the following question: {prompt}"""
    answer = llm.gpro_1_5_prompt(qa_prompt)
    speak(answer)
def soft_talk(prompt: str):
    """Hold one short, work-focused chat turn: prompt the LLM, speak its reply."""
    chat_prompt = f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
We both like short, concise, back-and-forth conversations.
We don't like small talk so we always steer our conversation back toward creating, building, product development, designing, and coding.
Respond to the following prompt: {prompt}"""
    reply = llm.gpro_1_5_prompt(chat_prompt)
    speak(reply)
def shell_command(prompt: str):
    """Have the LLM draft a macOS bash command for *prompt*, copy it to the
    clipboard, then speak a confirmation."""

    class ShellCommandModel(BaseModel):
        # Single JSON field the model must return.
        command_to_run: str

    shell_command_prompt = f"""You are a highly efficient, code-savvy AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
Your task is to provide a JSON response with the following format: {{command_to_run: ''}} detailing the shell command for MacOS bash to based on this question: {prompt}.
After generating the response, your command will be attached DIRECTLY to your human companions clipboard to be run."""
    llm_result = llm.gpt4t_w_vision_prompt(
        prompt=shell_command_prompt,
        pydantic_model=ShellCommandModel,
    )
    # The drafted command goes to the clipboard — it is never executed here.
    pyperclip.copy(llm_result.command_to_run)
    completion_prompt = f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
We both like short, concise, back-and-forth conversations.
You've just attached the command '{llm_result.command_to_run}' to your human companion's clipboard like they've requested.
Let your human companion know you've attached it and let them know you're ready for the next task."""
    speak(llm.gpro_1_5_prompt(completion_prompt))
def end_conversation(prompt: str):
    """Say goodbye in the assistant persona, then terminate the whole process."""
    farewell_prompt = f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
We both like short, concise, back-and-forth conversations.
We're wrapping up our work for the day. You're a great engineering partner.
Thanks for all your help and for being a great engineering partner.
Respond to your human companions closing thoughts: {prompt}"""
    farewell = llm.gpro_1_5_prompt(farewell_prompt)
    speak(farewell)
    sys.exit()  # ends personal_ai_assistant_loop by exiting the interpreter
def get_keywords_agent_router():
    """Simple keyword LLM-router table: map comma-separated trigger words to
    the agent / agentic workflow that should handle the prompt."""
    routes = {
        "bash,browser": run_bash_command,
        "shell": shell_command,
        "question": question_answer,
        "hello,hey,hi": soft_talk,
        "exit": end_conversation,
    }
    return routes
def transcribe_audio_file(file_path):
    """Send the audio file at *file_path* to AssemblyAI and return the transcript object."""
    aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
    return aai.Transcriber().transcribe(file_path)
def track_interaction_time():
    """Print seconds elapsed since the previous recording chunk ended, then reset the timer."""
    global iteration_start_time
    if iteration_start_time is None:
        return
    elapsed = (datetime.now() - iteration_start_time).total_seconds()
    print(f"🕒 Interaction time: {elapsed} seconds")
    iteration_start_time = None
def record_audio(duration=10, fs=44100):
    """Block while capturing *duration* seconds of mono int16 audio; return the samples."""
    track_interaction_time()  # report how long the previous round-trip took
    print("🔴 Recording...")
    frame_count = int(duration * fs)
    audio = sd.rec(frame_count, samplerate=fs, channels=channels, dtype="int16")
    sd.wait()  # block until the buffer is full
    print("🎧 Recording Chunk Complete")
    global iteration_start_time
    iteration_start_time = datetime.now()  # start timing the user-perceived latency
    return audio
def save_audio_file(recording, fs=44100, filename="output.wav"):
    """Write the raw int16 samples in *recording* to *filename* as an uncompressed WAV."""
    with wave.open(filename, "wb") as out:
        # (nchannels, sampwidth=2 bytes for int16, framerate, nframes, comptype, compname)
        out.setparams((channels, 2, fs, 0, "NONE", "not compressed"))
        out.writeframes(recording)
def personal_ai_assistant_loop(
    audio_chunk_size=10, activation_keyword=ACTIVATION_KEYWORD, on_keywords=None
):
    """Core PAR loop: record fixed-size chunks forever, transcribe each, and
    hand the transcript to *on_keywords* when *activation_keyword* is heard.

    Parameters:
        audio_chunk_size: seconds of audio captured per iteration.
        activation_keyword: wake word matched case-insensitively in the transcript.
        on_keywords: callback receiving the AssemblyAI transcript object.
    """
    while True:
        recording = record_audio(duration=audio_chunk_size)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"audio_{timestamp}.wav"
        save_audio_file(recording, filename=filename)
        file_size = os.path.getsize(filename)
        # Bug fix: the message previously printed a literal "(unknown)"
        # instead of the actual file name.
        print(f"📁 File {filename} has been saved with a size of {file_size} bytes.")
        transcript = transcribe_audio_file(filename)
        print("📝 transcript was:", transcript.text)
        if activation_keyword.lower() in transcript.text.lower():
            if on_keywords:
                on_keywords(transcript)
        os.remove(filename)  # chunks are transient; delete after processing
def text_after_keyword(transcript, keyword):
    """Return the transcript text that follows the first occurrence of *keyword*.

    Matching is case-insensitive. Returns "" when the keyword is absent or the
    transcript object is malformed (no usable ``.text`` attribute).
    """
    try:
        lowered = transcript.text.lower()
        keyword_position = lowered.find(keyword.lower())
        if keyword_position == -1:
            # Keyword not present at all.
            return ""
        return transcript.text[keyword_position + len(keyword):].strip()
    except AttributeError as e:
        # Narrowed from bare `except Exception`: only a transcript (or keyword)
        # lacking the expected string interface is a best-effort failure here;
        # anything else should surface.
        print(f"Error extracting text after keyword: {e}")
        return ""
def get_first_keyword_in_prompt(prompt: str):
    """Scan the router table in order; return (agent, keyword) for the first
    keyword found in *prompt*, or (None, None) when nothing matches."""
    lowered = prompt.lower()
    for keyword_group, agent in get_keywords_agent_router().items():
        for keyword in keyword_group.split(","):
            if keyword in lowered:
                return agent, keyword
    return None, None
def on_activation_keyword_detected(transcript: aai.Transcript):
    """Main-loop callback: strip the wake word, pick an agent by keyword, run it."""
    print("✅ Activation keyword detected!, transcript is: ", transcript.text)
    prompt = text_after_keyword(transcript, ACTIVATION_KEYWORD)
    print("🔍 prompt is: ", prompt)
    agent, keyword = get_first_keyword_in_prompt(prompt)
    if agent is None:
        print("❌ No agent found for the given prompt.")
        return
    print(f"✅ Found agent via keyword '{keyword}'")
    agent(prompt)
# Entry point: start the record → transcribe → route loop with the wake-word handler.
personal_ai_assistant_loop(on_keywords=on_activation_keyword_detected)
test.py
import pyttsx3
import speech_recognition as sr
# Initialize the pyttsx3 text-to-speech engine (module-level, shared by speak())
engine = pyttsx3.init()
# Set properties to adjust voice characteristics
engine.setProperty('rate', 150)  # Speed of speech (words per minute)
engine.setProperty('volume', 0.9)  # Volume level (0.0 to 1.0)
engine.setProperty('voice', 'com.apple.speech.synthesis.voice.Alex')  # macOS-only voice id
# Initialize speech recognition (shared recognizer used by listen())
recognizer = sr.Recognizer()
def speak(text):
    """Speak *text* aloud through the shared pyttsx3 engine; blocks until playback finishes."""
    engine.say(text)
    engine.runAndWait()
def listen():
    """Capture one utterance from the microphone and return it as text ("" on failure)."""
    # Hold the microphone only while recording; recognition happens afterwards.
    with sr.Microphone() as source:
        print("Listening...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)
    try:
        query = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        # Speech was captured but could not be understood.
        print("Sorry, I couldn't understand what you said.")
        return ""
    except sr.RequestError as e:
        # Network / API failure talking to the recognition service.
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
        return ""
    print("You said:", query)
    return query
def assistant():
    """Minimal voice REPL: greet, then loop handling spoken commands until 'exit'."""
    speak("Hello! How can I assist you today?")
    while True:
        query = listen().lower()
        if "exit" in query:
            speak("Goodbye!")
            return
        # Demo routing only — extend with real handlers per recognized phrase.
        if "hello" in query:
            speak("Hello there!")
        elif "time" in query:
            # Placeholder: a real implementation would read the clock here.
            speak("Sorry, I can't provide the current time at the moment.")
# Run the assistant only when executed as a script (not on import).
if __name__ == "__main__":
    assistant()
vueGPT:像老大一样自动生成 Vue 3 <script setup lang='ts'> 组件。
main.py
from vueGPT import prompt, make_client
from dotenv import load_dotenv
from os import environ  # bug fix: `environ` was used below without being imported

# load .env file so OPENAI_API_KEY is available
load_dotenv()

# get openai api key
OPENAI_API_KEY = environ.get('OPENAI_API_KEY')

query = "build a simple switch component that accepts an on prop"
model = 'gpt-4'  # use 'gpt-3.5-turbo' only if you must

# build your client
# bug fix: was `chatgpt.make_client(...)`, but no `chatgpt` name exists —
# the functions are imported directly from vueGPT above.
client = make_client(OPENAI_API_KEY)

# run your prompt
prompt_response = prompt(client, query, model)
print(prompt_response)
requirements.txt
python-dotenv
openai
vue_gpt_script_setup_ts_prompt.txt
{----将其替换为您的 VUE 组件查询----}
严格返回 Vue 组件的代码,包括 <template>、<script setup lang='ts'> 和 <style> 部分。
Example component:
<template>
<div class='(filename)-w'>
<h1>{{ name }}</h1>
<h2>{{ age }}</h2>
<h2>{{ doubleAge }}</h2>
<input type='text' :value='name' @input='updateName($event.target.value)' />
</div>
</template>
<script lang='ts' setup>
import { toRefs, ref, defineProps, computed, onMounted } from 'vue'
// ---------------------------- Props / Emit ----------------------------
interface Props {
name: string
lastName?: string
}
const props = defineProps<Props>()
const { name } = toRefs(props)
const emit = defineEmits(['update'])
// ---------------------------- State / Getters ----------------------------
const age = ref(30)
const doubleAge = computed(_ => age.value * 2)
// ---------------------------- Methods ----------------------------
function updateName(value: string) {
emit('update', value)
}
// ---------------------------- Lifecycle Hooks ----------------------------
onMounted(() => {
console.log('mounted')
})
</script>
<style></style>
vueGPT.py
import json
from typing import Any, Dict
import openai
# Few-shot example: a complete Vue 3 SFC using <script setup lang='ts'>,
# embedded verbatim into PROMPT below so the model copies its structure.
VUE_EXAMPLE = """<template>
<div class='(filename)-w'>
<h1>{{ name }}</h1>
<h2>{{ age }}</h2>
<h2>{{ doubleAge }}</h2>
<input type='text' :value='name' @input='updateName($event.target.value)' />
</div>
</template>
<script lang='ts' setup>
import { toRefs, ref, defineProps, computed, onMounted } from 'vue'
// ---------------------------- Props / Emit ----------------------------
interface Props {
name: string
lastName?: string
}
const props = defineProps<Props>()
const { name } = toRefs(props)
const emit = defineEmits(['update'])
// ---------------------------- State / Getters ----------------------------
const age = ref(30)
const doubleAge = computed(_ => age.value * 2)
// ---------------------------- Methods ----------------------------
function updateName(value: string) {
emit('update', value)
}
// ---------------------------- Lifecycle Hooks ----------------------------
onMounted(() => {
console.log('mounted')
})
</script>
<style></style>
"""
# Prompt template; filled via str.format() with the user's component request
# ({vue_component}) and the example above ({vue_example}).
PROMPT = """You're a Senior Vue 3 developer. You build new Vue components using the new Composition API with <script setup lang='ts'>.
Your current assignment is to build a new vue component fulfilling the following requirements:
{vue_component}
Return strictly the code for the Vue component including <template>, <script setup lang='ts'>, and <style> sections.
Example component:
{vue_example}"""
# ------------------ helpers ------------------
def safe_get(data, dot_chained_keys):
    """Safely walk nested dicts/lists with a dot-chained key string.

    >>> safe_get({'a': {'b': [{'c': 1}]}}, 'a.b.0.c')
    1

    Returns None when any step is missing, of the wrong type, or (for lists)
    not a valid integer index.
    """
    for key in dot_chained_keys.split('.'):
        try:
            if isinstance(data, list):
                data = data[int(key)]
            else:
                data = data[key]
        except (KeyError, TypeError, IndexError, ValueError):
            # ValueError added: a non-numeric key against a list (e.g. 'x')
            # previously escaped from int(key) instead of returning None.
            return None
    return data
def response_parser(response: Dict[str, Any]):
    """Pull the assistant's message text out of a ChatCompletion response dict."""
    content = safe_get(response, 'choices.0.message.content')
    return content
def make_client(gpt_api_key: str):
    """Set the API key on the module-level `openai` client and return the
    module itself (legacy pre-v1 openai-python usage)."""
    openai.api_key = gpt_api_key
    return openai
# ------------------ content generators ------------------
def prompt(openai_client, vue_component_query: str, model: str) -> str:
    """Generate a Vue 3 SFC for *vue_component_query* with *model*; return the raw code text.

    Uses the legacy (pre-v1) `openai.ChatCompletion` API that `make_client` returns.
    Returns None if the response lacks the expected structure (see response_parser).
    """
    complete_prompt = PROMPT.format(
        vue_component=vue_component_query, vue_example=VUE_EXAMPLE
    )
    print("Firing off prompt")  # f-prefix removed: the string has no placeholders
    response = openai_client.ChatCompletion.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": complete_prompt,
            }
        ],
    )
    print(f"response: {json.dumps(response, indent=2)}")
    return response_parser(response)