太叼了。
解决AutoGen下使用GoogleSearch痛点问题,当然,这个工具可以结合任意langchain或者Agent使用,将工具作为Tools中的一个Function进行调用即可,可以实现全面的自动搜索和信息整合,解决大模型知识陈旧问题!!!
即使使用gpt-3.5也可以实现意外的效果!!!
集帅们!墙推!
链接如下:
https://pypi.org/project/autogen-google-search/0.0.5/
1.源代码
以下为源代码:
import os
from autogen import config_list_from_json
import autogen
import requests
from bs4 import BeautifulSoup
import json
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain import PromptTemplate
import openai
from dotenv import load_dotenv
from google_prompt import REASEARCH_PROMPT
# Get API key
# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv()
# Read model endpoint configs from the OAI_CONFIG_LIST file (see section 2.1),
# keeping only the gpt-3.5-turbo entries for the researcher agent.
config_list3 = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict={
        "model": ["gpt-3.5-turbo"],
    },
)
# Define research function
def search(query: str) -> dict:
    """Run a Google search through the serper.dev API and return the parsed JSON.

    Args:
        query: Google search query string.

    Returns:
        The serper.dev response decoded from JSON (organic results, answer box, etc.).

    Raises:
        requests.HTTPError: if the API responds with a non-2xx status
            (bad key, quota exhausted, ...), instead of silently returning
            the error payload to the agent.
    """
    url = "https://google.serper.dev/search"
    payload = json.dumps({"q": query})
    headers = {
        # Read the key from the environment instead of hard-coding a secret
        # in source control.  SERPER_API_KEY can live in the .env file.
        'X-API-KEY': os.getenv("SERPER_API_KEY", "xxxxx"),
        'Content-Type': 'application/json'
    }
    # timeout prevents the agent loop from hanging forever on a dead endpoint
    response = requests.post(url, headers=headers, data=payload, timeout=30)
    response.raise_for_status()
    return response.json()
def scrape(url: str):
    """Fetch a web page via the browserless.io rendering service and return its text.

    If the extracted text is longer than 8000 characters it is condensed with
    summary() so the result still fits a gpt-3.5 context window.

    Args:
        url: Website URL to scrape.

    Returns:
        The page text (possibly summarized), or None if the HTTP request failed.
    """
    print("Scraping website...")
    # Define the headers for the request
    headers = {
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/json',
    }
    # browserless.io expects the target url as a JSON body
    data_json = json.dumps({"url": url})
    # Read the token from the environment instead of embedding a credential in
    # the request URL; BROWSERLESS_TOKEN can live in the .env file.
    token = os.getenv("BROWSERLESS_TOKEN", "2db344e9-a08a-4179-8f48-195a2f7ea6ee")
    response = requests.post(
        f"https://chrome.browserless.io/content?token={token}",
        headers=headers, data=data_json, timeout=60)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")
        text = soup.get_text()
        print("CONTENTTTTTT:", text)
        # Summarize oversized pages so downstream LLM calls stay within context limits.
        if len(text) > 8000:
            return summary(text)
        return text
    print(f"HTTP request failed with status code {response.status_code}")
    return None  # explicit: callers previously received an implicit None on failure
def summary(content):
    """Condense *content* into a research-oriented summary via a map-reduce chain.

    The text is split into overlapping chunks, every chunk is summarized with
    gpt-3.5-turbo, and the partial summaries are combined using the same prompt.
    """
    chat_model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=500)
    documents = splitter.create_documents([content])
    map_prompt = """
Write a detailed summary of the following text for a research purpose:
"{text}"
SUMMARY:
"""
    prompt = PromptTemplate(template=map_prompt, input_variables=["text"])
    chain = load_summarize_chain(
        llm=chat_model,
        chain_type='map_reduce',
        map_prompt=prompt,
        combine_prompt=prompt,
        verbose=True,
    )
    return chain.run(input_documents=documents)
def research(query):
    """Run an autogen researcher/user-proxy pair on *query* and return the report.

    The researcher agent may call two registered tools: ``search`` (Google via
    serper.dev) and ``scrape`` (browserless.io page fetch).  The user proxy
    executes those calls and stops replying once the researcher's message ends
    with "TERMINATE".

    Args:
        query: The research question to investigate.

    Returns:
        str: content of the last message the proxy received — the final report.
    """
    # OpenAI function-calling schemas for the two tools the researcher may invoke.
    llm_config_researcher = {
        "functions": [
            {
                "name": "search",
                "description": "google search for relevant information",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Google search query",
                        }
                    },
                    "required": ["query"],
                },
            },
            {
                "name": "scrape",
                "description": "Scraping website content based on url",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "Website url to scrape",
                        }
                    },
                    "required": ["url"],
                },
            },
        ],
        "config_list": config_list3}
    researcher = autogen.AssistantAgent(
        name="researcher",
        # system_message="Research about a given query, collect as many information as possible, and generate detailed research results with loads of technique details with all reference links attached;Add TERMINATE to the end of the research report;",
        system_message=REASEARCH_PROMPT,
        llm_config=llm_config_researcher,
    )
    user_proxy = autogen.UserProxyAgent(
        name="User_proxy",
        # Executes tool/code calls locally (no docker), looking at the last 2 messages.
        code_execution_config={"last_n_messages": 2, "work_dir": "coding","use_docker": False,},
        # code_execution_config=False,
        # Stop when the researcher's report ends with the TERMINATE sentinel
        # required by REASEARCH_PROMPT.
        is_termination_msg=lambda x: x.get("content", "") and x.get(
            "content", "").rstrip().endswith("TERMINATE"),
        human_input_mode="NEVER",
        # Map the declared function schemas to their Python implementations.
        function_map={
            "search": search,
            "scrape": scrape,
        }
    )
    # NOTE(review): max_round is not a documented initiate_chat parameter in
    # autogen (it belongs to GroupChat) — confirm it actually limits the turns.
    user_proxy.initiate_chat(researcher, message=query,max_round=4)
    # set the receiver to be researcher, and get a summary of the research report
    user_proxy.stop_reply_at_receive(researcher)
    user_proxy.send(
        "Give me the research report that just generated again, return ONLY the report & reference links", researcher)
    # return the last message the expert received
    return user_proxy.last_message()["content"]
2.依赖内容
2.1 OAI_CONFIG_LIST文件
文件格式如下:
[
{
"model": "gpt-3.5-turbo",
"api_key": "sk-xxxxx"
},
{
"model": "gpt-4",
"api_key": "sk-xxxx",
"base_url": "xxxxx"
}
]
2.2 prompt文件
Agent结构较为简单,可以根据自己实际需求修改Agent的system_message,或者添加更多的Agent完成更为复杂的查询任务,比如最后增加一个写blog的Agent实现根据搜索结果写文章的功能!
COMPLETION_PROMPT = "If everything looks good, respond with APPROVED"
REASEARCH_PROMPT = """
You are a specialist in online resource searching. You can search for resources and summarize them in a reasonable format for users based on various questions they pose. Research a given query, collect as much information as possible, and generate detailed research results with loads of technical details, all reference links attached. If product search is involved, please use the full name rather than abbreviations for related products and companies. Add "TERMINATE" to the end of the research report.
"""
3.使用
执行
pip install autogen-google-search==0.0.5
python环境测试:
一行代码即可完成伟大的功能,当然,他完全可以作为一个外部函数供其他Agent使用。
from autogen_product_withgoogle import google_search
res = google_search.search("帮我搜索MultiAgent框架的最新知识,并帮我写一篇500字博客")
print(res)
运行结果: