MetaGPT Learning - Chapter 6 Final Assignment

import sys
import asyncio
import json

from typing import Optional
from uuid import uuid4

from metagpt.environment import Environment
from metagpt.subscription import SubscriptionRunner
from metagpt.logs import logger
from aiocron import crontab

from metagpt.actions.action import Action 
from metagpt.actions.action_node import ActionNode
from metagpt.roles import Role
from metagpt.schema import Message
from metagpt.tools.web_browser_engine import WebBrowserEngine

from metagpt.utils.parse_html import _get_soup
from pytz import BaseTzInfo



# Define the ActionNodes first
LANGUAGE = ActionNode(
    key="Language",
    expected_type=str,
    instruction="Provide the language used in the project, typically matching the user's requirement language.",
    example="en_us",
)

CRON_EXPRESSION = ActionNode(
    key="Cron Expression",
    expected_type=str,
    instruction="If the user requires scheduled triggering, please provide the corresponding 5-field cron expression. "
    "Otherwise, leave it blank.",
    example="",
)

CRAWLER_URL_LIST = ActionNode(
    key="Crawler URL List",
    expected_type=list[str],
    instruction="List the URLs user want to crawl. Leave it blank if not provided in the User Requirement.",
    example=["https://example1.com", "https://example2.com"],
)

PAGE_CONTENT_EXTRACTION = ActionNode(
    key="Page Content Extraction",
    expected_type=str,
    instruction="Specify the requirements and tips to extract from the crawled web pages based on User Requirement.",
    example="Retrieve the titles and content of articles published today.",
)

CRAWL_POST_PROCESSING = ActionNode(
    key="Crawl Post Processing",
    expected_type=str,
    instruction="Specify the processing to be applied to the crawled content, such as summarizing today's news.",
    example="Generate a summary of today's news articles.",
)

INFORMATION_SUPPLEMENT = ActionNode(
    key="Information Supplement",
    expected_type=str,
    instruction="If unable to obtain the Cron Expression, prompt the user to provide the time to receive subscription "
    "messages. If unable to obtain the URL List Crawler, prompt the user to provide the URLs they want to crawl. Keep it "
    "blank if everything is clear",
    example="",
)

NODES = [
    LANGUAGE,
    CRON_EXPRESSION,
    CRAWLER_URL_LIST,
    PAGE_CONTENT_EXTRACTION,
    CRAWL_POST_PROCESSING,
    INFORMATION_SUPPLEMENT,
]

PARSE_SUB_REQUIREMENTS_NODE = ActionNode.from_children("ParseSubscriptionReq", NODES)
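
For reference, once PARSE_SUB_REQUIREMENTS_NODE is filled and the JSON is extracted, the saved requirement.json roughly takes the shape below. This is an illustrative sketch only: the field names come from the NODES above, but the concrete values are assumptions based on the demo requirement, not actual model output.

# Illustrative example of the parsed requirement saved to requirement.json
# (field names from NODES above; values are assumed, not real LLM output):
example_requirement = {
    "Language": "zh_cn",
    "Cron Expression": "24 16 * * *",
    "Crawler URL List": ["https://www.qbitai.com/category/资讯"],
    "Page Content Extraction": "Retrieve the titles and content of articles published today.",
    "Crawl Post Processing": "Generate a summary of today's news articles.",
    "Information Supplement": "",
}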

PARSE_SUB_REQUIREMENT_TEMPLATE = """
### User Requirement
{requirements}
"""

SUB_ACTION_TEMPLATE = """
## Requirements
Answer the question based on the provided context {process}. If the question cannot be answered, please summarize the context.

## Context
{data}
"""


WRITE_ARTICLE_TEMPLATE = """

## User Requirement
{requirement}

## Context

The outline of the HTML page to scrape is shown below:

```tree
{outline}
```

Please output in {language}.
"""


def read_json_file(file_path):
    """
    Read a local JSON file and return its parsed content.

    Args:
        file_path (str): path to the JSON file

    Returns:
        data: the parsed JSON content, or "" on failure
    """
    try:
        with open(file_path, 'r') as file:
            # Parse the JSON file and return the resulting object
            data = json.load(file)
            return data
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except json.JSONDecodeError:
        print(f"Failed to decode JSON from file: {file_path}")
    except Exception as e:
        print(f"An error occurred: {e}")
    return ""

def save_json_to_file(data, file_path):
    """
    Save data to a local file in JSON format.

    Args:
        data: the object to save
        file_path: path of the target file
    """
    try:
        with open(file_path, 'w') as f:
            # ensure_ascii=False keeps non-ASCII (e.g. Chinese) text readable in the file
            json.dump(data, f, ensure_ascii=False)
        print(f"Data saved to {file_path}")
    except Exception as e:
        print(f"An error occurred: {e}")



# Helper: build an outline view of the HTML page structure
def get_outline(page):
    soup = _get_soup(page.html)
    outline = []

    def process_element(element, depth):
        name = element.name
        if not name:
            return
        if name in ["script", "style"]:
            return

        element_info = {"name": element.name, "depth": depth}

        if name in ["svg"]:
            element_info["text"] = None
            outline.append(element_info)
            return

        element_info["text"] = element.string
        # Check if the element has an "id" attribute
        if "id" in element.attrs:
            element_info["id"] = element["id"]

        if "class" in element.attrs:
            element_info["class"] = element["class"]
        outline.append(element_info)
        for child in element.children:
            process_element(child, depth + 1)

    for element in soup.body.children:
        process_element(element, 1)

    return outline
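
To make the outline structure concrete, here is a small illustrative sample of the entries get_outline returns. The HTML it describes is hypothetical, not output from a real page: each entry records the tag name, nesting depth, direct text, and optional id/class.

# Hypothetical sample of get_outline output for a tiny page (illustrative only):
sample_outline = [
    {"name": "div", "depth": 1, "text": None, "class": ["article-list"]},
    {"name": "h2", "depth": 2, "text": "今日资讯", "class": ["article-title"]},
    {"name": "a", "depth": 3, "text": "Some article title", "id": "post-1"},
]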

# Trigger: fires on the user's crontab schedule
class CronTrigger:

    def __init__(self, msgs, tz: Optional[BaseTzInfo] = None) -> None:
        self.msgs = msgs
        # Read the cron expression from the parsed subscription requirement
        spec = msgs.instruct_content.dict()
        self.crontab = crontab(spec["Cron Expression"], tz=tz)

    def __aiter__(self):
        return self

    async def __anext__(self):
        await self.crontab.next()
        return Message("Send it to me now")
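
Because CronTrigger implements the async-iterator protocol, SubscriptionRunner can simply `async for` over it; the same loop can be sketched by hand for debugging. A minimal sketch, assuming `parsed_msg` is the Message returned by SubscriptionAssistant (its instruct_content must contain a valid "Cron Expression"):

# Minimal debugging sketch (hypothetical): drive the trigger manually instead of
# through SubscriptionRunner. It waits for the next cron firing, then stops.
async def debug_trigger(parsed_msg):
    trigger = CronTrigger(parsed_msg)
    async for msg in trigger:
        print(msg.content)
        break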

# Action: parse the user's subscription requirement into structured fields
class ParseSubRequirement(Action):
    async def run(self, requirements):
        print("ParseSubRequirement")
        context = PARSE_SUB_REQUIREMENT_TEMPLATE.format(requirements=requirements.content)
        node = await PARSE_SUB_REQUIREMENTS_NODE.fill(context=context, llm=self.llm)

        # node.content is wrapped in a ```json ... ``` fence; strip the first and last lines
        tmp_content = node.content
        head_index = tmp_content.find("\n")
        if head_index != -1:
            tmp_content = tmp_content[head_index + 1:]

        end_index = tmp_content.rfind("\n")
        if end_index != -1:
            tmp_content = tmp_content[:end_index]

        print(tmp_content)
        req_json = json.loads(tmp_content)
        save_json_to_file(req_json, "requirement.json")
        return node


# Action: crawl the target page and write the article from its content
class CrawlAndWrite(Action):
    async def run(self, msgs):
        requirements = read_json_file("requirement.json")
        urls = requirements["Crawler URL List"]
        page = await WebBrowserEngine().run(urls[0])

        # Flatten the outline into an indented "tree" text for the prompt
        outline = get_outline(page)
        outline = "\n".join(
            f"{' ' * i['depth']}{'.'.join([i['name'], *i.get('class', [])])}: {i['text'] if i['text'] else ''}"
            for i in outline
        )
        requirement = requirements["Page Content Extraction"] + requirements["Crawl Post Processing"]
        language = requirements["Language"]
        prompt = WRITE_ARTICLE_TEMPLATE.format(outline=outline, requirement=requirement, language=language)
        print("CrawlAndWrite - prompt", prompt)
        return await self.llm.aask(prompt)


# Role: crawls the page content and writes the article
class ArticleWriter(Role):
    name: str = "John"
    profile: str = "Article Writer"
    goal: str = "Crawl the page content, summarize the information and write the article"
    constraints: str = "Find and extract the useful information in the page"

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        self._init_actions([CrawlAndWrite])

    async def _act(self) -> Message:
        todo = self.rc.todo
        msg = self.get_memories(k=1)[0]  # get the most recent message

        resp = await todo.run(msg)
        msg = Message(content=resp, role=self.profile, cause_by=type(todo))
        self.rc.memory.add(msg)
        return msg


# Role: the subscription assistant that parses the user's requirement
class SubscriptionAssistant(Role):
    """Analyze user subscription requirements."""

    name: str = "Grace"
    profile: str = "Subscription Assistant"
    goal: str = "Analyze user subscription requirements to provide personalized subscription services."
    constraints: str = "Utilize the same language as the User Requirement"

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        self._init_actions([ParseSubRequirement])

    async def _act(self) -> Message:
        todo = self.rc.todo
        msg = self.get_memories(k=1)[0]  # get the most recent message

        resp = await todo.run(msg)
        msg = Message(content=resp.content, instruct_content=resp.instruct_content,
                      role=self.profile, cause_by=type(todo))
        self.rc.memory.add(msg)
        return msg

async def callback(msg):
    print(msg)

async def main():
    # Alternative demo requirement (36kr financing flash); the sample run result below corresponds to this one:
    # msg = "从36kr创投平台 https://pitchhub.36kr.com/financing-flash 爬取所有初创企业融资的信息,获取标题,链接, 时间,总结今天的融资新闻,然后在今天下午4点21分发送给我"
    # Active demo requirement (in Chinese): fetch today's 量子位 articles, summarize them, and send the summary at 16:24 every day
    msg = "从 https://www.qbitai.com/category/资讯 获取量子位今天推送的文章,总结今天的主要资讯,然后在每天下午4点24分发送给我"
    role = SubscriptionAssistant()
    logger.info(msg)
    result = await role.run(msg)
    logger.info(result)

    # Subscribe the writer role to the cron trigger; the callback prints each produced message
    runner = SubscriptionRunner()
    await runner.subscribe(ArticleWriter(), CronTrigger(result), callback)
    await runner.run()


if __name__ == "__main__":
    asyncio.run(main())

Run result:

In today's financing flash report, we noted a series of funding events for start-ups. Below is a partial summary with related information:

1. 睿普康 completes a Series A round of over 100 million RMB
   - Summary: 睿普康 is a chip company specializing in satellite communication, cellular communication and power management chips.
   - Investors: 深投控, 阿玛拉, 合肥海恒, among others.
   - Time: 4 hours ago
   - [Original link](#)

2. Accent Therapeutics raises a 75 million USD Series C round
   - Summary: Accent Therapeutics develops therapies against novel and known oncology targets.
   - Time: 5 hours ago
   - Author: 36氪To B产业报道

3. HEPHAISTOS-Pharma raises a 2 million EUR seed round
   - Summary: HEPHAISTOS-Pharma's first product, ONCO-Boost, is a natural TLR4 agonist.
   - Time: 5 hours ago
   - Author: 36氪To B产业报道

4. Elephas Biosciences Corporation raises a 55 million USD Series C round
   - Summary: Elephas Biosciences Corporation is developing a tumor imaging and diagnostics platform.
   - Time: 5 hours ago
   - Author: 36氪To B产业报道

5. "武汉光钜" completes a 200 million RMB Series B round
   - Summary: 武汉光钜 focuses on the R&D and production of RF front-end BAW/FBAR filters.
   - Investors: 宁波甬商实业, 湖北省铁路发展基金, among others.
   - Time: 21 hours ago
   - [Original link](#)

6. Semiconductor metrology and inspection equipment company "谦视智能" completes a Series A+ round worth tens of millions of RMB
   - Summary: 谦视智能 announced the completion of a Series A+ round of tens of millions of RMB and will set up a production base.
   - Investors: 沃赋创投, 高瓴创投, among others.
   - Time: 22 hours ago
   - [Original link](#)

7. "俊丰新材" completes a Series A round worth tens of millions of RMB
   - Summary: 俊丰新材 is a high-tech company engaged in the R&D, manufacturing and sale of electronic materials.
   - Investors: sole investment by 乾融控股.
   - Time: 22 hours ago
   - [Original link](#)

8. Beef rice-bowl brand "牛大吉" completes an 82 million RMB Series B1 round
   - Summary: 牛大吉 has completed an 82 million RMB Series B1 round, to be used for building its supply chain.
   - Investors: led by 春涧资本, with 深熊资本 following.
   - Time: 22 hours ago
   - [Original link](#)

9. "知策科技" receives a seed round worth tens of millions of RMB from 顺为
   - Summary: 知策科技 focuses on complex manufacturing and develops AI-based industrial software.
   - Investors: 顺为资本.
   - Time: yesterday
   - [Original link](#)

10. Chinese offshore bond platform "久期" completes a Series A round
    - Summary: 久期 completed a Series A round at a valuation of 100 million RMB.
    - Time: yesterday

These financing events reflect the capital market's continued attention to and support for start-ups, especially in areas such as satellite communication, healthcare, semiconductors and artificial intelligence. With the new funding, these companies will be able to accelerate product development, market expansion and capacity growth, driving technological innovation and industrial upgrading.
