参考文章:
1.使用browser-use进行数据爬取实战记录
2.官方代码下载地址
实现功能:读取网页的url链接并下载到本地
待实现:将自定义 action 放到 browser_use\controller\service.py 的 `__init__` 函数中注册后,仍然能够正常执行这个 action。希望评论区有人能帮忙解答一下:是不是还有什么没有发现的调用?
主代码如下:
import asyncio
import json
import os
import time
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from pydantic import SecretStr
import requests
from browser_use import Agent, Controller
from browser_use.agent.views import ActionResult
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContextConfig
# Load variables from a .env file into the process environment (python-dotenv).
load_dotenv()
def log_response() -> str:
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``.

    The timestamp is computed afresh on every call, so printing it before
    and after a piece of work brackets how long that work took.

    :return: the formatted local timestamp
    """
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# Browser instance handed to the agent, configured with the Chrome
# executable at the path below.
browser = Browser(
    config=BrowserConfig(
        # Change this path to point at a different browser installation.
        chrome_instance_path='C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
    )
)

# Controller whose registry receives the custom actions decorated with
# ``@controller.registry.action`` in this file.
controller = Controller()
@controller.registry.action('保存结果到指定文件')
def save_to_file(text: str, file_path: str) -> ActionResult:
    """Write ``text`` to ``file_path``, replacing any existing content.

    :param text: the content to write
    :param file_path: destination path of the file (including file name)
    :return: an ``ActionResult`` whose ``extracted_content`` is the text
        that was written
    """
    # Explicit UTF-8: the platform default encoding (e.g. GBK on a Chinese
    # Windows install) cannot represent every character found in scraped
    # pages and would raise UnicodeEncodeError.
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(text)
    return ActionResult(extracted_content=text)
@controller.registry.action('download file from json_URL to local')
def download_file(url: str, save_path: str) -> ActionResult:
    """Download the file at ``url`` and store it at ``save_path``.

    The parent directory of ``save_path`` is created when it is missing.
    The outcome is printed and also returned, so the caller can tell a
    successful download from a failed one.

    :param url: URL of the file to download
    :param save_path: destination path of the file (including file name)
    :return: an ``ActionResult`` carrying the success message in
        ``extracted_content``, or the failure message in ``error`` when the
        HTTP request fails
    """
    try:
        # The timeout keeps a stalled server from hanging the run forever;
        # the context manager releases the streamed connection afterwards.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()  # treat HTTP error statuses as failures

            # dirname is '' for a bare file name, and makedirs('') raises,
            # so only create the directory when there is one to create.
            dir_path = os.path.dirname(save_path)
            if dir_path:
                os.makedirs(dir_path, exist_ok=True)

            with open(save_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=16384):
                    file.write(chunk)
    except requests.exceptions.RequestException as e:
        message = f"下载失败: {e}"
        print(message)
        return ActionResult(error=message)

    message = f"文件已下载并保存至: {save_path}"
    print(message)
    return ActionResult(extracted_content=message)
async def run_search():
    """Run the browser agent on the image-download task.

    Prints a timestamp, builds an ``Agent`` from the module-level ``browser``
    and ``controller``, runs it, waits ten seconds, then prints a second
    timestamp.

    :raises ValueError: if the API key environment variable is not set
    """
    print(log_response())

    # The key is read from the environment (populated by load_dotenv()).
    # NOTE(review): the variable name is chosen to match the SiliconFlow
    # endpoint used below; rename it to whatever your .env file defines.
    api_key = os.getenv('SILICONFLOW_API_KEY', '')
    if not api_key:
        raise ValueError('SILICONFLOW_API_KEY is not set')

    agent = Agent(
        # Adjacent string literals are concatenated verbatim, so each step
        # ends with a newline; otherwise the URL in step 1 would run straight
        # into the "2." of the next step.
        task=(
            '1.前往https://blog.csdn.net/m0_67547784/article/details/145259527\n'
            '2.提取该博客中的所有图像的url,可以查找<img>标签快速定位,输出名为urls\n'
            '3.提取urls中的前3个url,下载文件到D:\\Desktop\\img文件夹中'
        ),
        llm=ChatOpenAI(
            base_url='https://api.siliconflow.cn/v1',
            model='deepseek-ai/DeepSeek-V3',
            api_key=SecretStr(api_key),
        ),
        use_vision=False,
        tool_calling_method='raw',
        generate_gif=False,
        browser=browser,
        controller=controller,
    )
    await agent.run()

    # Non-blocking pause: time.sleep() here would freeze the event loop.
    # Presumably the wait lets pending work settle before exit -- TODO confirm.
    await asyncio.sleep(10)
    print(log_response())
if __name__ == '__main__':
    # Start the event loop and run the agent only when executed as a script.
    asyncio.run(run_search())