声明
文章只用于学习探讨,请勿用于非法行为。
实现原理
利用selenium操作文心一言对话页面,实现自动化问答和获取问答结果。
实现细节
'''
Client for the Wenxin Yiyan (文心一言) chat web page, driven through Selenium.
'''
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

import config
from log_factory import get_logger
# Module-level logger, obtained from log_factory under the name 'wenxin_log'
logger = get_logger('wenxin_log')
class WEN_XIN_AI:
    '''
    Selenium-driven client for the Wenxin Yiyan (文心一言) chat web page.

    Holds a Chrome WebDriver in ``self.driver`` and offers helpers to open the
    chat page, type a question into the editor and read back the answer text.
    '''

    def __init__(self, driver=None):
        '''
        Keep the supplied driver, or create a new Chrome driver when none is given.

        :param driver: an existing WebDriver to reuse; when ``None`` a new one
            is created through :meth:`init_driver`
        '''
        self.driver = driver
        if self.driver is None:
            self.init_driver()

    def init_driver(self) -> None:
        '''
        Create a Chrome WebDriver from the project configuration and store it
        in ``self.driver``.

        Reads ``chrome_driver_path`` and ``user_data_dir`` from
        ``config.Selenium_config()``.

        :return: None
        '''
        selenium_config = config.Selenium_config()
        # ChromeDriver service built from the configured executable path
        service = Service(selenium_config.chrome_driver_path)

        # Chrome launch options
        options = webdriver.ChromeOptions()
        options.add_argument("--disable-blink-features=AutomationControlled")
        # options.add_argument("--headless")  # headless mode
        options.add_argument(
            "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
        # Persist the browser profile so the next run remembers the login state
        options.add_argument(f'--user-data-dir={selenium_config.user_data_dir}')

        driver = webdriver.Chrome(service=service, options=options)

        # Script injected into every new document: navigator.webdriver reads as
        # undefined (presumably to make automation harder to detect - confirm).
        hide_webdriver_js = (
            "\n"
            "Object.defineProperty(navigator, 'webdriver', {\n"
            "get: () => undefined\n"
            "})\n"
        )
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": hide_webdriver_js},
        )
        self.driver = driver

    def open_index_page(self) -> None:
        '''
        Open the Wenxin Yiyan home page and wait until the document is loaded.

        Waits up to 60 seconds for ``document.readyState`` to become
        ``'complete'``, then logs the page title.

        :return: None
        '''
        self.driver.get('https://yiyan.baidu.com/')
        # Block until the browser reports the document as fully loaded
        WebDriverWait(self.driver, 60).until(
            lambda d: d.execute_script('return document.readyState') == 'complete')
        logger.info(f'首页加载完成.... : {self.driver.title}')

    def answer_is_finish(self) -> bool:
        '''
        Wait for the answer to finish and report whether it did.

        The answer is treated as finished once an element whose text contains
        '重新生成' is present on the page; the wait lasts at most 120 seconds.

        :return: True when the marker element appeared, False when the wait
            timed out or the driver raised an error
        '''
        try:
            WebDriverWait(self.driver, 120).until(
                EC.presence_of_element_located((By.XPATH, "//*[contains(text(), '重新生成')]"))
            )
        except WebDriverException as exc:
            # Covers the wait timeout (TimeoutException is a subclass) and other
            # driver faults, without swallowing KeyboardInterrupt / SystemExit.
            logger.warning(f'等待文心一言回答结束失败:{exc}')
            return False
        return True

    def talk_to_ai(self, question: str):
        '''
        Send a question to the chat page and return the answer text.

        :param question: the question text; line breaks are removed before typing
        :return: the text of the first ``div`` with class ``custom-html``, or
            ``None`` when the answer did not finish in time or no such element
            was found
        '''
        # Line breaks are removed before typing (presumably because a newline
        # keystroke would submit the message early - confirm).
        question = question.replace('\n', '')
        logger.info(f'文心一言问答,输入:{question}')

        editable_div = self.driver.find_element(By.CLASS_NAME, 'yc-editor')
        # The text is typed one character per send_keys call
        for char in question:
            editable_div.send_keys(char)
        # Submit with the Enter key in case the send button does not respond
        editable_div.send_keys(Keys.RETURN)
        # Pause briefly after submitting before polling for the answer
        time.sleep(3)

        if not self.answer_is_finish():
            logger.warning('文心一言回答未在规定时间内完成')
            return None

        # NOTE(review): the first matching element is used; in a multi-turn
        # conversation this may not be the latest answer - verify against the page.
        elements = self.driver.find_elements(By.XPATH, '//div[@class=\'custom-html\']')
        if not elements:
            logger.warning('未找到文心一言的回答内容')
            return None

        text = elements[0].text
        logger.info(f'文心一言回答:{text}')
        return text
if __name__ == "__main__":
    # Demo run: open the chat page, ask one question and log the reply.
    client = WEN_XIN_AI()
    try:
        client.open_index_page()
        reply = client.talk_to_ai('你好')
        logger.info(reply)
        # Leave the browser open for a while before the driver is shut down
        time.sleep(200)
    finally:
        # Always close the browser, even when a step above failed
        client.driver.quit()
总结
以上就是代码的实现细节,目前市面上的其他大模型也都可以采用这种思路。使用过程中如果提示缺少依赖包,先用 pip install 安装即可。文心一言有一定的反爬机制,长时间使用可能会被短暂封号。