项目背景
最近公司项目有个需求:在本地谷歌浏览器中,对 Amazon 页面上的 JS(Jungle Scout)插件做自动化操作。原本需要人工逐个输入关键词检索,再点击插件里的“下载csv文件”按钮逐个下载;如果关键词数量上千甚至上万,就太繁琐了,因此要实现自动化下载csv文件。我原本以为只需利用playwright或者selenium的自动化即可轻易解决,没想到需要连接到本地浏览器并加载插件,折腾了半天;再加上macOS系统下的路径比较麻烦,参阅了大量博主的资料后,终于找到了解决办法。
原理:
chromedriver 加载浏览器插件--->打开浏览器--->
在搜索框输入keyword检索词--->使用selenium 点击下载按钮--->
等待页面出现下载完成,然后进行下一个关键词的csv文件下载-->
直至keyword列表被遍历完-->over!
实现效果:
amazon插件js自动化
使用说明:
①配置好浏览器驱动,保证驱动路径没问题
②直接运行run.py即可
③运行前必须完全退出谷歌浏览器
连接本地浏览器最大的问题就是路径问题
# MacOS下路径自行修改
chrome_options.add_argument(r'user-data-dir=/Users/(你的)/Library/Application Support/Google/Chrome')
# Windows下路径自行修改
chrome_options.add_argument(r'user-data-dir=C:\Users\(你的)\AppData\Local\Google\Chrome\User Data')
修改好自己的路径即可运行下面代码
完整代码如下:
#!/usr/bin/env python
# coding:utf-8
"""
Name : run.py
Time : 2023/5/22 19:45
Desc:
"""
from loguru import logger
from selenium import webdriver
from time import sleep
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service # 新增
from selenium.webdriver.common.by import By
import pandas as pd
# Browser initialisation: build a Chrome session that loads the extension and
# reuses the local Chrome profile, then read the keyword list from Excel.
import sys

# Path to the packed extension (.crx); change it to match your machine.
extension_path = r"/亚马逊js插件连接本地浏览器版/Jungle-Scout-Extension-v7.3.2.crx"
chrome_options = Options()
chrome_options.add_extension(extension_path)
# Point Chrome at the local user profile. The original added BOTH the macOS
# and the Windows path, so on either OS Chrome was handed a path belonging to
# the other one; choose exactly one path based on the running platform.
# Replace "(你的)" with your own user name before running.
if sys.platform == 'win32':
    user_data_dir = r'C:\Users\(你的)\AppData\Local\Google\Chrome\User Data'
else:
    # The script was written for macOS, so that path is the default.
    user_data_dir = r'/Users/(你的)/Library/Application Support/Google/Chrome'
chrome_options.add_argument(f'user-data-dir={user_data_dir}')
chrome_options.add_experimental_option('useAutomationExtension', False)
# Create the browser instance (adjust the chromedriver path if needed).
service = Service(executable_path='/usr/local/bin/chromedriver')
bor = webdriver.Chrome(service=service, options=chrome_options)
# Short page-load timeout: navigation calls may raise once 3 s have elapsed,
# and the keyword loop below treats that as non-fatal.
bor.set_page_load_timeout(3)
bor.maximize_window()
# Element lookups retry for up to 7 s before failing.
bor.implicitly_wait(7)
files = "/Users/zhangxuhe/Desktop/amazon关键词testing.xlsx"
# Read only the first column; the first row is taken as the column header and
# is therefore not part of the values.
df = pd.read_excel(files, usecols=[0])
# One single-element list per spreadsheet row, e.g. [['keyword a'], ...].
df_li = df.values.tolist()
# Main loop: for every keyword, run the Amazon search, open the extension
# popup, trigger the CSV export and close the popup again.
from urllib.parse import quote_plus

for index, a in enumerate(df_li, start=1):
    try:
        if index == 1:
            # The first keyword is loaded directly through the search URL.
            # quote_plus() encodes spaces as '+' (as the original join did)
            # and also escapes characters such as '&' or '#' that would
            # otherwise corrupt the query string.
            keyword = quote_plus(str(a[0]))
            try:
                bor.get(f'https://www.amazon.com/s?k={keyword}')
            except Exception as exc:
                # Best effort: the driver uses a short page-load timeout, so
                # a slow page raises here even though it keeps loading.
                logger.debug(f'initial page load raised: {exc!r}')
            # Get all windows currently open in the browser. If a second one
            # exists, close it and return to the first window; guard against
            # the case where no second window was opened.
            wins = bor.window_handles
            if len(wins) > 1:
                bor.switch_to.window(wins[1])
                bor.close()
                bor.switch_to.window(wins[0])
            sleep(1)
            logger.info('浏览器初始化中')
            # Optional (disabled): switch the delivery ZIP code to 10044.
            # bor.find_element(By.XPATH, '/html/body/div[1]/header/div/div[1]/div[1]/div[2]/span/a/div[2]/span[2]').click()
            # bor.find_element(By.XPATH, '//*[@id="GLUXZipUpdateInput"]').send_keys('10044')
            # bor.find_element(By.XPATH, '//*[@id="GLUXZipUpdate"]/span/input').click()
            # bor.find_element(By.XPATH, '//*[@id="GLUXConfirmClose"]').click()
        else:
            # Later keywords are typed into the search box of the open page.
            bor.find_element(By.CSS_SELECTOR, '#twotabsearchtextbox').clear()
            bor.find_element(By.CSS_SELECTOR, '#twotabsearchtextbox').send_keys(a[0])
            try:
                bor.find_element(By.CSS_SELECTOR, '#nav-search-submit-button').click()
            except Exception as exc:
                # Best effort: the click starts a navigation that can exceed
                # the page-load timeout.
                logger.debug(f'search submit raised: {exc!r}')
        sleep(3)
        # Open the extension popup; fall back to a native click on the button.
        try:
            js = 'document.querySelector("#popup-button > div.JsIcon-sc-96t5rt-1.biMPgO").click()'
            bor.execute_script(js)
        except Exception:
            bor.find_element(By.ID, 'popup-button').click()
        print('2')
        # Click the "download table" control; fall back to a JS click on its
        # parent element.
        try:
            bor.find_element(By.CSS_SELECTOR,
                             '#jsExtensionBaseModalId > div.Container-sc-1tzcbkm-1.gNfIMq > div > div.Flex-sc-sqmtka-0.TableContainer-sc-ybu0mp-1.bXUgal.hhoMvF > div.Flex-sc-sqmtka-0.Container-sc-n6pzpt-0.eQnaRt.huoEMs > div:nth-child(1) > div.Flex-sc-sqmtka-0.kGVKPL > div > div > div > label').click()
        except Exception:
            js = 'document.querySelector("#jsExtensionBaseModalId > div.Container-sc-1tzcbkm-1.gNfIMq > div > div.Flex-sc-sqmtka-0.TableContainer-sc-ybu0mp-1.bXUgal.hhoMvF > div.Flex-sc-sqmtka-0.Container-sc-n6pzpt-0.eQnaRt.huoEMs > div:nth-child(1) > div.Flex-sc-sqmtka-0.kGVKPL > div > div > div").click()'
            bor.execute_script(js)
        print('3')
        # Click the download button in the menu that opened.
        try:
            bor.find_element(By.XPATH, '/html/body/div[8]/div/div/div[1]/label').click()
        except Exception:
            # Raw string: the backslashes are CSS escapes for ':' and must
            # reach the selector engine unchanged.
            bor.find_element(By.CSS_SELECTOR, r'#radix-\:rvu\: > div > div:nth-child(1) > label').click()
        print('4')
        # NOTE(review): success is logged right after the click; nothing here
        # verifies that the CSV file actually finished downloading.
        logger.success(f'{a[0]}下载成功!')
        # Close the extension popup.
        try:
            bor.find_element(By.CSS_SELECTOR,
                             '#jsExtensionBaseModalId > div.Flex-sc-sqmtka-0.Container-sc-96ffk0-0.ftsCjc.eNJwez > div:nth-child(5) > svg').click()
        except Exception:
            bor.find_element(By.XPATH, '//*[@id="jsExtensionBaseModalId"]/div[1]/div[5]/svg').click()
        print('5')
    except Exception as exc:
        # Catch Exception rather than everything so Ctrl-C still stops the
        # run; a failed keyword is logged and the loop moves on.
        logger.error(f'{a[0]}下载失败!')
        logger.debug(f'failure cause: {exc!r}')
文末附上
参考的原文大佬博主链接