# Headless mode (no visible browser window), for Chrome and Firefox respectively
chrome_options.add_argument('headless')
firefox_options.add_argument('-headless')
# Set the User-Agent
chrome_options.add_argument('user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36')
# Set a proxy: Chrome takes a single command-line switch, Firefox takes
# separate host and port preferences.
# NOTE(review): Firefox usually also needs 'network.proxy.type' set to 1 for
# manual proxy preferences to take effect - confirm.
chrome_options.add_argument('--proxy-server=http://171.37.13.94:8123')
firefox_options.set_preference('network.proxy.http', "207.246.94.220")
firefox_options.set_preference('network.proxy.http_port', 3353)
Linux下以代理形式启动Chrome
google-chrome --proxy-server=http://207.246.94.220:3353
Mac下启动Chrome:
/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome
或者
open -a /Applications/Google\ Chrome.app
使用指定代理打开Chrome
open -a /Applications/Google\ Chrome.app --args --proxy-server=socks5://127.0.0.1:1080
只用指定的pac代理打开Chrome
open -a /Applications/Google\ Chrome.app/ --args --proxy-pac-url=file:///Users/liuhao/tmp/auto_switch.pac
https://my.oschina.net/chinaliuhan/blog/3227828
Mac终端启用Debug、无痕Chrome
/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-address=0.0.0.0 --remote-debugging-port=5333 --incognito
linux下启动Chrome远程调试
需要以无头模式启动,否则无法远程调试
google-chrome --remote-debugging-address=0.0.0.0 --remote-debugging-port=5333 -headless
远程调试程序:
from selenium import webdriver
def open_server():
    """Attach to a Chrome instance through its remote-debugging endpoint.

    Connects to the debugger at 10.12.5.66:5333, resizes the window to
    1920x1080, loads http://httpbin.org/ip, saves a screenshot to ``a.png``
    and prints the page source.
    """
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.debugger_address = "10.12.5.66:5333"
    # No proxy switch is added here, e.g.
    #   add_argument("--proxy-server=http://216.158.192.109:8800")
    # because the browser is already running, so setting a proxy at this
    # point has no effect.
    driver = webdriver.Chrome(options=chrome_opts)
    driver.set_window_size(1920, 1080)
    driver.get('http://httpbin.org/ip')
    driver.save_screenshot('a.png')
    print(driver.page_source)


if __name__ == '__main__':
    open_server()
linux下以代理形式启动Chrome并远程调试
google-chrome --proxy-server=http://207.246.94.220:3353 --remote-debugging-address=0.0.0.0 --remote-debugging-port=5333 -headless
# linux下以代理形式启动Chrome并远程调试,并禁止图片的加载
google-chrome --proxy-server=http://216.158.192.109:8800 --remote-debugging-address=0.0.0.0 --remote-debugging-port=5333 -headless --disable-images
调试程序
import subprocess
from selenium import webdriver
def open_server():
    """Attach to a locally running Chrome via the ``debuggerAddress`` option.

    Connects to 127.0.0.1:5333, loads http://httpbin.org/ip, saves a
    screenshot to ``a.png`` and prints the page source.
    """
    chrome_opts = webdriver.ChromeOptions()
    # Setting `debugger_address = "10.12.5.66:5333"` directly does not work
    # on macOS, so the experimental option is used instead.
    chrome_opts.add_experimental_option("debuggerAddress", "127.0.0.1:5333")
    driver = webdriver.Chrome(options=chrome_opts)
    driver.get('http://httpbin.org/ip')
    driver.save_screenshot('a.png')
    print(driver.page_source)


if __name__ == '__main__':
    open_server()
使用指定路径的ChromeDriver
# Start Chrome with an explicitly chosen chromedriver binary. The options
# object is passed as `options=` (the keyword every other snippet in these
# notes uses) instead of the deprecated `chrome_options=` alias.
# NOTE(review): Selenium 4 deprecates `executable_path` in favour of a
# Service object - confirm against the installed Selenium version.
webdriver.Chrome(executable_path='./chromedriver', options=chrome_options)
禁止图片加载
# Disable loading of images and CSS.
# 2 means block, 1 means allow.
# Not applicable in debugging mode.
prefs = {
'profile.default_content_setting_values': {
'images': 2,
# NOTE(review): 'permissions.default.stylesheet' looks like a Firefox
# preference name; it may have no effect in Chrome - confirm.
'permissions.default.stylesheet': 2
}
}
options.add_experimental_option("prefs", prefs)
关闭提示栏
# Exclude the 'enable-automation' switch from Chrome's launch switches
# (used here to close the notification bar).
options.add_experimental_option('excludeSwitches', ['enable-automation'])
设置webdriver
# JavaScript that redefines navigator.webdriver with a getter returning false.
script = 'Object.defineProperty(navigator, "webdriver", {get: () => false,});'
# Run the snippet in the browser through Selenium.
browser.execute_script(script)
程序
from selenium import webdriver
def new_browser():
    """Create a Chrome driver that uses a proxy, incognito mode and no images.

    Returns:
        The started ``webdriver.Chrome`` instance, configured with a
        10-second implicit wait.
    """
    chrome_opts = webdriver.ChromeOptions()

    # Command-line switches, in launch order.
    for switch in ('--proxy-server=http://104.225.151.34:59599', '--incognito'):
        chrome_opts.add_argument(switch)

    chrome_opts.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Content setting value 2 for 'images' means images are not loaded.
    image_prefs = {'profile.default_content_setting_values': {'images': 2}}
    chrome_opts.add_experimental_option("prefs", image_prefs)

    driver = webdriver.Chrome(options=chrome_opts)
    driver.implicitly_wait(10)
    return driver


if __name__ == '__main__':
    browser = new_browser()
    browser.get('http://httpbin.org/ip')
    # Wait for Enter so the script does not finish immediately.
    input()
启动Chrome子进程:(后者无法自动关掉)
# Launch Chrome as a child process with remote debugging on port 3353.
p = subprocess.Popen(["google-chrome","--remote-debugging-port=3353"])
# Terminate that child process through the Popen handle.
p.kill()
def openChrome():
    """Launch Chrome on Windows with remote debugging enabled on port 9222.

    The call blocks until the launched process exits, which is why these
    notes run it on a separate thread.
    """
    # Local import keeps this snippet self-contained.
    import subprocess

    # A raw string keeps the backslashes in the Windows path literal (the
    # original relied on invalid escape sequences such as "\P" and "\G").
    # Passing an argument list avoids shell quoting of the path, which
    # contains spaces and parentheses.
    subprocess.call([
        r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
        "--remote-debugging-port=9222",
    ])
_thread.start_new_thread(openChrome,())
配置独立Chrome
新建文件夹/Users/xxx/Documents/Chrome_separate/one
/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --user-data-dir="/Users/xxx/Documents/Chrome_separate/one"
google-chrome --user-data-dir="/home/xxx/chrome_separate/one"
显示画面
import base64
from io import BytesIO
from PIL import Image
# Take a screenshot as base64, decode it in memory and open it with PIL's
# image viewer (no file is written by this line).
Image.open(BytesIO(base64.b64decode(browser.get_screenshot_as_base64()))).show()
正确移除webdriver特征
from selenium import webdriver
options = webdriver.ChromeOptions()
# Drop the 'enable-automation' switch and turn off the automation extension.
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options)
# Register the script through CDP before navigating, so that it is evaluated
# on each new document; it makes navigator.webdriver read as undefined.
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})
# Bot-detection test page used to check the result.
driver.get('https://bot.sannysoft.com/')
import time
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
# Headless Chrome with a desktop User-Agent.
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument('user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36')
driver = Chrome(options=chrome_options)

# Load the stealth script from disk; the encoding is explicit so the read
# does not depend on the platform default.
with open('./stealth.min.js', encoding='utf-8') as f:
    js = f.read()

# Register the script through CDP before navigating, so that it is evaluated
# on each new document.
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    "source": js
})
driver.get('https://bot.sannysoft.com/')
# Give the test page time to finish before capturing the result.
time.sleep(5)
driver.save_screenshot('result.png')

# You can save the page source as HTML and open it by double-clicking to see
# the complete result. UTF-8 is explicit so that non-ASCII characters in the
# page do not fail under a narrower platform default encoding.
source = driver.page_source
with open('result.html', 'w', encoding='utf-8') as f:
    f.write(source)
默认窗口尺寸
{'width': 1051, 'height': 806}
最好以窗口最大化运行
浏览器最大化时被识别
执行滑动的action被识别
send_keys后被识别
使用 win32api, 系统级别去控制鼠标移动
修改User-Agent
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Create the ChromeOptions object
chrome_options = Options()
# Set the remote-debugging address and port
chrome_options.debugger_address = "127.0.0.1:5333"
# Start the Chrome browser
# NOTE(review): with debugger_address set, this presumably attaches to an
# already-running Chrome rather than launching a new one - confirm.
driver = webdriver.Chrome(options=chrome_options)
# First request, issued before the User-Agent override is applied.
driver.get('https://httpbin.org/get')
user_agent = 'Your Custom User-Agent'
# Override the User-Agent through the CDP Network.setUserAgentOverride command.
driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": user_agent})
# Open the page (second request, issued after the override)
driver.get('https://httpbin.org/get')
# Close the browser
driver.quit()