在 Docker 中启动 Selenium Grid 时使用扩展,并通过隧道代理(比如阿布云代理)访问网络。本文要解决的问题是:how to set a proxy with authentication in Selenium ChromeDriver (Python)。通过扩展实现的账号密码认证代理(proxy with authentication)不支持 Chrome headless 模式,但 Docker Selenium 或 Selenium Grid 集群是支持的。
启动selenium docker
docker run -d -p 4444:4444 --shm-size=2g -m 800M --memory-swap=800M --name=chrome --restart=always selenium/standalone-chrome
selenium 使用隧道动态代理(使用中会生成本地zip插件文件)
import os
import time
import zipfile
from selenium import webdriver
from scrapy. selector import Selector
# Address (host:port) of the remote Selenium server used when running
# against the Docker container / grid.
REMOTE_SELENIUM = "111.11.111.22:4444"

# Tunnel proxy endpoint (Abuyun dynamic HTTP tunnel).
PROXY_HOST, PROXY_PORT = "http-dyn.abuyun.com", 9020

# Proxy account credentials; intentionally blank here, fill in before use.
PROXY_USER = PROXY_PASS = ""
# manifest.json of the generated Chrome extension. It declares the
# permissions background.js relies on (proxy settings and blocking
# webRequest handling) and registers background.js as the background script.
# The keys "manifest_version" and "background" must be spelled exactly like
# this; misspelled keys are not recognised by Chrome.
manifest_json = """
{
"version":"1.0.0",
"manifest_version":2,
"name":"Chrome Proxy",
"permissions":[
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"<all_urls>",
"webRequest",
"webRequestBlocking"
],
"background":{
"scripts":["background.js"]
},
"minimum_chrome_version":"22.0.0"
}
"""
# background.js of the generated Chrome extension, rendered with the proxy
# settings defined above. It does two things:
#   1. forces every request (except "localhost") through the fixed HTTP
#      proxy PROXY_HOST:PROXY_PORT;
#   2. answers the proxy's authentication challenge with
#      PROXY_USER / PROXY_PASS via a blocking onAuthRequired listener.
# %-formatting is used on purpose: the JavaScript body is full of braces,
# which would all need escaping with str.format or an f-string.
background_js = """
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: "http",
host: "%s",
port: parseInt(%s)
},
bypassList: ["localhost"]
}
};
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
return {
authCredentials: {
username: "%s",
password: "%s"
}
};
}
chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{urls: ["<all_urls>"]},
['blocking']
);
""" % (PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)
def get_chromedriver(use_proxy=False, user_agent=None, use_docker=True):
    """Create a Chrome WebDriver, optionally using the authenticated proxy.

    Args:
        use_proxy: When true, write ``proxy_auth_plugin.zip`` (containing
            ``manifest.json`` and ``background.js``) into the current working
            directory and load it into Chrome as an extension.
        user_agent: Optional User-Agent string, passed to Chrome through the
            ``--user-agent`` command-line switch.
        use_docker: When true, connect to the remote Selenium server at
            ``REMOTE_SELENIUM``; otherwise start a local chromedriver from
            ``/usr/local/bin/chromedriver``.

    Returns:
        The newly created WebDriver instance.
    """
    chrome_options = webdriver.ChromeOptions()

    if use_proxy:
        # The extension is rebuilt on every call so it always reflects the
        # current proxy settings.
        pluginfile = 'proxy_auth_plugin.zip'
        with zipfile.ZipFile(pluginfile, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", background_js)
        chrome_options.add_extension(pluginfile)

    if user_agent:
        chrome_options.add_argument('--user-agent=%s' % user_agent)

    if use_docker:
        return webdriver.Remote(
            command_executor="http://{}/wd/hub".format(REMOTE_SELENIUM),
            options=chrome_options,
        )

    # The driver path is absolute, so joining it onto the script directory
    # (as was done before) had no effect: os.path.join drops everything
    # before an absolute component. `options=` replaces the deprecated
    # `chrome_options=` keyword and matches the Remote branch above.
    return webdriver.Chrome(
        '/usr/local/bin/chromedriver',
        options=chrome_options,
    )
def main():
    """Fetch https://www.cip.cc through the proxy 11 times and print the result.

    Each iteration loads the page, prints the text of its first ``<pre>``
    element (the page's IP report) and then waits three seconds. The browser
    session is always shut down at the end, even if a request fails.
    """
    driver = get_chromedriver(use_proxy=True, use_docker=True)
    print(driver)
    try:
        for _ in range(11):
            driver.get('https://www.cip.cc')
            ip_text = Selector(text=driver.page_source).xpath(
                '//pre/text()').extract_first()
            # extract_first() yields None when the page has no <pre> text
            # (e.g. an error page); print that instead of crashing on strip().
            print(ip_text.strip() if ip_text is not None else None)
            # The window must stay open between iterations: closing the only
            # window ends the browser, so the next get() would fail.
            time.sleep(3)
    finally:
        # quit() ends the whole session and frees the slot on the server.
        driver.quit()


if __name__ == '__main__':
    main()