使用不需要验证的代理
from selenium import webdriver
import random
# Configure Chrome to use an unauthenticated proxy on a randomly chosen
# port in the range 20000-20999.
chrome_options = webdriver.ChromeOptions()
port = random.randint(20000, 20999)
ip = "p.webshare.io:{}".format(port)
# The options object is named `chrome_options` (not `chromeOptions`), and the
# proxy URL needs the full "scheme://host:port" form for Chrome to parse it.
chrome_options.add_argument("--proxy-server=http://{}".format(ip))
使用需要验证的代理(注意:在 Linux 服务器上,带验证的代理设置可能无效)
def create_proxyauth_extension(proxy_host, proxy_port,
                               proxy_username, proxy_password,
                               scheme='http', plugin_path=None):
    """Build a Chrome extension (zip) that sets an authenticated proxy.

    The archive contains a ``manifest.json`` and a ``background.js``. The
    background script sets a fixed proxy server through
    ``chrome.proxy.settings`` and answers ``onAuthRequired`` events with the
    given credentials.

    Args:
        proxy_host (str): Proxy address or domain name.
        proxy_port (int): Proxy port number.
        proxy_username (str): User name for proxy authentication.
        proxy_password (str): Password for proxy authentication.

    Kwargs:
        scheme (str): Proxy scheme; defaults to ``'http'``.
        plugin_path (str): Path of the zip file to write. Defaults to
            ``'<proxy_username>.zip'``, relative to the working directory.

    Returns:
        str: The path of the written extension archive (``plugin_path``).

    Raises:
        ValueError: If ``proxy_port`` cannot be converted to an integer.
    """
    # Imported locally: the visible file does not import these modules at
    # module level, so the function would otherwise fail with NameError.
    import json
    import string
    import zipfile

    if plugin_path is None:
        plugin_path = '{}.zip'.format(proxy_username)

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # The string placeholders are NOT wrapped in quotes here: each value is
    # rendered with json.dumps below, which adds the quotes and escapes any
    # '"' or '\\' so that unusual credentials cannot break the script.
    background_js = string.Template(
        """
        var config = {
            mode: "fixed_servers",
            rules: {
                singleProxy: {
                    scheme: ${scheme},
                    host: ${host},
                    port: parseInt(${port})
                },
                bypassList: ["foobar.com"]
            }
        };
        chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
        function callbackFn(details) {
            return {
                authCredentials: {
                    username: ${username},
                    password: ${password}
                }
            };
        }
        chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
        );
        """
    ).substitute(
        host=json.dumps(proxy_host),
        port=int(proxy_port),
        username=json.dumps(proxy_username),
        password=json.dumps(proxy_password),
        scheme=json.dumps(scheme),
    )

    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)
    return plugin_path
# Build the proxy-auth extension archive and load it into a new Chrome session.
# NOTE(review): `username` is not defined in the visible part of this file;
# it must be assigned before this snippet runs - confirm where it comes from.
proxyauth_plugin_path = create_proxyauth_extension(
    proxy_host="p.webshare.io",
    proxy_port=80,
    proxy_username=username,
    proxy_password="xxxxxx",
)
chrome_options = webdriver.ChromeOptions()
chrome_options.add_extension(proxyauth_plugin_path)
# `options=` is used instead of the deprecated `chrome_options=` keyword,
# which newer Selenium releases no longer accept.
driver = webdriver.Chrome(
    'C:/Users/Administrator/Desktop/merLan/chromedriver.exe',
    options=chrome_options,
)
Linux服务器运行(加上下面几个参数就可以在服务器运行了)
# Chrome flags for running on a server without a display.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--disable-dev-shm-usage')
# `options=` is used instead of the deprecated `chrome_options=` keyword,
# which newer Selenium releases no longer accept.
# NOTE(review): the driver handle is not kept, so this session cannot be
# quit from code - confirm whether that is intended.
webdriver.Chrome(
    'C:/Users/Administrator/Desktop/merLan/chromedriver.exe',
    options=chrome_options,
)
设置不加载图片
# Start Chrome with the image content setting set to 2, so that images are
# not loaded.
chrome_options = webdriver.ChromeOptions()
prefs = {"profile.managed_default_content_settings.images": 2}
chrome_options.add_experimental_option("prefs", prefs)
# `options=` is used instead of the deprecated `chrome_options=` keyword,
# which newer Selenium releases no longer accept.
# NOTE(review): the driver handle is not kept, so this session cannot be
# quit from code - confirm whether that is intended.
webdriver.Chrome(
    'C:/Users/Administrator/Desktop/merLan/chromedriver.exe',
    options=chrome_options,
)
多线程运行
import threading
from selenium import webdriver
def browsers(url):
    """Load *url* in a headless Chrome session and return its page source.

    Each call starts its own Chrome instance, routed through the proxy host
    on a randomly chosen port in the range 20000-20999, with image loading
    disabled.

    Args:
        url (str): Address to open.

    Returns:
        str: The value of ``page_source`` after the page has been loaded.
    """
    chrome_options = webdriver.ChromeOptions()
    port = random.randint(20000, 20999)
    ip = "p.webshare.io:{}".format(port)
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--disable-dev-shm-usage')
    prefs = {"profile.managed_default_content_settings.images": 2}
    chrome_options.add_experimental_option("prefs", prefs)
    chrome_options.add_argument('--proxy-server={}'.format(ip))
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    browser = webdriver.Chrome('chromedriver', options=chrome_options)
    try:
        browser.get(url)
        return browser.page_source
    finally:
        # Always shut the browser down, even when navigation fails, so a
        # failed request does not leave a Chrome process running.
        browser.quit()
# Start all ten workers first and only then wait for them. Joining inside the
# start loop would block on each thread before the next one is created, so
# the work would run one after another instead of concurrently.
# NOTE(review): `runs` is not defined in the visible part of this file; the
# worker defined here is `browsers(url)`, which also needs a URL argument
# (e.g. target=browsers, args=(url,)) - confirm the intended target.
threads = [threading.Thread(target=runs) for _ in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()