Python+selenium的使用

使用不需要验证的代理
from selenium import webdriver
import random
# Build Chrome options that send traffic through a proxy needing no credentials.
chrome_options = webdriver.ChromeOptions()
# Choose one port at random from the 20000-20999 range used by this example.
port = random.randint(20000, 20999)
ip = "p.webshare.io:{}".format(port)
# Fixed: the original called the undefined name `chromeOptions` and produced
# "http:host:port" (missing "//"), which is not a valid proxy URL.
chrome_options.add_argument("--proxy-server=http://{}".format(ip))
使用需要验证的代理(注意:在 Linux 服务器上设置验证代理可能无效)
def create_proxyauth_extension(proxy_host, proxy_port,
                               proxy_username, proxy_password,
                               scheme='http', plugin_path=None):
    """Write a Chrome extension zip that sets a proxy and answers its auth prompt.

    The archive contains a ``manifest.json`` and a ``background.js``. The
    script configures a fixed proxy server and registers an ``onAuthRequired``
    listener that returns the given credentials.

    Args:
        proxy_host (str): Proxy address or domain name.
        proxy_port (int): Proxy port number.
        proxy_username (str): Proxy account user name.
        proxy_password (str): Proxy account password.
        scheme (str): Proxy scheme; defaults to ``'http'``.
        plugin_path (str): Path of the zip file to write. Defaults to
            ``'<proxy_username>.zip'`` (relative to the working directory).

    Returns:
        str: The path of the written extension archive (``plugin_path``).
    """
    # Imported locally because the surrounding file does not import these
    # modules; the original raised NameError on `string` and `zipfile`.
    import json
    import string
    import zipfile

    if plugin_path is None:
        plugin_path = '{}.zip'.format(proxy_username)

    # NOTE(review): this is a Manifest V2 extension; confirm the target
    # Chrome version still loads MV2 extensions.
    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # String placeholders are filled with JSON-encoded values (which carry
    # their own quotes), so quotes or backslashes in a credential can no
    # longer break the generated JavaScript.
    background_js = string.Template(
    """
    var config = {
            mode: "fixed_servers",
            rules: {
              singleProxy: {
                scheme: ${scheme},
                host: ${host},
                port: parseInt(${port})
              },
              bypassList: ["foobar.com"]
            }
          };

    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

    function callbackFn(details) {
        return {
            authCredentials: {
                username: ${username},
                password: ${password}
            }
        };
    }

    chrome.webRequest.onAuthRequired.addListener(
                callbackFn,
                {urls: ["<all_urls>"]},
                ['blocking']
    );
    """
    ).substitute(
        host=json.dumps(proxy_host),
        port=proxy_port,
        username=json.dumps(proxy_username),
        password=json.dumps(proxy_password),
        scheme=json.dumps(scheme),
    )
    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)

    return plugin_path

# Usage
# Placeholder account name (the original referenced an undefined `username`);
# replace it and the password below with the real proxy credentials.
username = "xxxxxx"
proxyauth_plugin_path = create_proxyauth_extension(
    proxy_host="p.webshare.io",
    proxy_port=80,
    proxy_username=username,
    proxy_password="xxxxxx",
)
chrome_options = webdriver.ChromeOptions()
chrome_options.add_extension(proxyauth_plugin_path)
# Pass the options through `options=`, matching the multi-threaded example in
# this file; `chrome_options=` is the deprecated spelling of the same keyword.
# The first argument is the path of the chromedriver executable to use.
driver = webdriver.Chrome(
    'C:/Users/Administrator/Desktop/merLan/chromedriver.exe',
    options=chrome_options,
)
Linux服务器运行(加上下面几个参数就可以在服务器运行了)
# Chrome flags for running without a display.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--disable-dev-shm-usage')
# Keep a reference to the driver so it can be quit later, and pass the options
# through `options=` (`chrome_options=` is the deprecated spelling).
# NOTE(review): the chromedriver path below is a Windows location; adjust it
# for the machine this actually runs on.
driver = webdriver.Chrome(
    'C:/Users/Administrator/Desktop/merLan/chromedriver.exe',
    options=chrome_options,
)
设置不加载图片
# Configure Chrome so that pages are loaded without images.
chrome_options = webdriver.ChromeOptions()
# The value 2 for the managed image content setting is what this example uses
# to turn image loading off.
prefs = {"profile.managed_default_content_settings.images": 2}
chrome_options.add_experimental_option("prefs", prefs)
# Keep a reference to the driver so it can be quit later, and pass the options
# through `options=` (`chrome_options=` is the deprecated spelling).
driver = webdriver.Chrome(
    'C:/Users/Administrator/Desktop/merLan/chromedriver.exe',
    options=chrome_options,
)
多线程运行
import threading
from selenium import webdriver
def browsers(url):
    """Fetch ``url`` in a headless, proxied Chrome and return the page source.

    A new Chrome instance is started for every call and is always shut down
    before returning, even when loading the page fails.

    Args:
        url (str): Address of the page to load.

    Returns:
        str: ``page_source`` of the browser after loading ``url``.
    """
    chrome_options = webdriver.ChromeOptions()
    # Choose one proxy port at random from the 20000-20999 range.
    port = random.randint(20000, 20999)
    ip = "p.webshare.io:{}".format(port)
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    # Fixed: this line was indented with a mix of spaces and a tab, which
    # Python 3 rejects.
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--disable-dev-shm-usage')
    # Do not load images.
    prefs = {"profile.managed_default_content_settings.images": 2}
    chrome_options.add_experimental_option("prefs", prefs)
    chrome_options.add_argument('--proxy-server={}'.format(ip))
    # Drop Chrome's 'enable-automation' switch (the original comment
    # described this as "enable developer mode").
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    browser = webdriver.Chrome('chromedriver', options=chrome_options)
    try:
        browser.get(url)
        return browser.page_source
    finally:
        # Always terminate the browser so a failed page load does not leave a
        # Chrome process behind.
        browser.quit()
# Placeholder target page; replace with the URL that should be fetched.
url = "https://www.example.com"
# Fixed: the original targeted the undefined name `runs`, passed no URL, and
# joined each thread right after starting it, so the work ran one at a time.
# Note that Thread discards the value returned by `browsers`.
threads = [threading.Thread(target=browsers, args=(url,)) for _ in range(10)]
for worker in threads:
    worker.start()  # start every thread first so they run concurrently
for worker in threads:
    worker.join()  # then wait for all of them to finish
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值