#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @time :2020-03-23 11:53:22
import random
import requests
from bs4 import BeautifulSoup
import re
import os.path
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)'
headers = {'User-Agent': user_agent}
def getListProxies():
    session = requests.session()
    page = session.get("http://www.xicidaili.com/nn", headers=headers)
    soup = BeautifulSoup(page.text, 'lxml')
    proxyList = []
    taglist = soup.find_all('tr', attrs={'class': re.compile("(odd)|()")})
    for trtag in taglist:
        tdlist = trtag.find_all('td')
        # Column 1 holds the IP address and column 2 the port
        proxy = {'http': 'http://' + tdlist[1].string + ':' + tdlist[2].string,
                 'https': 'https://' + tdlist[1].string + ':' + tdlist[2].string}
        # url = "http://ip.chinaz.com/getip.aspx"  # URL for testing whether an IP works (the site no longer seems to be reachable)
        # try:
        #     print('proxy is ', proxy)
        #     response = session.get(url, proxies=proxy, timeout=5)
        #     print(response)
        #     proxyList.append(proxy)
        #     if len(proxyList) == 3:
        #         break
        # except Exception:
        #     continue
        proxyList.append(proxy)
        # Cap the number of proxy IPs collected
        if len(proxyList) >= 10:
            break
    return proxyList
def test_ip():
    url = 'http://httpbin.org/get'
    # IP = getListProxies()[random.randint(0, 10)]
    # print(IP)
    # res = requests.get(url, proxies=IP)
    # print(res.text)
    IP_list = getListProxies()
    while True:
        # Refetch the proxy list once every candidate has been tried
        if len(IP_list) == 0:
            IP_list = getListProxies()
        try:
            IP = IP_list.pop()
            print(IP)
            res = requests.get(url, proxies=IP, timeout=5)
            print(res.text)
            # Return the first proxy that successfully reaches httpbin.org
            return IP
        except requests.RequestException:
            continue
if __name__ == '__main__':
    test_ip()
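As an aside, random is imported above but only used in commented-out code; the sketch below is not part of the original script (fetch_with_random_proxy is a hypothetical helper name) and shows one way to rotate through the scraped proxies by picking one at random per request, reusing the httpbin.org test URL from test_ip().
# Illustrative sketch (assumption, not in the original): pick a proxy at random
# per request and drop it from the pool if it fails.
def fetch_with_random_proxy(url='http://httpbin.org/get'):
    proxies = getListProxies()
    while proxies:
        proxy = random.choice(proxies)
        try:
            return requests.get(url, proxies=proxy, timeout=5)
        except requests.RequestException:
            proxies.remove(proxy)  # discard the dead proxy and try another
    return None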
Free proxies:
https://ip.jiangxianli.com/country/%E7%BE%8E%E5%9B%BD?country=%E7%BE%8E%E5%9B%BD
from lxml import etree  # needed for etree.HTML below

def get_g_proxyip():
    url = 'https://ip.jiangxianli.com/country/%E7%BE%8E%E5%9B%BD?country=%E7%BE%8E%E5%9B%BD'
    Headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36',
    }
    t_list = []
    try:
        res = requests.get(url, headers=Headers)
        html = etree.HTML(res.text)
        data_list = html.xpath('//table[@class="layui-table"]//tr')
        # Skip the header row; column 1 is the IP and column 2 the port
        for data in data_list[1:]:
            ip = ''.join(data.xpath('./td[1]/text()')) + ':' + ''.join(data.xpath('./td[2]/text()'))
            t_list.append({'http': 'http://' + ip, 'https': 'https://' + ip})
    except Exception as e:
        print(e)
        # Retry instead of silently returning an empty list
        return get_g_proxyip()
    return t_list
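A small usage sketch, not part of the original (get_working_proxies and its limit parameter are hypothetical): it filters the list returned by get_g_proxyip() down to proxies that actually answer, reusing the httpbin.org check from test_ip() above.
# Illustrative sketch: keep only the proxies from get_g_proxyip() that respond.
def get_working_proxies(limit=3):
    working = []
    for proxy in get_g_proxyip():
        try:
            requests.get('http://httpbin.org/get', proxies=proxy, timeout=5)
            working.append(proxy)
            if len(working) >= limit:
                break
        except requests.RequestException:
            continue
    return working

print(get_working_proxies())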