【Python】爬取xici和快代理的免费代理ip


有时候需要使用一些代理IP。常见的有西刺(xici)和快代理。下面是爬取它们的代码,
使用requests进行爬取

使用Python爬取xici代理的高匿代理ip

import requests
from bs4 import BeautifulSoup
import random


class get_xici_ip():
    """Scrape free high-anonymity proxy IPs from xicidaili.com.

    Typical usage: ``get_xici_ip().get_one()`` returns a ``proxies``
    dict suitable for passing to ``requests.get(..., proxies=...)``.
    """

    def random_agent(self):
        """Return one randomly chosen User-Agent string.

        Rotating the User-Agent helps evade simple anti-scraping checks.
        """
        user_agents = [
            "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_2 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5",
            "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5",
            "MQQBrowser/25 (Linux; U; 2.3.3; zh-cn; HTC Desire S Build/GRI40;480*800)",
            "Mozilla/5.0 (Linux; U; Android 2.3.3; zh-cn; HTC_DesireS_S510e Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
            # BUG FIX: a trailing comma was missing here, which made Python
            # silently concatenate this literal with the next one into a
            # single bogus User-Agent entry.
            "Mozilla/5.0 (SymbianOS/9.3; U; Series60/3.2 NokiaE75-1 /110.48.125 Profile/MIDP-2.1 Configuration/CLDC-1.1 ) AppleWebKit/413 (KHTML, like Gecko) Safari/413",
            'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
            'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
            'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11'
            ]
        return random.choice(user_agents)

    def get_ip_list(self, url, headers):
        """Fetch *url* and extract ``'ip:port'`` strings from its table rows.

        :param url: listing page to scrape
        :param headers: HTTP headers to send (must contain a User-Agent)
        :return: list of ``'ip:port'`` strings
        """
        # Timeout so a dead proxy site cannot hang the caller forever.
        web_data = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(web_data.text, 'lxml')
        ip_list = []
        # Skip the header row; on xici the IP sits in column 1 and the
        # port in column 2.  Guard against separator/ad rows that have
        # fewer cells, which previously raised IndexError.
        for row in soup.find_all('tr')[1:]:
            tds = row.find_all('td')
            if len(tds) >= 3:
                ip_list.append(tds[1].text + ':' + tds[2].text)
        return ip_list

    def get_random_ip(self, ip_list):
        """Pick one ``'ip:port'`` from *ip_list* and wrap it as a proxies dict.

        :raises IndexError: if *ip_list* is empty
        """
        proxy_list = ['http://' + ip for ip in ip_list]
        return {'http': random.choice(proxy_list)}

    def get_one(self):
        """Scrape one random listing page and return one random proxy dict."""
        url = 'http://www.xicidaili.com/nn/%s' % random.randint(1, 10)
        # Use the rotating User-Agent pool instead of the previously
        # hard-coded Chrome string; random_agent() was otherwise dead code.
        headers = {
            'User-Agent': self.random_agent()
        }
        ip_list = self.get_ip_list(url, headers=headers)
        print(ip_list)
        return self.get_random_ip(ip_list)

调用class

# Demo: fetch one random xici proxy and show it.
fetcher = get_xici_ip()
proxy = fetcher.get_one()
print(proxy)

输出结果

['27.25.196.242:9999', '117.91.232.146:9999', '111.177.178.107:9999', '111.177.188.158:9999', '111.177.179.103:9999', '111.177.181.81:9999', '183.148.133.158:9999', '110.52.235.25:9999', '111.177.187.63:9999', '111.177.172.18:9999', '111.177.178.175:9999', '116.209.54.63:9999', '183.148.140.20:9999', '116.209.52.115:9999', '117.90.2.139:9999', '111.177.177.212:9999', '119.102.189.134:9999', '119.102.188.140:9999', '119.102.188.156:9999', '121.61.2.196:9999', '49.86.180.90:9999', '219.139.141.112:9999', '111.177.189.26:9999', '111.177.191.179:9999', '122.192.174.244:9999', '111.177.167.67:9999', '125.123.139.143:9999', '125.126.210.203:9999', '125.123.140.229:9999', '171.41.84.191:9999', '111.177.185.8:9999', '110.52.235.27:9999', '123.163.117.72:9999', '111.181.35.17:9999', '113.121.146.190:9999', '111.176.29.245:9999', '116.209.58.5:9999', '111.177.175.161:9999', '113.122.169.65:9999', '121.61.2.8:808', '121.61.0.140:9999', '111.176.23.161:9999', '116.209.54.236:9999', '171.41.85.124:9999', '125.126.209.156:9999', '180.119.68.211:9999', '111.177.191.214:9999', '58.50.1.139:9999', '59.62.166.108:9999', '115.151.2.63:9999', '111.177.179.41:9999', '171.41.84.200:9999', '115.151.5.40:53128', '59.62.164.163:9999', '121.61.2.128:9999', '116.209.54.117:9999', '111.177.161.26:9999', '125.123.140.246:9999', '111.181.35.55:9999', '125.123.143.70:9999', '171.41.85.163:9999', '112.85.130.88:9999', '121.61.0.165:9999', '171.80.136.10:9999', '111.177.188.81:9999', '115.151.2.101:9999', '171.41.85.201:9999', '113.121.145.6:9999', '121.61.0.98:9999', '171.41.86.14:9999', '111.177.172.77:9999', '111.177.171.222:9999', '110.52.235.11:9999', '183.148.145.122:9999', '110.52.235.206:9999', '111.177.189.246:9999', '110.52.235.237:9999', '58.50.3.137:9999', '117.90.137.148:9999', '116.209.58.116:9999', '116.209.53.154:9999', '110.52.235.123:9999', '175.165.146.223:1133', '115.151.3.7:9999', '116.209.54.220:9999', '111.79.198.71:9999', '115.151.2.189:9999', '116.209.54.48:9999', 
'116.209.54.235:9999', '116.7.176.29:8118', '59.62.165.245:9999', '115.151.7.159:9999', '222.189.190.47:9999', '183.15.121.77:3128', '111.177.170.247:9999', '111.181.61.163:9999', '112.85.170.173:9999', '115.151.2.37:9999', '116.209.56.92:9999', '121.61.2.242:9999']
{'http': 'http://183.148.140.20:9999'}

使用Python爬取快代理的高匿代理ip

随机获取其中一个IP地址

import requests
from bs4 import BeautifulSoup
import random


class get_kuaidaili_ip():
    """Scrape free high-anonymity proxy IPs from kuaidaili.com.

    Typical usage: ``get_kuaidaili_ip().get_one()`` returns a ``proxies``
    dict suitable for passing to ``requests.get(..., proxies=...)``.
    """

    def random_agent(self):
        """Return one randomly chosen User-Agent string.

        Rotating the User-Agent helps evade simple anti-scraping checks.
        """
        user_agents = [
            "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_2 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5",
            "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5",
            "MQQBrowser/25 (Linux; U; 2.3.3; zh-cn; HTC Desire S Build/GRI40;480*800)",
            "Mozilla/5.0 (Linux; U; Android 2.3.3; zh-cn; HTC_DesireS_S510e Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
            # BUG FIX: a trailing comma was missing here, which made Python
            # silently concatenate this literal with the next one into a
            # single bogus User-Agent entry.
            "Mozilla/5.0 (SymbianOS/9.3; U; Series60/3.2 NokiaE75-1 /110.48.125 Profile/MIDP-2.1 Configuration/CLDC-1.1 ) AppleWebKit/413 (KHTML, like Gecko) Safari/413",
            'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
            'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
            'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11'
            ]
        return random.choice(user_agents)

    def get_ip_list(self, url, headers):
        """Fetch *url* and extract ``'ip:port'`` strings from its table rows.

        :param url: listing page to scrape
        :param headers: HTTP headers to send (must contain a User-Agent)
        :return: list of ``'ip:port'`` strings
        """
        # Timeout so a dead proxy site cannot hang the caller forever.
        web_data = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(web_data.text, 'lxml')
        ip_list = []
        # Skip the header row; on kuaidaili the IP sits in column 0 and
        # the port in column 1.  Guard against separator/ad rows that have
        # fewer cells, which previously raised IndexError.
        for row in soup.find_all('tr')[1:]:
            tds = row.find_all('td')
            if len(tds) >= 2:
                ip_list.append(tds[0].text + ':' + tds[1].text)
        return ip_list

    def get_random_ip(self, ip_list):
        """Pick one ``'ip:port'`` from *ip_list* and wrap it as a proxies dict.

        :raises IndexError: if *ip_list* is empty
        """
        proxy_list = ['http://' + ip for ip in ip_list]
        return {'http': random.choice(proxy_list)}

    def get_one(self):
        """Scrape one random listing page and return one random proxy dict."""
        url = 'https://www.kuaidaili.com/free/inha/%s/' % random.randint(1, 10)
        # Use the rotating User-Agent pool instead of the previously
        # hard-coded Chrome string; random_agent() was otherwise dead code.
        headers = {
            'User-Agent': self.random_agent()
        }
        ip_list = self.get_ip_list(url, headers=headers)
        print(ip_list)
        return self.get_random_ip(ip_list)

调用class

# Demo: fetch one random kuaidaili proxy and show it.
fetcher = get_kuaidaili_ip()
proxy = fetcher.get_one()
print(proxy)

输出结果:

['121.61.27.120:9999', '163.204.242.44:9999', '115.151.5.138:9999', '121.239.127.128:9999', '1.192.245.72:9999', '121.232.194.13:9000', '125.123.136.50:9999', '60.13.42.8:9999', '111.177.169.209:9999', '183.147.30.228:9000', '110.52.235.238:9999', '180.118.128.86:9999', '49.89.85.101:9999', '163.204.245.36:9999', '115.151.7.86:9999']
{'http': 'http://111.177.169.209:9999'}
  • 4
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值