Use Python to Get Free Proxy IPs


Attributes

Technology:

  • threading
  • fake_useragent

Usage:
Even when this program outputs an IP, free proxies are unstable and hard to keep working for long.
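Because free proxies die quickly, it is worth re-checking a proxy right before you use it rather than trusting an old list. A minimal sketch, assuming the output file ip_pool_home.txt produced by the domestic script below:

import random
import requests

# Load the proxies the scraper saved earlier
with open('ip_pool_home.txt', encoding='utf-8') as fp:
    saved = [line.strip() for line in fp if line.strip()]

# Pick one at random and route a quick test request through it;
# httpbin.org echoes back the origin IP, confirming the proxy still works
proxy = random.choice(saved)
resp = requests.get('http://httpbin.org/get',
                    proxies={'http': 'http://{}'.format(proxy),
                             'https': 'https://{}'.format(proxy)},
                    timeout=3)
print(resp.status_code, resp.json().get('origin'))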

Domestic

import threading

import requests
from fake_useragent import UserAgent
from lxml import etree


def get_proxy_lists():
    proxy_lists = []
    for page_index in range(1, 100):
        # URL of a page that lists free proxy IPs
        ip_url = 'https://www.89ip.cn/index_{}.html'.format(page_index)
        # Fetch the page with a random User-Agent
        # (verify_ssl is only accepted by older fake_useragent releases)
        headers = {'User-Agent': UserAgent(verify_ssl=False).random}
        html = requests.get(url=ip_url, headers=headers).text
        elemt = etree.HTML(html)
        ips_list = elemt.xpath('//table/tbody/tr/td[1]/text()')
        ports_list = elemt.xpath('//table/tbody/tr/td[2]/text()')

        for ip, port in zip(ips_list, ports_list):
            # Join ip and port into "ip:port"
            proxy = ip.strip() + ":" + port.strip()
            proxy_lists.append(proxy)
    return proxy_lists


def ip_pool_foreign(proxy, ip_pool_foreign_lists, test_url='http://httpbin.org/get'):
    headers = {'User-Agent': UserAgent(verify_ssl=False).random}

    proxies = {
        'http': 'http://{}'.format(proxy),
        'https': 'https://{}'.format(proxy),
    }
    try:
        resp = requests.get(
            url=test_url,
            headers=headers,
            proxies=proxies,  # proxies = {'scheme': 'scheme://IP:port'}
            timeout=3)  # give up on proxies that take longer than 3 seconds
        # A 200 status code means the proxy answered successfully
        if resp.status_code == 200:
            ip_pool_foreign_lists.append(proxy + '\n')
    except Exception:
        # Dead or too-slow proxies are silently dropped
        pass


if __name__ == '__main__':
    threads = []
    ip_pool_foreign_lists = []
    proxy_lists = get_proxy_lists()
    print(40*'-', 'proxy_lists over.', 40*'-')

    for proxy in proxy_lists:
        t = threading.Thread(target=ip_pool_foreign,
                             args=(proxy, ip_pool_foreign_lists))
        threads.append(t)
    print(40*'-', 'append over.', 40*'-')

    for t in threads:
        t.daemon = True  # setDaemon() is deprecated; daemon threads die with the main thread
        t.start()
    print(40*'-', 'start over.', 40*'-')

    for t in threads:
        t.join()
    print(40*'-', 'join over.', 40*'-')

    print(40*'-', '[%d]' % len(ip_pool_foreign_lists), 40*'-')
    for ip in ip_pool_foreign_lists:
        print(40*'-', ip, 40*'-')

    with open(r'ip_pool_home.txt', 'w', encoding="utf-8") as fp:
        fp.writelines(ip_pool_foreign_lists)
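A note on the shared list: the worker threads above all append to ip_pool_foreign_lists concurrently. In CPython a bare list.append is atomic under the GIL, so this happens to be safe, but if a worker ever did more than a single append it would not be. A small sketch of the more explicit, lock-guarded version (the names results and record are illustrative, not from the script above):

import threading

results = []
results_lock = threading.Lock()

def record(proxy):
    # The lock makes the shared-state access explicit; a bare
    # list.append would also be safe in CPython, but only by accident
    with results_lock:
        results.append(proxy + '\n')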

Foreign

import random
import re
import threading
import time

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent


def get_proxy_lists(a=1, b=500):
    proxy_lists = []
    for page_index in range(a, b):
        # One-line progress indicator
        print('\r{}/{}'.format(page_index, b - a), end='', flush=True)
        # URL of a page that lists free proxy IPs
        ip_url = 'http://www.66ip.cn/{}.html'.format(page_index)
        # Fetch the page with a random User-Agent
        headers = {'User-Agent': UserAgent(verify_ssl=False).random}
        html = requests.get(url=ip_url, headers=headers).text
        soup = BeautifulSoup(html, 'lxml')
        ips_list = re.findall('<tr><td>(.*?)</td><td>', str(soup))[1:]
        ports_list = re.findall(r'</td><td>(\d*?)</td><td>', str(soup))

        for ip, port in zip(ips_list, ports_list):
            # Join ip and port into "ip:port"
            proxy = ip.strip() + ":" + port.strip()
            proxy_lists.append(proxy)
        # Pause every 10 pages so the site is not hammered
        if page_index % 10 == 0:
            time.sleep(random.randint(0, 5))
    return proxy_lists


def ip_pool_foreign(proxy, ip_pool_foreign_lists, test_url='https://www.pixiv.net/'):
    headers = {'User-Agent': UserAgent(verify_ssl=False).random}

    proxies = {
        'http': 'http://{}'.format(proxy),
        'https': 'https://{}'.format(proxy),
    }
    try:
        resp = requests.get(
            url=test_url,
            headers=headers,
            proxies=proxies,  # proxies = {'scheme': 'scheme://IP:port'}
            timeout=3)  # give up on proxies that take longer than 3 seconds
        # A 200 status code means the proxy answered successfully
        if resp.status_code == 200:
            ip_pool_foreign_lists.append(proxy + '\n')
    except Exception:
        # Dead or too-slow proxies are silently dropped
        pass


if __name__ == '__main__':
    threads = []
    ip_pool_foreign_lists = []
    proxy_lists = get_proxy_lists()
    print(40*'-', 'proxy_lists over.', 40*'-')

    for proxy in proxy_lists:
        t = threading.Thread(target=ip_pool_foreign,
                             args=(proxy, ip_pool_foreign_lists))
        threads.append(t)
    print(40*'-', 'append over.', 40*'-')

    for t in threads:
        t.daemon = True  # setDaemon() is deprecated; daemon threads die with the main thread
        t.start()
    print(40*'-', 'start over.', 40*'-')

    for t in threads:
        t.join()
    print(40*'-', 'join over.', 40*'-')

    print(40*'-', '[%d]' % len(ip_pool_foreign_lists), 40*'-')
    for ip in ip_pool_foreign_lists:
        print(40*'-', ip, 40*'-')

    with open(r'Git\pixiv_download\ip_pool_foreign.txt', 'w', encoding="utf-8") as fp:
        fp.writelines(ip_pool_foreign_lists)
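
Spawning one thread per proxy works, but with up to 500 pages of candidates this can mean thousands of threads alive at once. A bounded pool from the standard library is gentler on the machine; a sketch of how the main block could be rewritten (max_workers=50 and the plain output path are arbitrary choices, not from the original):

from concurrent.futures import ThreadPoolExecutor

if __name__ == '__main__':
    ip_pool_foreign_lists = []
    proxy_lists = get_proxy_lists()

    # At most 50 proxies are checked concurrently; the pool reuses
    # its threads, and the with-block waits for all checks to finish
    with ThreadPoolExecutor(max_workers=50) as pool:
        for proxy in proxy_lists:
            pool.submit(ip_pool_foreign, proxy, ip_pool_foreign_lists)

    with open('ip_pool_foreign.txt', 'w', encoding='utf-8') as fp:
        fp.writelines(ip_pool_foreign_lists)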