从国内免费代理中筛选有效地址，制作代理地址池

#!C:\Python3.7
# -*- coding:utf-8 -*-
import requests
from lxml import etree
import os

import optparse
# Base URL of the free-proxy listing; main() appends the page number to it.
base_url = "https://www.kuaidaili.com/free/inha/"

def get_proxy_IP_port(url, timeout=10):
    """Scrape one listing page and return the proxies that pass test_proxy.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the listing page before giving up.

    Returns:
        A list of "ip:port" strings for every table row whose proxy passed
        test_proxy. The list is empty (never None) when the page cannot be
        fetched or parsed, so callers can always concatenate the result.
    """
    print("抓取网页:",url)
    ipAndPortList = []

    # Fetching and parsing are best-effort: any failure here means the page
    # yields nothing, but it must not abort the caller's paging loop.
    try:
        req = requests.get(url, timeout=timeout)
        html = etree.HTML(req.text)
        tr_lists = html.xpath('//*[@id="list"]//tbody/tr')
    except Exception as e:
        print(e)
        return ipAndPortList

    for tr in tr_lists:
        ip_cells = tr.xpath('./td[@data-title="IP"]/text()')
        port_cells = tr.xpath('./td[@data-title="PORT"]/text()')
        # Skip rows that lack either cell instead of raising IndexError.
        if not ip_cells or not port_cells:
            continue
        ip = ip_cells[0].strip()
        port = port_cells[0].strip()

        # A single failing proxy must not discard the proxies already
        # validated on this page, so errors are handled per row.
        try:
            usable = test_proxy(ip, port)
        except Exception as e:
            print(e)
            continue
        if usable:
            ipAndPortList.append(ip + ":" + port)

    return ipAndPortList


def test_proxy(ip, port, timeout=5):
    """Check whether the proxy at ip:port can fetch the probe page.

    Args:
        ip: Proxy host address.
        port: Proxy port (string or integer).
        timeout: Seconds to wait for the probe request before treating the
            proxy as unusable.

    Returns:
        True when the probe request through the proxy returns HTTP 200,
        False on any other status or on any requests-level failure
        (connection error, proxy error, timeout, ...).
    """
    proxy_url = 'http://' + str(ip) + ':' + str(port)
    # The probe URL is https, so the proxy must be registered under the
    # 'https' key as well; with only an 'http' entry requests would bypass
    # the proxy entirely and the check would always succeed.
    proxies = {'http': proxy_url, 'https': proxy_url}
    try:
        req = requests.get(url="https://www.baidu.com/", proxies=proxies,
                           timeout=timeout)
    except requests.RequestException:
        # Dead or unreachable proxies are the expected common case.
        return False
    return req.status_code == 200


def save_prox(filename,proxy_list):
    """Write every proxy in proxy_list to filename, one entry per line.

    The file is truncated first, and the destination (filename joined onto
    the current working directory) is printed once writing has finished.
    """
    destination = os.path.join(os.getcwd(), filename)

    with open(filename, 'w+') as out:
        out.writelines(entry + '\n' for entry in proxy_list)

    print("proxy save file :", destination)



def main():
    """Prompt for a proxy count, collect working proxies and save them.

    Listing pages are fetched one after another (base_url + page number)
    until at least the requested number of working proxies has been
    collected, or until several consecutive pages yield nothing. The
    result is written to "proxy.txt" via save_prox.
    """
    num = input("输入抓取代理个数:")
    # Parse once up front instead of on every loop iteration, and report
    # bad input instead of failing with a traceback.
    try:
        wanted = int(num)
    except ValueError:
        print("输入无效,请输入整数:", num)
        return

    proxy_list = []
    page = 1
    # Give up after this many consecutive pages without a usable proxy so
    # a blocked or exhausted listing cannot make the loop spin forever.
    max_empty_pages = 10
    empty_pages = 0

    while len(proxy_list) < wanted:
        url = base_url + str(page)
        # Treat a missing result (None) the same as an empty page.
        found = get_proxy_IP_port(url) or []
        if found:
            proxy_list.extend(found)
            empty_pages = 0
        else:
            empty_pages += 1
            if empty_pages >= max_empty_pages:
                print("连续多页未抓到可用代理,提前结束")
                break
        page += 1

    print(len(proxy_list))
    save_prox("proxy.txt", proxy_list)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值