爬取公网代理ip

爬取国内公网代理ip,测试代理访问页面是否正常。


package iptest;

import org.apache.commons.io.IOUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.*;
import java.nio.charset.StandardCharsets;

/**
 * Scrapes free public proxy lists and prints the proxies that can actually load a web page.
 *
 * <p>Each list site is scraped on its own thread. Every candidate address is used as an HTTP
 * proxy for a request to Baidu; a candidate whose response contains the expected marker text is
 * printed to standard output as {@code ip:port==>location==>listName}.
 */
public class PaQuIp {

    /**
     * User-Agent header sent to the list sites that are fetched with one. The value is kept
     * exactly as the original code sent it.
     * NOTE(review): presumably these sites reject the default Java User-Agent - confirm.
     */
    private static final String USER_AGENT = "Mozilla/4.0 compatible; MSIE 5.0;Windows NT; DigExt)";

    /** Connect/read timeout for downloading a list page, so a stalled site cannot hang a thread. */
    private static final int LIST_TIMEOUT_MS = 10 * 1000;

    /** Connect/read timeout for the request sent through a candidate proxy. */
    private static final int PROXY_TIMEOUT_MS = 2000;

    /** Connect/read timeout for the IP location lookup. */
    private static final int LOOKUP_TIMEOUT_MS = 5 * 1000;

    /** Page requested through each candidate proxy. */
    private static final String PROBE_URL = "https://www.baidu.com/?tn=48021271_8_hao_pg";

    /** Text that must appear in the probe response for the proxy to count as working. */
    private static final String PROBE_MARKER = "百度";

    /**
     * Starts one scraping thread per proxy list site.
     *
     * @param args unused
     * @throws Exception never thrown by the current body; kept for signature compatibility
     */
    public static void main(String[] args) throws Exception {
        new Thread(new ScrapeTask("西拉代理") {
            @Override
            void scrape() throws Exception {
                xiLa();
            }
        }).start();
        new Thread(new ScrapeTask("西次代理") {
            @Override
            void scrape() throws Exception {
                xiCi();
            }
        }).start();
        new Thread(new ScrapeTask("K代理") {
            @Override
            void scrape() throws Exception {
                k();
            }
        }).start();
        new Thread(new ScrapeTask("云代理") {
            @Override
            void scrape() throws Exception {
                yunDaiLi();
            }
        }).start();
    }

    /**
     * Scrapes pages 1-2 of the xiladaili.com list.
     *
     * <p>Column 0 holds {@code ip:port} and column 1 the protocol label; only rows labelled
     * {@code HTTP,HTTPS代理} are tested.
     *
     * @throws Exception if a list page cannot be downloaded or parsed
     */
    public static void xiLa() throws Exception {
        for (int page = 1; page <= 2; page++) {
            // This site is fetched without a User-Agent header, as in the original code.
            Elements rows = fetchTableRows("http://www.xiladaili.com/gaoni/" + page + "/", null);
            for (int i = 0; i < rows.size(); i++) {
                Elements cells = rows.get(i).select("td");
                if (cells.size() < 2 || !"HTTP,HTTPS代理".equals(cells.get(1).text())) {
                    continue;
                }
                String[] ipPort = cells.get(0).text().split(":");
                if (ipPort.length != 2) {
                    continue;
                }
                testCandidate(ipPort[0], ipPort[1], "西拉代理");
            }
        }
    }

    /**
     * Scrapes page 1 of the xicidaili.com list (IP in column 1, port in column 2).
     *
     * @throws Exception if the list page cannot be downloaded or parsed
     */
    public static void xiCi() throws Exception {
        for (int page = 1; page <= 1; page++) {
            scrapeIpPortColumns("https://www.xicidaili.com/nn/" + page, 1, 2, "西次代理");
        }
    }

    /**
     * Scrapes pages 1-5 of the kuaidaili.com list (IP in column 0, port in column 1).
     *
     * @throws Exception if a list page cannot be downloaded or parsed
     */
    public static void k() throws Exception {
        for (int page = 1; page <= 5; page++) {
            scrapeIpPortColumns("https://www.kuaidaili.com/free/inha/" + page, 0, 1, "K代理");
        }
    }

    /**
     * Scrapes pages 1-5 of the ip3366.net list (IP in column 0, port in column 1).
     *
     * @throws Exception if a list page cannot be downloaded or parsed
     */
    public static void yunDaiLi() throws Exception {
        for (int page = 1; page <= 5; page++) {
            scrapeIpPortColumns("http://www.ip3366.net/?stype=1&page=" + page, 0, 1, "云代理");
        }
    }

    /**
     * Tests whether the given address works as an HTTP proxy by loading the Baidu page through
     * it, and prints the proxy when the response contains the expected marker text.
     *
     * <p>This is a best-effort probe: any failure (refused connection, timeout, invalid address)
     * simply means the candidate is not reported. The method never throws.
     *
     * @param hostName proxy host name or IP address
     * @param port proxy port
     * @param name display name of the list the candidate came from, echoed in the output line
     */
    public static void keYongIp(String hostName, int port, String name) {
        try {
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(hostName, port));
            URLConnection conn = new URL(PROBE_URL).openConnection(proxy);
            conn.setConnectTimeout(PROXY_TIMEOUT_MS);
            conn.setReadTimeout(PROXY_TIMEOUT_MS);

            String body;
            try (InputStream in = conn.getInputStream()) {
                // Decode explicitly so the marker comparison does not depend on the platform charset.
                body = IOUtils.toString(in, StandardCharsets.UTF_8);
            }
            if (body.contains(PROBE_MARKER)) {
                System.out.println(hostName + ":" + port + "==>" + ipDiZhi(hostName) + "==>" + name);
            }
        } catch (IOException | RuntimeException ignored) {
            // Most free proxies are dead; an unusable candidate is the expected case and is
            // deliberately not reported.
        }
    }

    /**
     * Looks up the location text for an IP address through the ip138 query API.
     *
     * <p>The request carries a {@code token} header. The token is empty in this file; a real
     * one has to be obtained by registering at http://api.ip138.com.
     *
     * @param ip the IP address to look up
     * @return the response text with digits, spaces and the literal {@code ...} removed, or
     *     {@code null} when {@code ip} is null, the response status is not 200, or the request
     *     fails
     */
    public static String ipDiZhi(String ip) {
        if (ip == null) {
            return null;
        }

        // Register at http://api.ip138.com to obtain a token.
        String token = "";

        HttpURLConnection conn = null;
        try {
            URL url = new URL("http://api.ip138.com/query/?ip="
                    + URLEncoder.encode(ip, "UTF-8") + "&datatype=text");
            conn = (HttpURLConnection) url.openConnection();
            conn.setConnectTimeout(LOOKUP_TIMEOUT_MS);
            conn.setReadTimeout(LOOKUP_TIMEOUT_MS);
            // No setDoOutput(true): this is a body-less GET.
            conn.setUseCaches(false);
            conn.setInstanceFollowRedirects(false);
            conn.setRequestMethod("GET");
            conn.setRequestProperty("token", token);

            if (conn.getResponseCode() != 200) {
                return null;
            }

            StringBuilder builder = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
                for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                    builder.append(line);
                }
            }
            return builder.toString()
                    .replaceAll("\\d+", "")
                    .replaceAll(" ", "")
                    .replace("...", "")
                    .trim();
        } catch (IOException e) {
            System.err.println("IP location lookup failed for " + ip + ": " + e);
            return null;
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Downloads a list page and returns every {@code tr} element found inside its tables.
     *
     * @param pageUrl address of the list page
     * @param userAgent User-Agent header to send, or {@code null} to send none explicitly
     * @return the table rows of the page, possibly empty
     * @throws IOException if the page cannot be downloaded or parsed
     */
    private static Elements fetchTableRows(String pageUrl, String userAgent) throws IOException {
        URLConnection conn = new URL(pageUrl).openConnection();
        if (userAgent != null) {
            conn.setRequestProperty("User-Agent", userAgent);
        }
        conn.setConnectTimeout(LIST_TIMEOUT_MS);
        conn.setReadTimeout(LIST_TIMEOUT_MS);
        try (InputStream in = conn.getInputStream()) {
            // A null charset lets jsoup detect the encoding from the page itself (falling back
            // to UTF-8) instead of decoding with the platform default.
            Document doc = Jsoup.parse(in, null, pageUrl);
            return doc.select("table").select("tr");
        }
    }

    /**
     * Scrapes one list page whose rows carry the IP and the port in separate columns and tests
     * every candidate found. Rows with too few {@code td} cells (such as header rows) are skipped.
     *
     * @param pageUrl address of the list page
     * @param ipColumn zero-based index of the cell holding the IP address
     * @param portColumn zero-based index of the cell holding the port
     * @param name display name of the list, passed on to {@link #keYongIp}
     * @throws IOException if the page cannot be downloaded or parsed
     */
    private static void scrapeIpPortColumns(String pageUrl, int ipColumn, int portColumn, String name)
            throws IOException {
        Elements rows = fetchTableRows(pageUrl, USER_AGENT);
        int requiredCells = Math.max(ipColumn, portColumn) + 1;
        for (int i = 0; i < rows.size(); i++) {
            Elements cells = rows.get(i).select("td");
            if (cells.size() < requiredCells) {
                continue;
            }
            testCandidate(cells.get(ipColumn).text(), cells.get(portColumn).text(), name);
        }
    }

    /**
     * Parses the port text and probes the candidate; a row whose port is not a number is skipped
     * instead of aborting the whole scrape.
     *
     * @param ip candidate proxy host
     * @param portText candidate proxy port as scraped from the page
     * @param name display name of the list the candidate came from
     */
    private static void testCandidate(String ip, String portText, String name) {
        int port;
        try {
            port = Integer.parseInt(portText.trim());
        } catch (NumberFormatException notAPort) {
            return;
        }
        keYongIp(ip, port, name);
    }

    /** Thread body that runs one scraper and reports its failure instead of swallowing it. */
    private abstract static class ScrapeTask implements Runnable {

        private final String sourceName;

        ScrapeTask(String sourceName) {
            this.sourceName = sourceName;
        }

        /** Runs the scraper for this task's list site. */
        abstract void scrape() throws Exception;

        @Override
        public void run() {
            try {
                scrape();
            } catch (Exception e) {
                System.err.println("Scraping " + sourceName + " failed: " + e);
            }
        }
    }

}

实现原理:通过爬取公网免费代理,测试代理是否可以正常访问网页。
需要jar包:jsoup-1.11.2.jar,commons-io-2.6.jar
jar包地址:https://download.csdn.net/download/fmw396718372/12346247

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值