爬取国内公网代理ip,测试代理访问页面是否正常。
package iptest;
import org.apache.commons.io.IOUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.*;
/**
 * Scrapes free public HTTP proxy lists and prints the proxies that can actually load a page.
 *
 * <p>Each list site is crawled on its own thread. Every candidate {@code ip:port} found in the
 * site's HTML table is handed to {@link #keYongIp(String, int, String)}, which tries to fetch a
 * probe page through the proxy and prints the proxy when the probe succeeds.
 *
 * <p>Requires jsoup and commons-io on the classpath.
 */
public class PaQuIp {

    /**
     * User-Agent header sent to the xiCi, k and yunDaiLi list sites. The original author marked
     * this header as the essential part of those requests; the value is kept verbatim.
     */
    private static final String BROWSER_USER_AGENT =
            "Mozilla/4.0 compatible; MSIE 5.0;Windows NT; DigExt)";

    /** Connect timeout for downloading a proxy-list page. */
    private static final int CRAWL_CONNECT_TIMEOUT_MS = 5000;

    /** Read timeout for downloading a proxy-list page. */
    private static final int CRAWL_READ_TIMEOUT_MS = 10000;

    /** Connect and read timeout for the probe request sent through a candidate proxy. */
    private static final int PROXY_TIMEOUT_MS = 2000;

    /** Page fetched through each candidate proxy. */
    private static final String PROBE_URL = "https://www.baidu.com/?tn=48021271_8_hao_pg";

    /** Text that must appear in the probe response for the proxy to count as working. */
    private static final String PROBE_MARKER = "百度";

    /**
     * Starts one crawler thread per proxy-list site and returns immediately; the JVM stays alive
     * until all (non-daemon) crawler threads finish.
     *
     * @param args unused
     * @throws Exception never thrown by the current implementation; kept for compatibility
     */
    public static void main(String[] args) throws Exception {
        new Thread(new CrawlTask("xiLa") {
            @Override
            void crawl() throws Exception {
                xiLa();
            }
        }).start();
        new Thread(new CrawlTask("xiCi") {
            @Override
            void crawl() throws Exception {
                xiCi();
            }
        }).start();
        new Thread(new CrawlTask("k") {
            @Override
            void crawl() throws Exception {
                k();
            }
        }).start();
        new Thread(new CrawlTask("yunDaiLi") {
            @Override
            void crawl() throws Exception {
                yunDaiLi();
            }
        }).start();
    }

    /**
     * Crawls pages 1-2 of http://www.xiladaili.com/gaoni/ and tests every row whose second
     * column is exactly {@code "HTTP,HTTPS代理"}. The first column holds {@code ip:port}.
     *
     * @throws Exception if a list page cannot be downloaded or parsed
     */
    public static void xiLa() throws Exception {
        for (int page = 1; page <= 2; page++) {
            // This site was originally queried without a custom User-Agent; keep it that way.
            Document doc = fetchDocument("http://www.xiladaili.com/gaoni/" + page + "/", null);
            Elements rows = doc.select("table").select("tr");
            // Row 0 is treated as the table header.
            for (int i = 1; i < rows.size(); i++) {
                Elements cells = rows.get(i).select("td");
                if (cells.size() < 2 || !"HTTP,HTTPS代理".equals(cells.get(1).text())) {
                    continue;
                }
                String[] hostAndPort = cells.get(0).text().split(":");
                if (hostAndPort.length < 2) {
                    continue; // malformed "ip:port" cell
                }
                int port = parsePort(hostAndPort[1]);
                if (port < 0) {
                    continue;
                }
                keYongIp(hostAndPort[0], port, "西拉代理");
            }
        }
    }

    /**
     * Crawls page 1 of https://www.xicidaili.com/nn/ (IP in column 1, port in column 2) and
     * tests every proxy listed.
     *
     * @throws Exception if the list page cannot be downloaded or parsed
     */
    public static void xiCi() throws Exception {
        for (int page = 1; page <= 1; page++) {
            Document doc = fetchDocument("https://www.xicidaili.com/nn/" + page, BROWSER_USER_AGENT);
            testTableRows(doc, 1, 2, "西次代理");
        }
    }

    /**
     * Crawls pages 1-5 of https://www.kuaidaili.com/free/inha/ (IP in column 0, port in
     * column 1) and tests every proxy listed.
     *
     * @throws Exception if a list page cannot be downloaded or parsed
     */
    public static void k() throws Exception {
        for (int page = 1; page <= 5; page++) {
            Document doc =
                    fetchDocument("https://www.kuaidaili.com/free/inha/" + page, BROWSER_USER_AGENT);
            testTableRows(doc, 0, 1, "K代理");
        }
    }

    /**
     * Crawls pages 1-5 of http://www.ip3366.net/?stype=1 (IP in column 0, port in column 1) and
     * tests every proxy listed.
     *
     * @throws Exception if a list page cannot be downloaded or parsed
     */
    public static void yunDaiLi() throws Exception {
        for (int page = 1; page <= 5; page++) {
            Document doc =
                    fetchDocument("http://www.ip3366.net/?stype=1&page=" + page, BROWSER_USER_AGENT);
            testTableRows(doc, 0, 1, "云代理");
        }
    }

    /**
     * Tries to load the probe page through the given HTTP proxy and, when the response contains
     * the expected marker text, prints {@code host:port==>location==>name} to standard output.
     *
     * <p>This is best-effort: most free proxies are dead, so any failure (timeout, refused
     * connection, invalid address, bad response) is treated as "proxy unusable" and produces no
     * output and no exception.
     *
     * @param hostName proxy host name or IP address
     * @param port     proxy port
     * @param name     label of the list site the proxy came from, echoed in the output
     */
    public static void keYongIp(String hostName, int port, String name) {
        try {
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(hostName, port));
            URLConnection conn = new URL(PROBE_URL).openConnection(proxy);
            conn.setConnectTimeout(PROXY_TIMEOUT_MS);
            conn.setReadTimeout(PROXY_TIMEOUT_MS);
            String body;
            try (InputStream in = conn.getInputStream()) {
                // Decode explicitly as UTF-8 so the marker comparison does not depend on the
                // platform default charset.
                body = IOUtils.toString(in, "UTF-8");
            }
            if (body.contains(PROBE_MARKER)) {
                System.out.println(hostName + ":" + port + "==>" + ipDiZhi(hostName) + "==>" + name);
            }
        } catch (Exception ignored) {
            // Deliberately silent: an unreachable or misbehaving proxy is the expected case.
        }
    }

    /**
     * Looks up a textual location for an IP address via the api.ip138.com query service.
     *
     * <p>The response text is post-processed by removing all digits, all spaces and every
     * literal {@code "..."} sequence. A token obtained by registering at http://api.ip138.com
     * must be filled in below; it is sent in the {@code token} request header.
     *
     * @param ip the IP address to look up
     * @return the cleaned response text, or {@code null} if {@code ip} is {@code null}, the
     *         service does not answer with HTTP 200, or an I/O error occurs
     */
    public static String ipDiZhi(String ip) {
        if (ip == null) {
            return null;
        }
        // Register at http://api.ip138.com to obtain a token and put it here.
        String token = "";
        HttpURLConnection conn = null;
        try {
            // Encode the caller-supplied value so it cannot alter the query string.
            URL url = new URL("http://api.ip138.com/query/?ip="
                    + URLEncoder.encode(ip, "UTF-8") + "&datatype=text");
            conn = (HttpURLConnection) url.openConnection();
            conn.setConnectTimeout(5 * 1000);
            conn.setReadTimeout(5 * 1000);
            conn.setUseCaches(false);
            conn.setInstanceFollowRedirects(false);
            conn.setRequestMethod("GET");
            conn.setRequestProperty("token", token);
            if (conn.getResponseCode() != 200) {
                return null;
            }
            StringBuilder builder = new StringBuilder();
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(conn.getInputStream(), "utf-8"))) {
                // Lines are concatenated without separators, as before.
                for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                    builder.append(line);
                }
            }
            return builder.toString()
                    .replaceAll("\\d+", "")
                    .replaceAll(" ", "")
                    .replace("...", "")
                    .trim();
        } catch (IOException e) {
            System.err.println("IP location lookup failed for " + ip + ": " + e);
            return null;
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Downloads a page and parses it into a jsoup document.
     *
     * @param pageUrl   absolute URL of the page
     * @param userAgent value for the User-Agent header, or {@code null} to send the default
     * @return the parsed document
     * @throws IOException if the page cannot be downloaded or read
     */
    private static Document fetchDocument(String pageUrl, String userAgent) throws IOException {
        URLConnection conn = new URL(pageUrl).openConnection();
        if (userAgent != null) {
            conn.setRequestProperty("User-Agent", userAgent);
        }
        // Without timeouts a stalled list site would block its crawler thread indefinitely.
        conn.setConnectTimeout(CRAWL_CONNECT_TIMEOUT_MS);
        conn.setReadTimeout(CRAWL_READ_TIMEOUT_MS);
        try (InputStream in = conn.getInputStream()) {
            // A null charset lets jsoup detect the encoding from the page (falling back to
            // UTF-8) instead of relying on the platform default charset.
            return Jsoup.parse(in, null, pageUrl);
        }
    }

    /**
     * Tests every proxy found in the table rows of a list page. Row 0 is treated as the header;
     * rows with too few cells or an unparsable port are skipped.
     *
     * @param doc        parsed list page
     * @param ipColumn   zero-based index of the cell holding the IP address
     * @param portColumn zero-based index of the cell holding the port
     * @param name       label of the list site, passed through to {@link #keYongIp}
     */
    private static void testTableRows(Document doc, int ipColumn, int portColumn, String name) {
        Elements rows = doc.select("table").select("tr");
        int requiredCells = Math.max(ipColumn, portColumn) + 1;
        for (int i = 1; i < rows.size(); i++) {
            Elements cells = rows.get(i).select("td");
            if (cells.size() < requiredCells) {
                continue; // header-style or otherwise incomplete row
            }
            int port = parsePort(cells.get(portColumn).text());
            if (port < 0) {
                continue;
            }
            keYongIp(cells.get(ipColumn).text(), port, name);
        }
    }

    /**
     * Parses a TCP port number.
     *
     * @param text cell text expected to hold a decimal port
     * @return the port in the range 1-65535, or -1 if the text is not a valid port
     */
    private static int parsePort(String text) {
        try {
            int port = Integer.parseInt(text.trim());
            return (port >= 1 && port <= 65535) ? port : -1;
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /** Runs one site crawl and reports, rather than swallows, a failure of that crawl. */
    private abstract static class CrawlTask implements Runnable {
        private final String sourceName;

        CrawlTask(String sourceName) {
            this.sourceName = sourceName;
        }

        /** Performs the crawl for one list site. */
        abstract void crawl() throws Exception;

        @Override
        public final void run() {
            try {
                crawl();
            } catch (Exception e) {
                System.err.println("Crawl of " + sourceName + " failed: " + e);
            }
        }
    }
}
实现原理：通过爬取公网免费代理，测试代理是否可以正常访问网页。
需要jar包:jsoup-1.11.2.jar,commons-io-2.6.jar
jar包地址:https://download.csdn.net/download/fmw396718372/12346247