抓取IP(三)

IpDemo3.java

package com.htjf.ip3;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.net.URLConnection;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.htjf.ip.IpModel;
import com.htjf.ip2.IpData;
import com.htjf.ip2.IpSpecial;

/**
 * Splits the IP ranges read through {@code SqlIpSpecial} into sub-ranges that
 * share one city. One address out of every 256 is sent to a remote lookup
 * page, and a row is stored each time the reported city changes.
 * 
 * @author Qixuan
 */
public class IpDemo3 {

	/** Lookup page that receives the POSTed address. */
	private static final String LOOKUP_URL = "http://199604.com/ip/";

	/** Scratch file holding the tag-stripped response of the latest request. */
	private static final String HTML_DUMP_FILE = "E://IpHTML.txt";

	/** Scratch file holding the row extracted from the latest response. */
	private static final String OPERATOR_FILE = "E://IpOperator1.txt";

	/**
	 * Append-only log, one line per lookup: the address when a required field
	 * was missing, an empty line otherwise.
	 */
	private static final String BUG_IP_FILE = "E://BugIp.txt";

	/** Number of ranges fetched from the database per page. */
	private static final int PAGE_SIZE = 100;

	/** Distance between two sampled addresses inside a range. */
	private static final long BLOCK_STEP = 256;

	/** Regex used to strip whatever HTML tags remain on a response line. */
	private static final String HTML_TAG_REGEX = "<(.[^>]*)>";

	/** Marker that {@link #requestPost(String)} substitutes for each {@code <tr>}. */
	private static final Pattern ROW_MARKER = Pattern.compile("ipOperator:");

	/** Whitespace pattern removed from the extracted row. */
	private static final Pattern WHITESPACE = Pattern.compile("\\s*|\t|\r|\n");

	/**
	 * Pages through every stored range and splits each one by city.
	 * 
	 * @param args
	 *            unused
	 */
	public static void main(String args[]) {
		System.out.println("Java Project input:");
		SqlIpSpecial sis = new SqlIpSpecial();
		int num = sis.findIpCount();

		int k = 0;
		while (k < num) {
			List<IpSpecial> iplist = sis.findIp(k, PAGE_SIZE);
			if (iplist.isEmpty()) {
				// An empty page would leave k unchanged and spin forever
				// (findIp returns an empty list when its query fails).
				System.err.println("findIp returned no rows at offset " + k);
				break;
			}
			int j = 0;
			for (IpSpecial ipmodel : iplist) {
				System.out.println("===" + j);
				j++;
				splitRange(sis, ipmodel);
			}
			k = k + iplist.size();
		}
	}

	/**
	 * Samples one range in steps of {@link #BLOCK_STEP} and stores a sub-range
	 * whenever the city of the newest sample differs from the previous one.
	 * 
	 * @param sis
	 *            database gateway used to store finished sub-ranges
	 * @param ipmodel
	 *            range to split
	 */
	private static void splitRange(SqlIpSpecial sis, IpSpecial ipmodel) {
		String startIp = ipmodel.getStartIp();
		String endIp = ipmodel.getEndIp();
		System.out.println("startIp:" + startIp);
		System.out.println("endIp:" + endIp);

		long siplong = Ip2Long.ipToLong(startIp);
		long eiplong = Ip2Long.ipToLong(endIp);

		// Newest sample at the head, oldest sample of the current run at the tail.
		LinkedList<Map<String, String>> mapList = new LinkedList<Map<String, String>>();

		for (long i = siplong; i < eiplong; i = i + BLOCK_STEP) {
			String iptemp = Ip2Long.longToIP(i);
			System.out.println("-------iptemp:" + iptemp);
			try {
				mapList.addFirst(requestPost(iptemp));
			} catch (IOException e1) {
				// A failed lookup skips this sample; the run continues.
				e1.printStackTrace();
				continue;
			}

			if (mapList.size() < 2) {
				System.out.println("数据还没准备好---------");
				continue;
			}

			Map<String, String> newest = mapList.getFirst();
			Map<String, String> previous = mapList.get(1);
			System.out.println("----1最顶的first----" + newest.get("startIp"));
			System.out.println("----2最低的last----" + previous.get("startIp"));

			String newestCity = newest.get("city");
			if (newestCity.equals("")) {
				System.out.println("有异常Ip");
			} else if (newestCity.equals(previous.get("city"))) {
				// Same city: keep extending the current run.
				System.out.println("同一IP段");
			} else {
				System.out.println("不同一IP段");
				// The run that just ended goes from the oldest sample to the
				// sample right before the newest one.
				Map<String, String> oldest = mapList.getLast();
				IpData ipdata = new IpData();
				ipdata.setIpId(oldest.get("ipId"));
				ipdata.setStartIp(oldest.get("startIp"));
				ipdata.setCountry(oldest.get("country"));
				ipdata.setProvince(oldest.get("province"));
				ipdata.setCity(oldest.get("city"));
				ipdata.setOperator(oldest.get("operator"));
				ipdata.setEndIp(previous.get("startIp"));

				// The newest sample opens the next run. Clearing the window
				// without keeping it made the next run start one block late.
				mapList.clear();
				mapList.addFirst(newest);

				sis.insertIp(ipdata);
			}
		}
		// NOTE(review): the run still held in mapList when the loop ends is
		// never stored; confirm whether the last sub-range should be flushed
		// here using the range's end address.
	}

	/**
	 * POSTs one address to the lookup page, writes the tag-stripped response
	 * to {@link #HTML_DUMP_FILE} and parses it into a field map.
	 * 
	 * @param ipString
	 *            dotted address to look up
	 * @return the map built by {@link #saveIPOperator2(String, String)}
	 * @throws IOException
	 *             if the request or one of the scratch files fails
	 */
	public static Map<String, String> requestPost(String ipString)
			throws IOException {

		URL url = new URL(LOOKUP_URL);
		URLConnection connection = url.openConnection();
		connection.setConnectTimeout(500000);
		connection.setDoOutput(true); // required to send a request body
		connection.setReadTimeout(300000);

		OutputStreamWriter out = new OutputStreamWriter(
				connection.getOutputStream(), "8859_1");
		try {
			out.write("ip=" + ipString + "&action=2");
			out.flush();
		} finally {
			out.close();
		}

		BufferedReader l_reader = null;
		BufferedWriter bufw = null;
		try {
			InputStream l_urlStream = connection.getInputStream();
			l_reader = new BufferedReader(new InputStreamReader(l_urlStream));
			bufw = new BufferedWriter(new OutputStreamWriter(
					new FileOutputStream(HTML_DUMP_FILE)));

			String sCurrentLine;
			while ((sCurrentLine = l_reader.readLine()) != null) {
				// Mark row starts and cell ends before dropping other tags.
				sCurrentLine = sCurrentLine.replaceAll("<tr>", "ipOperator:");
				sCurrentLine = sCurrentLine.replaceAll("</td>", ",");
				sCurrentLine = sCurrentLine.replaceAll(HTML_TAG_REGEX, "");
				bufw.write(sCurrentLine);
				bufw.newLine();
				bufw.flush();
			}
			// Close on the success path so a failure here is still reported.
			bufw.close();
		} finally {
			closeQuietly(bufw);
			closeQuietly(l_reader);
		}

		System.out.println("第一次过滤完毕,开始下一轮过滤");
		String ipstr = saveIPOperator();
		System.out.println("第一次过滤完毕,开始下一轮过滤");
		return saveIPOperator2(ipstr, ipString);
	}

	/**
	 * First filter pass: scans {@link #HTML_DUMP_FILE} and, on the second
	 * occurrence of the row marker, writes that line with all whitespace
	 * removed to {@link #OPERATOR_FILE}.
	 * 
	 * @return the whitespace-free line of the second marker occurrence, or an
	 *         empty string when there are fewer than two occurrences
	 * @throws IOException
	 *             if either scratch file cannot be read or written
	 */
	public static String saveIPOperator() throws IOException {
		String ipstr = "";
		BufferedReader bufr = null;
		BufferedWriter bufw = null;
		try {
			bufr = new BufferedReader(new InputStreamReader(
					new FileInputStream(HTML_DUMP_FILE)));
			bufw = new BufferedWriter(new OutputStreamWriter(
					new FileOutputStream(OPERATOR_FILE)));

			int k = 1; // running count of marker occurrences
			String line;
			while ((line = bufr.readLine()) != null) {
				Matcher m = ROW_MARKER.matcher(line);
				while (m.find()) {
					if (k == 2) {
						String line2 = WHITESPACE.matcher(line).replaceAll("");
						ipstr = line2;
						bufw.write(line2);
						bufw.newLine();
						bufw.flush();
					}
					k++;
				}
			}
			bufw.close();
		} finally {
			closeQuietly(bufw);
			closeQuietly(bufr);
		}
		return ipstr;
	}

	/**
	 * Second filter pass: splits the extracted row into fields and appends one
	 * line to {@link #BUG_IP_FILE}.
	 * <p>
	 * The text after the first {@code ':'} is split on {@code ','}; fields 1,
	 * 2, 3, 4 and 5 become {@code country}, {@code province}, {@code city},
	 * {@code county} and {@code operator}. A missing field is stored as an
	 * empty string. When country, province, city or operator is missing, the
	 * looked-up address is written to the log; otherwise an empty line is.
	 * 
	 * @param ipstr
	 *            row returned by {@link #saveIPOperator()}; may be empty
	 * @param ipString
	 *            the address that was looked up, stored as {@code startIp}
	 * @return map with the keys {@code ipId}, {@code startIp},
	 *         {@code country}, {@code province}, {@code city},
	 *         {@code county} and {@code operator}
	 * @throws IOException
	 *             if the log file cannot be written
	 */
	public static Map<String, String> saveIPOperator2(String ipstr,
			String ipString) throws IOException {
		// A row without ':' (for example an empty row) yields no fields at
		// all instead of an ArrayIndexOutOfBoundsException.
		String[] iparray = (ipstr == null ? "" : ipstr).split("\\:");
		String[] ipos = iparray.length > 1 ? iparray[1].split(",")
				: new String[0];

		String country = fieldAt(ipos, 1);
		String province = fieldAt(ipos, 2);
		String city = fieldAt(ipos, 3);
		String county = fieldAt(ipos, 4);
		String operator = fieldAt(ipos, 5);

		// A missing county alone does not flag the address.
		boolean incomplete = country == null || province == null
				|| city == null || operator == null;

		// HH (24-hour clock); hh would repeat the same stamp twice a day.
		SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
		String ipId = getRandomString(14) + sdf.format(new Date());

		Map<String, String> ipmap = new HashMap<String, String>();
		ipmap.put("ipId", ipId);
		ipmap.put("startIp", ipString);
		ipmap.put("country", orEmpty(country));
		ipmap.put("province", orEmpty(province));
		ipmap.put("city", orEmpty(city));
		ipmap.put("county", orEmpty(county));
		ipmap.put("operator", orEmpty(operator));

		String sip = incomplete ? ipString : "";
		System.out.println("----------" + sip);

		BufferedWriter bufw = new BufferedWriter(new OutputStreamWriter(
				new FileOutputStream(BUG_IP_FILE, true)));
		try {
			bufw.write(sip);
			bufw.newLine();
			bufw.flush();
		} finally {
			bufw.close();
		}

		return ipmap;
	}

	/**
	 * Builds a random string of lowercase letters and digits.
	 * 
	 * @param length
	 *            number of characters to generate
	 * @return the generated string; empty when {@code length} is not positive
	 */
	public static String getRandomString(int length) {
		String base = "abcdefghijklmnopqrstuvwxyz0123456789";
		Random random = new Random();
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < length; i++) {
			sb.append(base.charAt(random.nextInt(base.length())));
		}
		return sb.toString();
	}

	/** Returns the field at {@code index}, or {@code null} when it is absent. */
	private static String fieldAt(String[] fields, int index) {
		return index < fields.length ? fields[index] : null;
	}

	/** Maps {@code null} to the empty string. */
	private static String orEmpty(String value) {
		return value == null ? "" : value;
	}

	/** Closes a stream on a cleanup path, ignoring a secondary failure. */
	private static void closeQuietly(java.io.Closeable resource) {
		if (resource != null) {
			try {
				resource.close();
			} catch (IOException ignored) {
				// Best effort: the primary error, if any, is already in flight.
			}
		}
	}

}//

/**
 * JDBC gateway for the {@code ipspecial} (input ranges) and {@code ipdata}
 * (stored sub-ranges) tables. All instances share one static connection,
 * which the constructor opens.
 */
class SqlIpSpecial {

	// Kept for backward compatibility; this class never assigns them.
	public static String username;
	public static String password;
	public static Connection connection;
	// Kept for backward compatibility; the methods below use local statements
	// so each one can be closed as soon as it has been used.
	public static PreparedStatement ps;

	/**
	 * Loads the MySQL driver and opens the shared connection. Failures are
	 * reported on standard error and leave {@link #connection} unchanged.
	 */
	public SqlIpSpecial() {

		String url = "jdbc:mysql://127.0.0.1:3306/ipselect?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull";
		// Named differently from the static fields to avoid shadowing them.
		String dbUser = "root";
		String dbPassword = "";
		try {
			Class.forName("com.mysql.jdbc.Driver");
			connection = DriverManager.getConnection(url, dbUser, dbPassword);
		} catch (ClassNotFoundException cnfex) {
			System.err.println("装载 JDBC/ODBC 驱动程序失败");
			cnfex.printStackTrace();
		} catch (SQLException sqlex) {
			System.err.println("无法连接数据库");
			sqlex.printStackTrace();
		}

	}

	/**
	 * Counts the rows of {@code ipspecial}.
	 * 
	 * @return the row count, or 0 when the query fails
	 */
	public int findIpCount() {
		int num = 0;
		PreparedStatement stmt = null;
		ResultSet rs = null;
		try {
			stmt = connection.prepareStatement("select count(*) from ipspecial");
			rs = stmt.executeQuery();
			if (rs.next()) {
				num = rs.getInt(1);
			}
		} catch (SQLException e) {
			e.printStackTrace();
		} finally {
			closeQuietly(rs, stmt);
		}
		System.out.println("====count:" + num);

		return num;
	}

	/**
	 * Reads one page of {@code ipspecial}.
	 * 
	 * @param offset
	 *            number of rows to skip
	 * @param amount
	 *            maximum number of rows to return
	 * @return the rows read, with id, start and end address filled in; empty
	 *         when the query fails
	 */
	public List<IpSpecial> findIp(int offset, int amount) {
		List<IpSpecial> listipspecial = new ArrayList<IpSpecial>();
		PreparedStatement stmt = null;
		ResultSet rs = null;
		try {
			// NOTE(review): there is no ORDER BY, so page contents depend on
			// the server's default row order.
			stmt = connection
					.prepareStatement("select * from ipspecial limit ?,?");
			stmt.setInt(1, offset);
			stmt.setInt(2, amount);

			rs = stmt.executeQuery();
			while (rs.next()) {
				String startIp = rs.getString("startIp");
				String endIp = rs.getString("endIp");

				IpSpecial ipmodel2 = new IpSpecial();
				ipmodel2.setIpId(rs.getString("ip_id"));
				ipmodel2.setStartIp(startIp);
				System.out.println("iptable:" + startIp);
				ipmodel2.setEndIp(endIp);
				System.out.println("iptable:" + endIp);
				listipspecial.add(ipmodel2);
			}
		} catch (SQLException e) {
			e.printStackTrace();
		} finally {
			closeQuietly(rs, stmt);
		}

		return listipspecial;
	}

	/**
	 * Inserts a sub-range into {@code ipdata} unless a row with the same start
	 * and end address already exists.
	 * 
	 * @param ipData
	 *            sub-range to store
	 */
	public void insertIp(IpData ipData) {
		// Query through this instance: constructing another SqlIpSpecial would
		// open a new connection and replace the shared one on every insert.
		List<IpData> list = findIpData(ipData);
		if (list.size() > 0) {
			System.out.println("已存在有数据");
			return;
		}

		PreparedStatement stmt = null;
		try {
			// Eight columns, eight placeholders. The earlier statement had a
			// ninth placeholder with no matching column, so it could never
			// execute.
			// NOTE(review): if ipdata has an add-time column, add it to the
			// column list and bind the current time to it.
			stmt = connection
					.prepareStatement("insert into ipdata (ip_id,country,province,city,county,operator,startIp,endIp) values (?,?,?,?,?,?,?,?)");
			stmt.setString(1, ipData.getIpId());
			stmt.setString(2, ipData.getCountry());
			stmt.setString(3, ipData.getProvince());
			stmt.setString(4, ipData.getCity());
			stmt.setString(5, ipData.getCounty());
			stmt.setString(6, ipData.getOperator());
			stmt.setString(7, ipData.getStartIp());
			stmt.setString(8, ipData.getEndIp());
			stmt.executeUpdate();
			System.out.println("------记录插入成功------");
		} catch (SQLException e) {
			e.printStackTrace();
		} finally {
			closeQuietly(null, stmt);
		}
	}

	/**
	 * Finds the {@code ipdata} rows whose start and end address equal those of
	 * the given sub-range.
	 * 
	 * @param ipData11
	 *            sub-range whose start and end address are matched
	 * @return one entry per matching row, with id, start and end address
	 *         filled in; empty when nothing matches or the query fails
	 */
	public List<IpData> findIpData(IpData ipData11) {
		List<IpData> list = new ArrayList<IpData>();
		PreparedStatement stmt = null;
		ResultSet rs = null;
		try {
			stmt = connection
					.prepareStatement("select * from ipdata where startIp=? and endIp=?");
			stmt.setString(1, ipData11.getStartIp());
			stmt.setString(2, ipData11.getEndIp());

			rs = stmt.executeQuery();
			while (rs.next()) {
				// A fresh object per row; reusing one instance would make
				// every list entry show the last row.
				IpData row = new IpData();
				row.setIpId(rs.getString("ip_id"));
				row.setStartIp(rs.getString("startIp"));
				row.setEndIp(rs.getString("endIp"));
				list.add(row);
			}
		} catch (SQLException e) {
			e.printStackTrace();
		} finally {
			closeQuietly(rs, stmt);
		}

		return list;
	}

	/** Closes a result set and its statement, ignoring secondary failures. */
	private static void closeQuietly(ResultSet rs, PreparedStatement stmt) {
		if (rs != null) {
			try {
				rs.close();
			} catch (SQLException ignored) {
				// Best effort on a cleanup path.
			}
		}
		if (stmt != null) {
			try {
				stmt.close();
			} catch (SQLException ignored) {
				// Best effort on a cleanup path.
			}
		}
	}

}

Ip2Long.java

package com.htjf.ip3;

/**
 * Converts between dotted-quad IPv4 text and its 32-bit value held in a
 * {@code long}.
 * 
 * @author Qixuan
 */
public class Ip2Long {

	/** Largest value a 32-bit address can take (255.255.255.255). */
	private static final long MAX_IP = 0xFFFFFFFFL;

	/**
	 * Converts an address such as {@code 127.0.0.1} to its numeric value.
	 * 
	 * @param strIp
	 *            dotted address made of exactly four decimal parts
	 * @return the value {@code (a << 24) + (b << 16) + (c << 8) + d}
	 * @throws IllegalArgumentException
	 *             if the text is null, does not have four parts, or a part is
	 *             not a number between 0 and 255 (a non-numeric part is
	 *             reported as {@link NumberFormatException})
	 */
	public static long ipToLong(String strIp) {
		if (strIp == null) {
			throw new IllegalArgumentException("IP address must not be null");
		}
		// Limit -1 keeps trailing empty parts so "1.2.3." is rejected below.
		String[] parts = strIp.split("\\.", -1);
		if (parts.length != 4) {
			throw new IllegalArgumentException(
					"Expected four dot-separated parts: " + strIp);
		}
		long result = 0;
		for (int i = 0; i < parts.length; i++) {
			long octet = Long.parseLong(parts[i]);
			if (octet < 0 || octet > 255) {
				throw new IllegalArgumentException(
						"Part out of range 0-255: " + strIp);
			}
			result = (result << 8) + octet;
		}
		return result;
	}

	/**
	 * Converts a numeric address back to dotted text.
	 * 
	 * @param longIp
	 *            value between 0 and 4294967295
	 * @return the dotted form, for example {@code 127.0.0.1}
	 * @throws IllegalArgumentException
	 *             if the value does not fit in 32 bits
	 */
	public static String longToIP(long longIp) {
		if (longIp < 0 || longIp > MAX_IP) {
			throw new IllegalArgumentException(
					"Value is not a 32-bit address: " + longIp);
		}
		StringBuilder sb = new StringBuilder(15);
		// Top byte: shift right by 24 bits.
		sb.append(longIp >>> 24);
		sb.append(".");
		// Clear the top 8 bits, then shift right by 16.
		sb.append((longIp & 0x00FFFFFF) >>> 16);
		sb.append(".");
		// Clear the top 16 bits, then shift right by 8.
		sb.append((longIp & 0x0000FFFF) >>> 8);
		sb.append(".");
		// Keep only the lowest 8 bits.
		sb.append(longIp & 0x000000FF);
		return sb.toString();
	}

	/**
	 * Small demonstration of both conversions.
	 * 
	 * @param args
	 *            unused
	 */
	public static void main(String[] args) {
		String ipStr = "192.168.0.1";
		long longIp = Ip2Long.ipToLong(ipStr);
		System.out.println("192.168.0.1 的整数形式为:" + longIp);
		System.out.println("整数" + longIp + "转化成字符串IP地址:"
				+ Ip2Long.longToIP(longIp));
		// Print the address in binary form.
		System.out
				.println("192.168.0.1 的二进制形式为:" + Long.toBinaryString(longIp));

	}
}

IpData.java

package com.htjf.ip2;

/**
 * Mutable bean describing one stored IP sub-range and its location fields.
 * 
 * @author Administrator
 */
public class IpData {

	/** Record identifier. */
	private String ipId;

	/** Country or region. */
	private String country;

	/** Province. */
	private String province;

	/** City. */
	private String city;

	/** County. */
	private String county;

	/** Network operator. */
	private String operator;

	/** First address of the range. */
	private String startIp;

	/** Last address of the range. */
	private String endIp;

	/** Time the record was added. */
	private String addTime;

	// ---- getters ----

	public String getIpId() {
		return this.ipId;
	}

	public String getCountry() {
		return this.country;
	}

	public String getProvince() {
		return this.province;
	}

	public String getCity() {
		return this.city;
	}

	public String getCounty() {
		return this.county;
	}

	public String getOperator() {
		return this.operator;
	}

	public String getStartIp() {
		return this.startIp;
	}

	public String getEndIp() {
		return this.endIp;
	}

	public String getAddTime() {
		return this.addTime;
	}

	// ---- setters ----

	public void setIpId(String ipId) {
		this.ipId = ipId;
	}

	public void setCountry(String country) {
		this.country = country;
	}

	public void setProvince(String province) {
		this.province = province;
	}

	public void setCity(String city) {
		this.city = city;
	}

	public void setCounty(String county) {
		this.county = county;
	}

	public void setOperator(String operator) {
		this.operator = operator;
	}

	public void setStartIp(String startIp) {
		this.startIp = startIp;
	}

	public void setEndIp(String endIp) {
		this.endIp = endIp;
	}

	public void setAddTime(String addTime) {
		this.addTime = addTime;
	}

}

IpSpecial.java

package com.htjf.ip2;

/**
 * Mutable bean describing one IP range to be split, with its location fields.
 */
public class IpSpecial {

	/** Record identifier. */
	private String ipId;

	/** Country or region. */
	private String country;

	/** Province. */
	private String province;

	/** City. */
	private String city;

	/** County. */
	private String county;

	/** Network operator. */
	private String operator;

	/** First address of the range. */
	private String startIp;

	/** Last address of the range. */
	private String endIp;

	// ---- getters ----

	public String getIpId() {
		return this.ipId;
	}

	public String getCountry() {
		return this.country;
	}

	public String getProvince() {
		return this.province;
	}

	public String getCity() {
		return this.city;
	}

	public String getCounty() {
		return this.county;
	}

	public String getOperator() {
		return this.operator;
	}

	public String getStartIp() {
		return this.startIp;
	}

	public String getEndIp() {
		return this.endIp;
	}

	// ---- setters ----

	public void setIpId(String ipId) {
		this.ipId = ipId;
	}

	public void setCountry(String country) {
		this.country = country;
	}

	public void setProvince(String province) {
		this.province = province;
	}

	public void setCity(String city) {
		this.city = city;
	}

	public void setCounty(String county) {
		this.county = county;
	}

	public void setOperator(String operator) {
		this.operator = operator;
	}

	public void setStartIp(String startIp) {
		this.startIp = startIp;
	}

	public void setEndIp(String endIp) {
		this.endIp = endIp;
	}

}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值