ip库处理

原创 2015年11月19日 16:37:17

原始的纯真ip库有两个问题

1、地区没有拆分国家省市区县,需要程序二次拆分

2、有一些不规范数据(学校、网吧之类),需要排重后手工整理


程序里用到的 IPData.txt 和 xuexiao.csv 可以从这里下载:http://download.csdn.net/detail/u011750989/9283149

package com.java.ipku;


import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Splits the free-form region column of an IP database export into
 * country / province / city / sub-city columns.
 *
 * <p>{@link #main(String[])} reads {@code D:\IPData.txt} (tab separated, region in the
 * third column), resolves every region either through the hand-maintained overrides
 * loaded by {@link #initxuexiao(HashMap)} or through {@link #startSplitRegion(String, Area)},
 * and writes the enriched rows to {@code D:\ipku.txt}.
 *
 * <p>NOTE(review): all files are read and written with the platform default charset
 * ({@code FileReader}/{@code FileWriter}); confirm that it matches the encoding of the
 * data files on the machine that runs this tool.
 */
public class IpUtil {
	
	/** Province-level prefixes; a region that starts with one of them is treated as China. */
	static String[] provinces={"北京市","天津市","上海市","重庆市","河北省","山西省","辽宁省","吉林省","黑龙江省","江苏省","浙江省","安徽省","福建省","江西省","山东省","河南省","湖北省","湖南省","广东省","海南省","四川省","贵州省","云南省","陕西省","甘肃省","青海省","台湾省","内蒙古","广西","西藏","宁夏","新疆","香港","澳门"};
	/** Prefixes for which the province itself is also reported as the city. */
	static String[] directcitys={"北京市","天津市","上海市","重庆市"};
	
	/** City-level suffixes tried, in priority order, for regions under "新疆". */
	static String[] xinjiang_key={"地区","州","市"};
	/** City-level suffixes tried, in priority order, for regions under "内蒙古". */
	static String[] nm_key={"盟","市"};
	/** City-level suffixes tried, in priority order, for regions under "海南省". */
	static String[] hn_key={"市","县"};
	/** City-level suffixes tried, in priority order, for every other province. */
	static String[] other_key={"市","州"};

	/**
	 * Splits {@code region} into country, province, city and sub-city and stores the
	 * result in {@code area}.
	 *
	 * <p>When the region starts with an entry of {@link #provinces} the country is
	 * "中国" (the source data never contains the country for Chinese rows). For the
	 * entries of {@link #directcitys} the city equals the province and the remainder
	 * is the sub-city. Otherwise the remainder is cut after the first suffix (taken in
	 * the priority order of the province's key list) that occurs at an index greater
	 * than zero.
	 *
	 * <p>When no province matches, the whole region is reported as the country, unless
	 * it contains "大学", "网吧", "学院" or "市", in which case the country is "中国".
	 *
	 * <p>City values that contain "大学"/"网吧" and sub-city values that contain
	 * "大学"/"网吧"/"宿舍" are blanked.
	 *
	 * @param region raw region text; must not be {@code null}
	 * @param area   receives the four components; must not be {@code null}
	 */
	public static void startSplitRegion(String region,Area area)
	{
		String country1="";
		String province1="";
		String city1="";
		String subcity1="";

		// A plain prefix test is equivalent to the former "^"+province regex because
		// none of the province names contains a regex metacharacter.
		String matchedProvince=null;
		for (String province:provinces)
		{
			if (region.startsWith(province))
			{
				matchedProvince=province;
				break;
			}
		}

		if (matchedProvince!=null)
		{
			country1="中国";
			province1=matchedProvince;
			String excludeProvince=region.substring(matchedProvince.length());

			boolean isDirectCity=false;
			for (String directcity:directcitys)
			{
				if (region.startsWith(directcity))
				{
					isDirectCity=true;
					city1=directcity;
					subcity1=excludeProvince;
					break;
				}
			}

			if (!isDirectCity && excludeProvince.length()>0)
			{
				for (String key:cityKeysFor(matchedProvince))
				{
					int inx=excludeProvince.indexOf(key);
					// inx must be > 0: a suffix at index 0 would yield a city that is
					// nothing but the suffix itself.
					if (inx>0)
					{
						// Cut after the WHOLE suffix. The previous inx+1 split the
						// two-character key "地区" in half ("阿克苏地" / "区...").
						int cut=inx+key.length();
						city1=excludeProvince.substring(0,cut);
						subcity1=excludeProvince.substring(cut);
						break;
					}
				}
			}
		}
		else
		{
			// Rows such as "吉林市长春市" (no province prefix) and school / internet-cafe
			// names are still Chinese; everything else is taken to be a country name.
			if (region.contains("大学") || region.contains("网吧") || region.contains("学院") || region.contains("市"))
			{
				country1="中国";
			}
			else
			{
				country1=region;
			}
		}

		// Institution names are not administrative divisions; drop them.
		if (city1.contains("大学") || city1.contains("网吧"))
		{
			city1="";
		}
		if (subcity1.contains("大学") || subcity1.contains("网吧") || subcity1.contains("宿舍"))
		{
			subcity1="";
		}

		area.setCountry1(country1);
		area.setProvince1(province1);
		area.setCity1(city1);
		area.setSubcity1(subcity1);
	}

	/** Returns the city-level suffix list that applies to the given province. */
	private static String[] cityKeysFor(String province)
	{
		if (province.equals("新疆"))
		{
			return xinjiang_key;
		}
		if (province.equals("内蒙古"))
		{
			return nm_key;
		}
		if (province.equals("海南省"))
		{
			return hn_key;
		}
		return other_key;
	}
	
	/**
	 * Loads the hand-maintained overrides from {@code D:\xuexiao.csv} into
	 * {@code areamaps}.
	 *
	 * <p>Each row is {@code region,country,province,city,subcity}; the first column is
	 * the map key. Rows with fewer than five columns are reported on stderr and skipped.
	 * I/O failures (including a missing file) are printed and otherwise ignored, so the
	 * caller continues with whatever was loaded so far.
	 *
	 * @param areamaps map that receives one entry per valid row
	 */
	public static void initxuexiao(HashMap<String,Area> areamaps)
	{
		BufferedReader br=null;
		try {
			br=new BufferedReader(new FileReader("D:\\xuexiao.csv"));
			String line;
			while((line=br.readLine())!=null)
			{
				// limit -1 keeps trailing empty columns, e.g. a row without a sub-city
				String[] datas=line.split(",",-1);
				if (datas.length<5)
				{
					System.err.println("xuexiao.csv: skipping malformed row: "+line);
					continue;
				}
				Area area=new Area();
				area.setCountry1(datas[1]);
				area.setProvince1(datas[2]);
				area.setCity1(datas[3]);
				area.setSubcity1(datas[4]);
				areamaps.put(datas[0], area);
			}
		} catch (IOException e) {
			// FileNotFoundException is an IOException; both are best-effort failures here.
			e.printStackTrace();
		}
		finally
		{
			// br is still null when the file could not be opened
			if (br!=null)
			{
				try {
					br.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
	
	/**
	 * Converts {@code D:\IPData.txt} into {@code D:\ipku.txt}.
	 *
	 * <p>Every output row is: input column 0, input column 1, country, province, city,
	 * sub-city, original region (input column 2), separated by tabs. Input rows with
	 * fewer than three columns are reported on stderr and skipped.
	 *
	 * @param args unused
	 * @throws IOException if the input cannot be read or the output cannot be written
	 */
	public static void main(String[] args) throws IOException
	{
		BufferedReader br=null;
		BufferedWriter bw=null;
		try
		{
			br=new BufferedReader(new FileReader("D:\\IPData.txt"));
			bw=new BufferedWriter(new FileWriter("D:\\ipku.txt"));

			HashMap<String,Area> areamaps=new HashMap<String,Area>(500);
			initxuexiao(areamaps);

			String line;
			while ((line=br.readLine())!=null)
			{
				// limit -1 keeps a trailing empty region column instead of dropping it
				String[] datas=line.split("\t",-1);
				if (datas.length<3)
				{
					System.err.println("IPData.txt: skipping malformed row: "+line);
					continue;
				}

				// Hand-maintained overrides win over the automatic split.
				Area area=areamaps.get(datas[2]);
				if (area==null)
				{
					area=new Area();
					startSplitRegion(datas[2],area);
				}

				bw.write(datas[0]+"\t"+datas[1]+"\t"+area.getCountry1()+"\t"+area.getProvince1()
						+"\t"+area.getCity1()+"\t"+area.getSubcity1()+"\t"+datas[2]);
				bw.newLine();
			}
		}
		finally
		{
			// Close both streams even when processing failed half-way.
			try
			{
				if (br!=null)
				{
					br.close();
				}
			}
			finally
			{
				if (bw!=null)
				{
					bw.close();
				}
			}
		}
	}

}
package com.java.ipku;

public class Area {
	public String getCountry1() {
		return country1;
	}
	public void setCountry1(String country1) {
		this.country1 = country1;
	}
	public String getProvince1() {
		return province1;
	}
	public void setProvince1(String province1) {
		this.province1 = province1;
	}
	public String getCity1() {
		return city1;
	}
	public void setCity1(String city1) {
		this.city1 = city1;
	}
	public String getSubcity1() {
		return subcity1;
	}
	public void setSubcity1(String subcity1) {
		this.subcity1 = subcity1;
	}
	private String country1="";
	private String province1="";
	private String city1="";
	private String subcity1="";

}



相关文章推荐

IP归属地分段处理库

  • 2012年04月26日 10:18
  • 53.98MB
  • 下载

处理后的ip库

  • 2015年11月19日 16:32
  • 6.73MB
  • 下载

Linux VIP(虚拟IP)配置后,无法ping通的问题处理

事起于公司装了MQ服务器,想做成手工切换的双机模式,具体要求是虚拟IP,共享存储和MQ应用服务通过手工执行脚本方式启停,在虚拟IP、存储、MQ应用都正常的起来后,出现了同网段的IP可以ping 得通虚...

修改IP批处理

  • 2015年09月23日 13:31
  • 1KB
  • 下载

ip动态切换批处理

  • 2013年09月04日 08:45
  • 660B
  • 下载

Linux协议栈IP层的路由处理

http://blog.csdn.net/wangxing1018/article/details/4285489 写在前面: 其实,这篇文章也不知道取什么名字好,感觉什么都没讲。唉。。。其...

实验室IP批处理

  • 2014年06月09日 22:14
  • 2KB
  • 下载

Hadoop集群或者Spark集群IP地址发生改变的处理方法

之所以遇到这个问题是因为之前是在公司的工作机器上创建的三个Ubuntu虚拟机上搭建的Hadoop和spark伪分布集群,由于某种原因离职,还有部分任务没有完成,又不想就此废弃该环境,所以将虚拟机文件拷...

更换IP 批处理

  • 2014年07月25日 15:38
  • 926B
  • 下载
内容举报
返回顶部
收藏助手
不良信息举报
您举报文章:ip库处理
举报原因:
原因补充:

(最多只允许输入30个字)