原始的纯真 IP 库存在两个问题:
1、地区字段没有拆分为国家、省、市、区县,需要用程序进行二次拆分;
2、有一些不规范的数据(如学校、网吧之类的记录),需要排重后手工整理。
程序里用到的 IPData.txt 和 xuexiao.csv 可见 http://download.csdn.net/detail/u011750989/9283149
package com.java.ipku;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Splits the free-form region column of the "纯真" IP database into
 * country / province / city / sub-city parts and writes an enriched table.
 *
 * <p>Regions that start with a known Chinese province-level name are split by
 * looking for administrative suffixes (市, 州, 地区, 盟, 县). Irregular entries
 * (schools, internet cafes, ...) can be overridden through a hand-maintained
 * CSV file loaded by {@link #initxuexiao(HashMap)}.
 */
public class IpUtil {
    /** Province-level names as they appear at the very start of a region string. */
    static String[] provinces={"北京市","天津市","上海市","重庆市","河北省","山西省","辽宁省","吉林省","黑龙江省","江苏省","浙江省","安徽省","福建省","江西省","山东省","河南省","湖北省","湖南省","广东省","海南省","四川省","贵州省","云南省","陕西省","甘肃省","青海省","台湾省","内蒙古","广西","西藏","宁夏","新疆","香港","澳门"};
    /** Municipalities: the province name doubles as the city name. */
    static String[] directcitys={"北京市","天津市","上海市","重庆市"};
    /** City-level suffixes tried, in priority order, for Xinjiang. */
    static String[] xinjiang_key={"地区","州","市"};
    /** City-level suffixes tried, in priority order, for Inner Mongolia. */
    static String[] nm_key={"盟","市"};
    /** City-level suffixes tried, in priority order, for Hainan. */
    static String[] hn_key={"市","县"};
    /** City-level suffixes tried, in priority order, for every other province. */
    static String[] other_key={"市","州"};

    /**
     * Splits {@code region} and stores the parts in {@code area}.
     *
     * <p>All four fields of {@code area} are always overwritten; parts that
     * cannot be determined are set to the empty string.
     *
     * @param region raw region text from the IP database; must not be null
     * @param area   receives country, province, city and sub-city
     */
    public static void startSplitRegion(String region, Area area) {
        String country = "";
        String province = "";
        String city = "";
        String subcity = "";

        // The IP database never spells out "中国"; a leading province name is
        // what identifies a Chinese record.
        String matchedProvince = findPrefix(provinces, region);
        if (matchedProvince != null) {
            country = "中国";
            province = matchedProvince;
            String remainder = region.substring(matchedProvince.length());

            String directCity = findPrefix(directcitys, region);
            if (directCity != null) {
                // Municipality: everything after the name is the district.
                city = directCity;
                subcity = remainder;
            } else if (remainder.length() > 0) {
                for (String key : cityKeysFor(matchedProvince)) {
                    int at = remainder.indexOf(key);
                    // at == 0 would yield a city consisting of the suffix only, so skip it.
                    if (at > 0) {
                        // Cut after the WHOLE suffix; cutting at at+1 would split
                        // two-character suffixes such as "地区" in half.
                        int end = at + key.length();
                        city = remainder.substring(0, end);
                        subcity = remainder.substring(end);
                        break;
                    }
                }
            }
        } else if (region.contains("大学") || region.contains("网吧")
                || region.contains("学院") || region.contains("市")) {
            // Records such as "吉林市长春市" (about 12 of them) lack a province
            // prefix but are still Chinese; they are uniformly set to China.
            country = "中国";
        } else {
            // Anything else is taken to be a foreign country name as-is.
            country = region;
        }

        // School / internet-cafe / dormitory names are not administrative units.
        if (city.contains("大学") || city.contains("网吧")) {
            city = "";
        }
        if (subcity.contains("大学") || subcity.contains("网吧") || subcity.contains("宿舍")) {
            subcity = "";
        }

        area.setCountry1(country);
        area.setProvince1(province);
        area.setCity1(city);
        area.setSubcity1(subcity);
    }

    /** Returns the first candidate that {@code text} starts with, or null if none does. */
    private static String findPrefix(String[] candidates, String text) {
        for (String candidate : candidates) {
            if (text.startsWith(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /** Picks the ordered list of city-level suffixes that applies to {@code province}. */
    private static String[] cityKeysFor(String province) {
        if (province.equals("新疆")) {
            return xinjiang_key;
        }
        if (province.equals("内蒙古")) {
            return nm_key;
        }
        if (province.equals("海南省")) {
            return hn_key;
        }
        return other_key;
    }

    /** Returns {@code cols[index]}, or the empty string when the row is too short. */
    private static String column(String[] cols, int index) {
        return index < cols.length ? cols[index] : "";
    }

    /**
     * Loads the hand-maintained override table from {@code D:\xuexiao.csv}
     * into {@code areamaps}.
     *
     * <p>Each row is read as {@code region,country,province,city,subcity}.
     * Missing trailing columns are treated as empty and blank lines are
     * skipped. I/O failures (including a missing file) are reported on stderr
     * and leave the map with whatever was loaded so far.
     *
     * @param areamaps map from raw region text to its pre-split {@link Area}
     */
    public static void initxuexiao(HashMap<String,Area> areamaps) {
        // NOTE: FileReader decodes with the JVM default charset; the CSV must match it.
        try (BufferedReader reader = new BufferedReader(new FileReader("D:\\xuexiao.csv"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.isEmpty()) {
                    continue;
                }
                // limit -1 keeps trailing empty columns, e.g. "x,中国,北京市,,".
                String[] cols = line.split(",", -1);
                Area area = new Area();
                area.setCountry1(column(cols, 1));
                area.setProvince1(column(cols, 2));
                area.setCity1(column(cols, 3));
                area.setSubcity1(column(cols, 4));
                areamaps.put(cols[0], area);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads {@code D:\IPData.txt} (tab-separated, region in the third column),
     * splits every region and writes {@code D:\ipku.txt} with the columns
     * {@code col0, col1, country, province, city, subcity, region}.
     *
     * @param args unused
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        // NOTE: FileReader/FileWriter use the JVM default charset.
        try (BufferedReader reader = new BufferedReader(new FileReader("D:\\IPData.txt"));
             BufferedWriter writer = new BufferedWriter(new FileWriter("D:\\ipku.txt"))) {
            HashMap<String,Area> areamaps = new HashMap<String,Area>(500);
            initxuexiao(areamaps);

            String line;
            while ((line = reader.readLine()) != null) {
                // limit -1 keeps an empty region column instead of dropping it.
                String[] datas = line.split("\t", -1);
                if (datas.length < 3) {
                    System.err.println("Skipping malformed line: " + line);
                    continue;
                }
                String region = datas[2];

                // Hand-curated overrides win over the automatic split.
                Area area = areamaps.get(region);
                if (area == null) {
                    area = new Area();
                    startSplitRegion(region, area);
                }

                writer.write(datas[0] + "\t" + datas[1] + "\t" + area.getCountry1() + "\t" + area.getProvince1()
                        + "\t" + area.getCity1() + "\t" + area.getSubcity1() + "\t" + region);
                writer.newLine();
            }
        }
    }
}
package com.java.ipku;
public class Area {
public String getCountry1() {
return country1;
}
public void setCountry1(String country1) {
this.country1 = country1;
}
public String getProvince1() {
return province1;
}
public void setProvince1(String province1) {
this.province1 = province1;
}
public String getCity1() {
return city1;
}
public void setCity1(String city1) {
this.city1 = city1;
}
public String getSubcity1() {
return subcity1;
}
public void setSubcity1(String subcity1) {
this.subcity1 = subcity1;
}
private String country1="";
private String province1="";
private String city1="";
private String subcity1="";
}