省市区初始化

1、准备maven包

<!-- jsoup: HTML parser; AreaUtils uses Jsoup.parse and its Document/Elements selectors to read the fetched pages -->
<dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.15.4</version>
    </dependency>

<!-- Apache HttpClient 4.x: AreaUtils.parseUrl uses it to issue the HTTP GET that downloads each page -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.14</version>
</dependency>

2、对象类

public class AreaDto {

	private Integer id;
	
	private String name;
	
	private String code;
	
	private Integer parentId;
	
	private Integer sort;
	
	private Integer status;
	
	private String ruleId;

    private List<AreaDto> children=new ArrayList<AreaDto>();

}

3、编写工具类

/**
 * Helpers for fixed-width, zero-padded numeric "rule ids".
 *
 * <p>A child's id is its parent's id followed by a fixed-width sequence
 * number, so ids get longer with every level of the hierarchy.
 */
public class RuleIdUtils {

	/**
	 * Builds the first sequence number of the given width: {@code num-1}
	 * zeros followed by {@code 1} (for example {@code 3 -> "001"}).
	 *
	 * @param num width of the id in characters, at least 2
	 * @return the zero-padded id {@code "0…01"}
	 * @throws IllegalArgumentException if {@code num} is less than 2
	 */
	public static String getRootRuleId(int num) {
		if(num<2) {
			throw new IllegalArgumentException("必须大于等于2!");
		}
		StringBuilder sb=new StringBuilder(num);
		for(int i=0;i<num-1;i++) {
			sb.append('0');
		}
		sb.append('1');
		return sb.toString();
	}

	/**
	 * Builds the id of the first child under a parent by appending the first
	 * sequence number of width {@code num} to the parent's id.
	 *
	 * @param parentRuleId id of the parent node
	 * @param num width of the child's sequence number, at least 2; must not be null (it is unboxed)
	 * @return {@code parentRuleId} followed by {@code "0…01"}
	 * @throws IllegalArgumentException if {@code num} is less than 2
	 */
	public static String getFirstChildRuleId(String parentRuleId,Integer num) {
		String firstId=getRootRuleId(num);
		return parentRuleId+firstId;
	}

	/**
	 * Returns the id that follows {@code RuleId}: its numeric value plus one,
	 * left-padded with zeros to the same length.
	 *
	 * <p>The increment uses arbitrary-precision arithmetic, so ids whose
	 * numeric value does not fit in a {@code long} are handled as well.
	 *
	 * @param RuleId current id, a decimal number
	 * @return the next id, with the same length as {@code RuleId}
	 * @throws NumberFormatException if {@code RuleId} is not a decimal number
	 * @throws IllegalArgumentException if the next value needs more characters than {@code RuleId} has
	 */
	public static String getNextRuleId(String RuleId) {
		int length=RuleId.length();
		// Fully-qualified BigInteger avoids any dependency on the file's import block.
		String next=new java.math.BigInteger(RuleId).add(java.math.BigInteger.ONE).toString();
		if(next.length()>length) {
			throw new IllegalArgumentException("超出最大范围!");
		}
		StringBuilder sb=new StringBuilder(length);
		for(int i=next.length();i<length;i++) {
			sb.append('0');
		}
		return sb.append(next).toString();
	}

	/** Small manual demo of the three helpers. */
	public static void main(String[] args) {
		System.out.println(getRootRuleId(3));
		System.out.println(getFirstChildRuleId("001002",3));
		System.out.println(getNextRuleId("001002001"));
	}

}
/**
 * Scrapes the province / city / county hierarchy from the statistics bureau
 * pages under {@link #REGION_URL} and returns it as a tree of {@code AreaDto}.
 */
public class AreaUtils {
	// Base URL of the statistics bureau's administrative-region pages
	public static final String REGION_URL="http://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2022";

	// Value the per-level sequence starts from; the first node of a level gets "0001".
	private static final String RULE_ID_SEED="0000";

	/**
	 * Downloads a page with an HTTP GET and parses it with jsoup.
	 *
	 * @param url address of the page to fetch
	 * @return the parsed document, or {@code null} when the request fails
	 *         with an I/O error or the response has no (or an empty) body
	 */
	public static Document parseUrl(String url) {
		// try-with-resources closes the response and the client even when reading the body throws.
		try (CloseableHttpClient httpClient = HttpClients.createDefault();
				CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
			HttpEntity entity = response.getEntity();
			if (entity == null) {
				return null;
			}
			String result = EntityUtils.toString(entity, "UTF-8");
			if (StringUtils.isNotEmpty(result)) {
				return Jsoup.parse(result);
			}
			return null;
		} catch (IOException e) {
			// Best effort: callers treat null as "page unavailable" and skip that branch.
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Reads the index page and builds the full tree: provinces, their cities
	 * and the cities' counties. One page is fetched per province and per city.
	 *
	 * @return the provinces in page order; empty when the index page cannot be loaded
	 */
	public static List<AreaDto> getArea() {
		List<AreaDto> data=new ArrayList<AreaDto>();
		Document doc=parseUrl(REGION_URL+"/index.html");
		if(null==doc) {
			return data;
		}
		String ruleId=RULE_ID_SEED;
		int sort=1;
		Elements links=doc.getElementsByClass("provincetr").select("a[href]");
		for(Element link:links) {
			String href=link.attr("href");
			AreaDto area=new AreaDto();
			// The province code is the link's file name without its extension.
			area.setCode(href.split("\\.")[0]);
			area.setName(link.html().replace("<br>", ""));
			ruleId=RuleIdUtils.getNextRuleId(ruleId);
			area.setRuleId(ruleId);
			area.setSort(sort++);

			getCityChildren(REGION_URL+"/"+href, area, "citytr");

			data.add(area);
		}
		return data;
	}

	/**
	 * Loads a province page and attaches its cities (each with its counties) to {@code parent}.
	 *
	 * @param link URL of the province page
	 * @param parent province node that receives the children
	 * @param className CSS class of the table rows that hold the cities
	 */
	private static void getCityChildren(String link,AreaDto parent,String className) {
		fillChildren(link, parent, className, "countytr");
	}

	/**
	 * Loads a city page and attaches its counties to {@code parent}; counties are leaves.
	 *
	 * @param link URL of the city page
	 * @param parent city node that receives the children
	 * @param className CSS class of the table rows that hold the counties
	 */
	private static void getCountyChildren(String link,AreaDto parent,String className) {
		fillChildren(link, parent, className, null);
	}

	/**
	 * Parses the two-cell rows (code, name) of one page into child nodes of {@code parent}.
	 * When the page cannot be loaded, {@code parent} is left untouched.
	 *
	 * @param link URL of the page to parse
	 * @param parent node that receives the parsed children
	 * @param className CSS class of the rows to read
	 * @param childClassName row class of the next level to descend into, or {@code null} to stop here
	 */
	private static void fillChildren(String link,AreaDto parent,String className,String childClassName) {
		Document doc=parseUrl(link);
		if(null==doc) {
			return;
		}
		int sort=1;
		String ruleId=parent.getRuleId()+RULE_ID_SEED;
		List<AreaDto> data=new ArrayList<AreaDto>();
		for(Element row:doc.getElementsByClass(className).select("tr")) {
			Elements tds=row.select("td");
			if(tds.size()!=2) {
				continue;
			}
			Element codeCell=linkOrSelf(tds.get(0));
			Element nameCell=linkOrSelf(tds.get(1));
			AreaDto area=new AreaDto();
			area.setCode(codeCell.html());
			area.setName(nameCell.html());
			ruleId=RuleIdUtils.getNextRuleId(ruleId);
			area.setRuleId(ruleId);
			area.setSort(sort++);

			// Rows without a hyperlink have no sub-page, so there is nothing to descend into.
			String href=nameCell.attr("href");
			if(null!=childClassName && StringUtils.isNotEmpty(href)) {
				getCountyChildren(REGION_URL+"/"+href, area, childClassName);
			}

			data.add(area);
		}
		parent.setChildren(data);
	}

	/**
	 * Returns the first hyperlink inside a table cell, or the cell itself
	 * because some rows on the pages carry plain text without a link.
	 */
	private static Element linkOrSelf(Element cell) {
		Element anchor=cell.select("a").first();
		return null==anchor ? cell : anchor;
	}

	/** Scrapes the whole tree and prints it as JSON. */
	public static void main(String[] args) {
		List<AreaDto> data=getArea();
		System.out.println(JSONObject.toJSONString(data));
	}

}

4、说明

因省市区数据量很大,开发工具的控制台有时无法完整打印全部结果,建议边解析边插入数据库。如果嫌麻烦,也可以直接使用现成的 SQL 脚本创建:https://download.csdn.net/download/keng2206/87650437

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值