Java 爬取行政区划代码

1、导入依赖(除下列 jsoup、httpclient 外,代码还导入了 com.alibaba.fastjson,需另行引入 fastjson 依赖;HttpClientUtils 为项目内自定义的工具类)

        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.14.3</version>
        </dependency>

        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.5</version>
        </dependency>

2、代码

package com.lxq.excel;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.lxq.excel.util.HttpClientUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * Crawls the administrative-division code pages published under
 * {@code www.stats.gov.cn} and appends one CSV row per leaf record to a file
 * named after the province currently being processed.
 *
 * <p>The crawl walks the page hierarchy province, city, county, town, village by
 * following the {@code href} of each table row. A row that has no link is treated
 * as a leaf and written out immediately.
 *
 * <p>All state is static and {@link #fileName} is reassigned while crawling, so the
 * class is written for single-threaded use.
 *
 * @Author lixiaoqiang
 * @Date 2023/2/22 16:18
 */
public class GetCityCode {

    /** CSS class of the table rows at each level, ordered from province down to village. */
    private static final String[] ROW_CLASSES = new String[]{
            "provincetr", "citytr", "countytr", "towntr", "villagetr"
    };

    /** Directory the per-province CSV files are written to. */
    private static final String OUTPUT_DIR = "E:\\md\\tmp\\";

    /** Header line written once when a CSV file is first created. */
    private static final String CSV_HEADER =
            "cityCode,cityName,countryCode,countryName,townCode,townName,villageTrCode,villageTrName";

    /**
     * Index of the first province link that is actually crawled; earlier links are
     * only printed. The original code hard-coded {@code i > 12}.
     * NOTE(review): presumably a resume point from an interrupted run; set to 0 to
     * crawl every province.
     */
    private static final int FIRST_PROVINCE_INDEX = 13;

    /** Maximum number of download attempts per page before giving up. */
    private static final int MAX_FETCH_ATTEMPTS = 5;

    /** Base pause between download attempts; multiplied by the attempt number. */
    private static final long RETRY_DELAY_MILLIS = 1000L;

    /** Name of the CSV file records are currently appended to (province name + ".csv"). */
    static String fileName;

    /**
     * Entry point: runs the province-level crawl and prints the stack trace of any failure.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            testProvince();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Small demonstration that a {@code return} inside a {@code catch} block skips the
     * statements after the {@code try}: prints "catch" and never reaches "last".
     */
    static void testCatch() {
        try {
            // Deliberately throws ArithmeticException to enter the catch block.
            int a = 10 / 0;
        } catch (Exception e) {
            System.out.println("catch");
            return;
        }
        System.out.println("last");
    }

    /**
     * Province level: downloads the index page, prints every province link, and crawls
     * the city page of each province from {@link #FIRST_PROVINCE_INDEX} onwards.
     * {@link #fileName} is set to {@code <province name>.csv} before each province.
     *
     * @throws Exception if a page cannot be downloaded or a row has an unexpected shape
     */
    public static void testProvince() throws Exception {
        String url = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2022/index.html";
        // The page is downloaded once; the original fetched it a second time through
        // HttpClientUtils only to dump the raw HTML to stdout.
        Document doc = fetchWithRetry(url);
        String baseUrl = directoryOf(url);
        Elements provinceLinks = doc.getElementsByClass("provincetr").select("a");
        for (int i = 0; i < provinceLinks.size(); i++) {
            Element link = provinceLinks.get(i);
            String href = link.attr("href");
            String provinceName = link.text();
            System.out.println(href);
            System.out.println(provinceName);
            fileName = provinceName + ".csv";
            if (i >= FIRST_PROVINCE_INDEX) {
                testCity(baseUrl + href);
            }
        }
    }

    /**
     * Returns the row class of the level directly below the given one.
     *
     * @param curSrc row class of the current level, e.g. {@code "citytr"}
     * @return the next level's row class, or {@code null} when {@code curSrc} is the
     *         last level, is unknown, or is {@code null}
     */
    public static String selectNextClassSrcByCurSrc(String curSrc) {
        // The last entry has no successor, so it is excluded from the scan.
        for (int idx = 0; idx < ROW_CLASSES.length - 1; idx++) {
            if (ROW_CLASSES[idx].equals(curSrc)) {
                return ROW_CLASSES[idx + 1];
            }
        }
        return null;
    }

    /**
     * City level: for every {@code citytr} row, records the city code and name and
     * descends into the linked county page. A row without links is written out as a
     * city-only record instead of failing.
     *
     * @param url address of the province's city list page
     * @throws Exception if a page cannot be downloaded or a row has an unexpected shape
     */
    public static void testCity(String url) throws Exception {
        Document doc = fetchWithRetry(url);
        String baseUrl = directoryOf(url);
        for (Element row : doc.getElementsByClass("citytr")) {
            Elements links = row.select("a");
            JSONObject json = new JSONObject();
            if (links.isEmpty()) {
                // Leaf row: same handling as the county and town levels.
                Elements cells = row.select("td");
                json.put("cityCode", cells.get(0).text());
                json.put("cityName", cells.get(1).text());
                write(json);
            } else {
                json.put("cityCode", links.get(0).text());
                json.put("cityName", links.get(1).text());
                testCountry(baseUrl + links.get(0).attr("href"), json.toJSONString());
            }
        }
    }

    /**
     * County level: for every {@code countytr} row, adds the county code and name to the
     * inherited record; a row without links is written out, otherwise the linked town
     * page is crawled.
     *
     * @param url address of the city's county list page
     * @param j   JSON text of the record built so far (city fields)
     * @throws Exception if a page cannot be downloaded or a row has an unexpected shape
     */
    public static void testCountry(String url, String j) throws Exception {
        Document doc = fetchWithRetry(url);
        String baseUrl = directoryOf(url);
        for (Element row : doc.getElementsByClass("countytr")) {
            Elements links = row.select("a");
            // Each row gets its own copy of the parent record.
            JSONObject json = JSON.parseObject(j);
            if (links.isEmpty()) {
                Elements cells = row.select("td");
                String code = cells.get(0).text();
                String name = cells.get(1).text();
                System.out.println("code===" + code);
                System.out.println("name===" + name);
                json.put("countryCode", code);
                json.put("countryName", name);
                write(json);
            } else {
                json.put("countryCode", links.get(0).text());
                json.put("countryName", links.get(1).text());
                testTown(baseUrl + links.get(0).attr("href"), json.toJSONString());
            }
        }
    }

    /**
     * Town level with retry: downloads the page with a bounded number of attempts, then
     * processes every {@code towntr} row.
     *
     * <p>The original retried by calling itself without limit, which ends in a
     * {@code StackOverflowError} when a URL keeps failing.
     *
     * @param url address of the county's town list page
     * @param j   JSON text of the record built so far (city and county fields)
     * @throws Exception if the page still cannot be downloaded after all attempts, or a
     *                   row has an unexpected shape
     */
    public static void testTown(String url, String j) throws Exception {
        processTownRows(fetchWithRetry(url), url, j);
    }

    /**
     * Town level without retry: identical to {@link #testTown(String, String)} except
     * that the page is downloaded in a single attempt.
     *
     * @param url address of the county's town list page
     * @param j   JSON text of the record built so far (city and county fields)
     * @throws Exception if the page cannot be downloaded or a row has an unexpected shape
     */
    public static void testTown2(String url, String j) throws Exception {
        processTownRows(Jsoup.connect(url).get(), url, j);
    }

    /**
     * Village level with retry: downloads the page with a bounded number of attempts,
     * builds the records for all {@code villagetr} rows, and only then writes them, so a
     * page is written either completely or not at all.
     *
     * <p>The original caught every exception (including deterministic parse errors) and
     * retried by unbounded recursion; parse errors now propagate immediately.
     *
     * @param url address of the town's village list page
     * @param j   JSON text of the record built so far (city, county and town fields)
     * @throws UncheckedIOException if the page still cannot be downloaded after all attempts
     */
    public static void testVillageTr(String url, String j) {
        Document doc;
        try {
            doc = fetchWithRetry(url);
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to fetch village page " + url, e);
        }
        List<JSONObject> records = new ArrayList<>();
        for (Element row : doc.getElementsByClass("villagetr")) {
            records.add(toVillageRecord(row, j));
        }
        for (JSONObject villageRecord : records) {
            write(villageRecord);
        }
    }

    /**
     * Village level without retry: downloads the page in a single attempt and writes each
     * {@code villagetr} row as soon as it has been converted.
     *
     * @param url address of the town's village list page
     * @param j   JSON text of the record built so far (city, county and town fields)
     * @throws Exception if the page cannot be downloaded or a row has an unexpected shape
     */
    public static void testVillageTr2(String url, String j) throws Exception {
        Document doc = Jsoup.connect(url).get();
        for (Element row : doc.getElementsByClass("villagetr")) {
            write(toVillageRecord(row, j));
        }
    }

    /** Returns {@code url} up to and including its last '/', i.e. the page's directory. */
    private static String directoryOf(String url) {
        return url.substring(0, url.lastIndexOf("/") + 1);
    }

    /**
     * Downloads and parses a page, retrying I/O failures up to
     * {@link #MAX_FETCH_ATTEMPTS} times with a growing pause between attempts.
     */
    private static Document fetchWithRetry(String url) throws IOException {
        IOException lastFailure = null;
        for (int attempt = 1; attempt <= MAX_FETCH_ATTEMPTS; attempt++) {
            try {
                return Jsoup.connect(url).get();
            } catch (IOException e) {
                lastFailure = e;
                System.err.println("url" + url + " (attempt " + attempt + "/"
                        + MAX_FETCH_ATTEMPTS + " failed: " + e + ")");
            }
            if (attempt < MAX_FETCH_ATTEMPTS) {
                try {
                    Thread.sleep(RETRY_DELAY_MILLIS * attempt);
                } catch (InterruptedException interrupted) {
                    // Preserve the interrupt status and stop retrying.
                    Thread.currentThread().interrupt();
                    throw new IOException("Interrupted while waiting to retry " + url, interrupted);
                }
            }
        }
        throw new IOException(
                "Giving up on " + url + " after " + MAX_FETCH_ATTEMPTS + " attempts", lastFailure);
    }

    /**
     * Handles every {@code towntr} row of an already downloaded town page: leaf rows are
     * written out, linked rows descend into the village page.
     */
    private static void processTownRows(Document doc, String url, String j) throws Exception {
        String baseUrl = directoryOf(url);
        for (Element row : doc.getElementsByClass("towntr")) {
            Elements links = row.select("a");
            JSONObject json = JSON.parseObject(j);
            if (links.isEmpty()) {
                Elements cells = row.select("td");
                String code = cells.get(0).text();
                String name = cells.get(1).text();
                System.out.println("code===" + code);
                System.out.println("name===" + name);
                json.put("townCode", code);
                json.put("townName", name);
                write(json);
            } else {
                json.put("townCode", links.get(0).text());
                json.put("townName", links.get(1).text());
                testVillageTr(baseUrl + links.get(0).attr("href"), json.toJSONString());
            }
        }
    }

    /**
     * Builds the full record for one {@code villagetr} row: the code is taken from the
     * first field and the name from the third, using the row's links when it has any
     * and its table cells otherwise.
     */
    private static JSONObject toVillageRecord(Element row, String j) {
        Elements links = row.select("a");
        Elements fields = links.isEmpty() ? row.select("td") : links;
        JSONObject json = JSON.parseObject(j);
        json.put("villageTrCode", fields.get(0).text());
        json.put("villageTrName", fields.get(2).text());
        return json;
    }

    /**
     * Prints the record and appends it as one CSV line to
     * {@code OUTPUT_DIR + fileName}, writing the header first when the file is new.
     * Fields missing from the record are written as the text {@code null}.
     *
     * <p>Failures are reported on stderr and do not abort the crawl.
     */
    private static void write(JSONObject json) {
        System.out.println(json.toJSONString());
        File file = new File(OUTPUT_DIR + fileName);
        boolean newFile = !file.exists();
        File parent = file.getParentFile();
        if (parent != null) {
            // Without the directory every append would fail.
            parent.mkdirs();
        }
        String lineSeparator = System.getProperty("line.separator");
        // try-with-resources closes the stream even when a write fails.
        try (FileOutputStream os = new FileOutputStream(file, true)) {
            // NOTE(review): getBytes() uses the platform default charset, as the original
            // did; switch to an explicit charset only if existing consumers agree.
            if (newFile) {
                os.write(CSV_HEADER.getBytes());
                os.write(lineSeparator.getBytes());
            }
            String line = String.join(",",
                    json.getString("cityCode"),
                    json.getString("cityName"),
                    json.getString("countryCode"),
                    json.getString("countryName"),
                    json.getString("townCode"),
                    json.getString("townName"),
                    json.getString("villageTrCode"),
                    json.getString("villageTrName"));
            os.write(line.getBytes());
            os.write(lineSeparator.getBytes());
        } catch (IOException e) {
            System.err.println("Failed to append record to " + file + ": " + e);
        }
    }

}

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值