Java 获取全国省、市、区、镇数据,并导出为 CSV 文件(可用 Excel 打开),部分内容转载

/**
 * Crawls the administrative-division pages published under {@link #ROOT_URL}
 * (province -> city -> county/district -> town) and writes the collected names
 * to a timestamped CSV file.
 *
 * <p>Every CSV row has the columns province, city, county, town and a numeric
 * type (1 = province, 2 = city, 3 = county/district, 4 = town). Each field,
 * including the last one, is followed by a separator, so every row ends with a
 * trailing comma before the line terminator.
 *
 * <p>The crawl is single-threaded; the names of the province/city/county that
 * are currently being walked are kept in static fields.
 */
public class Test02 {
    /** Maps a hierarchy level (1 = province ... 5 = village) to the CSS class of its table rows. */
    private static final Map<Integer, String> cssMap = new HashMap<Integer, String>();

    /** Index page that lists all provinces. */
    private static final String ROOT_URL = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/";

    /** Hierarchy level of the province rows on the index page. */
    private static final int PROVINCE_LEVEL = 1;

    /**
     * Deepest hierarchy level whose rows are written to the CSV (4 = town).
     * Pages below this level are never fetched because none of their rows
     * would be written.
     */
    private static final int DEEPEST_WRITTEN_LEVEL = 4;

    /** Placeholder name used by the source pages for "municipal district" entries. */
    private static final String MUNICIPAL_DISTRICT = "市辖区";

    /** Matches every character outside the range U+4E00..U+9FA5; used to strip markup and digits. */
    private static final String NON_CHINESE_REGEX = "[^\u4e00-\u9fa5]";

    /** Name of the province currently being crawled. */
    private static String currentProvince = "";
    /** Name of the city currently being crawled. */
    private static String currentCity = "";
    /** Name of the county/district currently being crawled. */
    private static String currentCounty = "";

    /**
     * CSV column separator.
     */
    private static final String CSV_COLUMN_SEPARATOR = ",";
    /**
     * CSV line terminator.
     */
    private static final String CSV_RN = "\r\n";

    static {
        cssMap.put(1, "provincetr");// province
        cssMap.put(2, "citytr");// city
        cssMap.put(3, "countytr");// county / district
        cssMap.put(4, "towntr");// town
        cssMap.put(5, "villagetr");// village
    }

    /**
     * Crawls all provinces and writes the result to {@code H:\<yyyyMMddHHmmss>.csv}.
     *
     * @param args unused
     * @throws Exception if the index page cannot be loaded or the CSV file cannot be written
     */
    public static void main(String[] args) throws Exception {
        StringBuilder buf = new StringBuilder();
        for (String title : "省份,市,区,镇,类型".split(",")) {
            buf.append(title).append(CSV_COLUMN_SEPARATOR);
        }
        buf.append(CSV_RN);

        // Load the index page; connect() reports failures by returning null.
        Document root = connect(ROOT_URL);
        if (root == null) {
            throw new IOException("Failed to load the province index page: " + ROOT_URL);
        }

        // Each province row holds several province links; walk all of them.
        for (Element provinceRow : root.select("tr." + cssMap.get(PROVINCE_LEVEL))) {
            for (Element province : provinceRow.select("a")) {
                printInfoProvice(province, PROVINCE_LEVEL, buf);
                parseNextLevel(province, PROVINCE_LEVEL + 1, buf);
            }
        }

        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
        String path = "H:\\" + sdf.format(new Date()) + ".csv";
        // try-with-resources closes the stream exactly once, on success and on failure.
        try (FileOutputStream out = new FileOutputStream(path)) {
            out.write(buf.toString().getBytes(StandardCharsets.UTF_8));
        } catch (IOException e) {
            // Keep the original cause so the failure can be diagnosed.
            throw new Exception("导出失败", e);
        }
    }

    /**
     * Fetches the page linked from {@code parentElement}, records every row of
     * the given level and descends into each row's child page.
     *
     * <p>NOTE(review): only rows whose CSS class matches {@code level} are read,
     * so a page that skips a hierarchy level would contribute no rows - confirm
     * whether such pages exist.
     *
     * @param parentElement link element whose {@code href} points to the page to load
     * @param level         hierarchy level of the rows expected on that page (2 = city ... 4 = town)
     * @param buf           CSV buffer that rows are appended to
     * @throws IOException declared for callers; fetch failures are reported by {@link #connect(String)}
     */
    private static void parseNextLevel(Element parentElement, int level, StringBuilder buf) throws IOException {
        try {
            Thread.sleep(500);// throttle requests, otherwise the server may answer with error status codes
        } catch (InterruptedException e) {
            // Restore the flag and stop descending so the crawl unwinds promptly.
            Thread.currentThread().interrupt();
            return;
        }

        Document doc = connect(parentElement.attr("abs:href"));
        if (doc == null) {
            return;
        }

        // One table row per administrative unit.
        for (Element row : doc.select("tr." + cssMap.get(level))) {
            printInfo(parentElement, row, level + 1, buf);
            // Rows without an <a> tag have no child page.
            Elements links = row.select("a");
            if (level < DEEPEST_WRITTEN_LEVEL && !links.isEmpty()) {
                parseNextLevel(links.last(), level + 1, buf);
            }
        }
    }

    /**
     * Appends one CSV row for a city, county/district or town.
     *
     * @param parentElement link element of the parent unit; its text replaces a
     *                      city named "市辖区"
     * @param element       table row of the crawled unit; the text of its last
     *                      {@code td} is used as the unit's name
     * @param level         row level plus one, as passed by {@code parseNextLevel}:
     *                      3 = city, 4 = county/district, 5 = town; other values are ignored
     * @param buf           CSV buffer that the row is appended to
     */
    private static void printInfo(Element parentElement, Element element, int level, StringBuilder buf) {
        try {
            if (level == 3) {// city
                currentCity = element.select("td").last().text();
                if (MUNICIPAL_DISTRICT.equals(currentCity)) {
                    // Use the parent's name instead of the "市辖区" placeholder.
                    currentCity = parentElement.toString().replaceAll(NON_CHINESE_REGEX, "");
                }
                appendRow(buf, currentProvince, currentCity, "", "", "2");
            } else if (level == 4) {// county / district
                currentCounty = element.select("td").last().text();
                if (MUNICIPAL_DISTRICT.equals(currentCity)) {
                    return;
                }
                appendRow(buf, currentProvince, currentCity, currentCounty, "", "3");
            } else if (level == 5) {// town
                String town = element.select("td").last().text();
                appendRow(buf, currentProvince, currentCity, currentCounty, town, "4");
                System.out.println(currentProvince + "," + currentCity + "," + currentCounty + "," + town);
            }
        } catch (Exception e) {
            // Best effort: a malformed row is reported and skipped, the crawl continues.
            e.printStackTrace();
        }
    }

    /**
     * Records the province named by {@code element} and appends its CSV row.
     *
     * @param element link element of the province
     * @param level   unused; kept so the call shape matches {@code printInfo}
     * @param buf     CSV buffer that the row is appended to
     */
    private static void printInfoProvice(Element element, int level, StringBuilder buf) {
        try {
            // Strip the markup from the element's HTML, keeping only the Chinese name.
            currentProvince = element.toString().replaceAll(NON_CHINESE_REGEX, "");
            appendRow(buf, currentProvince, "", "", "", "1");
        } catch (Exception e) {
            // Best effort: report and continue with the next province.
            e.printStackTrace();
        }
    }

    /**
     * Appends one CSV row; every field (including the last) is followed by a
     * separator, then the line terminator.
     */
    private static void appendRow(StringBuilder buf, String province, String city, String county,
                                  String town, String type) {
        buf.append(province).append(CSV_COLUMN_SEPARATOR)
                .append(city).append(CSV_COLUMN_SEPARATOR)
                .append(county).append(CSV_COLUMN_SEPARATOR)
                .append(town).append(CSV_COLUMN_SEPARATOR)
                .append(type).append(CSV_COLUMN_SEPARATOR)
                .append(CSV_RN);
    }

    /**
     * Downloads and parses the page at {@code url} with a 100-second timeout.
     *
     * @param url absolute URL of the page
     * @return the parsed document, or {@code null} if the request failed with an {@link IOException}
     * @throws IllegalArgumentException if {@code url} is {@code null} or empty
     */
    private static Document connect(String url) {
        if (url == null || url.isEmpty()) {
            throw new IllegalArgumentException("The input url('" + url + "') is invalid!");
        }
        try {
            return Jsoup.connect(url).timeout(100 * 1000).get();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值