Java 爬取中国的地区代码

数据库连接工具

package com.example.pachong.controller;
/**
 * 数据库连接的实现
 */
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
public class DbUtil {
	private String url = "jdbc:mysql://localhost:3306/tean_dianti?useUnicode=true&characterEncoding=utf8";
	private String dbUser = "root" ;
	private String dbPassword = "root" ;
	private String dbDriver = "com.mysql.cj.jdbc.Driver" ;
	private Connection connection = null ;
	
	public  Connection getConnection() {
		try {
			Class.forName(dbDriver) ;
			connection = DriverManager.getConnection(url,dbUser,dbPassword) ;
			System.out.println("数据库连接成功!");
		} catch (ClassNotFoundException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (SQLException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		return connection ;
	}
	public void closeConnection() {
		if(connection !=null) {
			try {
				connection.close();
				System.out.println("数据库连接关闭");
			} catch (SQLException e) {
				// TODO Auto-generated catch block
				e.printStackTrace();
			}
		}
	}

	
}

实体

package com.example.pachong.controller;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.util.List;

/**
 * @version 1.0
 * @Author gxh
 * @Date 2022/8/8 13:04
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class Area {

    private Long id;

    private Long parentId;

    private String name;

    private String antors;

    private List<Area> childen;



}

测试


package com.example.pachong.controller;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class Test {
    private static String url = "http://blog.csdn.net";
    private static String blogName = "guoxiaolongonly";
    private static DbUtil dbUtil = new DbUtil();

    public static DbUtil getDbUtil() {
        return dbUtil;
    }
    private static Connection get(){
        return dbUtil.getConnection();
    }
    static Connection connection = get();
    public static void main(String[] args) throws IOException {


        List<Area> msg = getMsg();
        for (Area area : msg) {
            add(area,connection);
            for (Area area1 : area.getChilden()) {
                add(area1,connection);
                for (Area area2 : area1.getChilden()) {
                    add(area2,connection);
                }
            }

        }

    }

    public static boolean add(Area user, Connection connection) {

        String sql= "insert into sys_area_code(area_id,parent_id,ancetors,area_name,create_time) values(?,?,?,?,?)" ;
        try {

            PreparedStatement preparedStatement = connection.prepareStatement(sql);
            preparedStatement.setLong(1, user.getId());
            preparedStatement.setLong(2, user.getParentId());
            preparedStatement.setString(3, user.getAntors());
            preparedStatement.setString(4,user.getName());
            preparedStatement.setTimestamp(5, new Timestamp((new java.util.Date()).getTime()));
            return preparedStatement.executeUpdate()>0;
        } catch (SQLException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        return false ;
    }


    public static List<Area> getMsg() throws IOException {
        Document document = Jsoup.connect("http://www.ip33.com/area_code.html").get();
        Elements ip = document.getElementsByClass("ip");
        List<Area> areas = new ArrayList<>();
        for (Element element : ip) {
            //一级
            Area area = new Area();
            Elements h4 = element.select("h4");
            String[] s = h4.html().split(" ");
            area.setName(s[0]);
            area.setId(Long.valueOf(s[1]));
            area.setAntors("0");
            area.setParentId(0l);
            Elements ul = element.select("ul>li>h5");
            List<Area> list = new ArrayList<>();
            area.setChilden(list);
            for (Element element1 : ul) {
                String[] s1 = element1.html().split(" ");
                Area area1 = new Area();
                area1.setId(Long.valueOf(s1[1]));
                area1.setName(s1[0]);
                area1.setParentId(area.getId());
                area1.setAntors(area.getAntors()+","+area1.getParentId());
                list.add(area1);
                Elements select = element.select("ul>li>ul>li");
                List<Area> list1 = new ArrayList<>();
                area.setChilden(list);
                area1.setChilden(list1);
                for (Element element2 : select) {
                    Area area2 = new Area();
                    String[] s2 = element2.html().split(" ");
                    //判断是否是这个下面的
                    String substring = s2[1].substring(0, 4);
                    if (substring.equals(area1.getId()+"")){
                        area2.setChilden(null);
                        area2.setParentId(area1.getId());
                        area2.setAntors(area1.getAntors()+","+area1.getId());
                        area2.setId(Long.valueOf(s2[1]));
                        area2.setName(s2[0]);
                        list1.add(area2);
                    }
                }
            }
            areas.add(area);
        }
       return areas;
    }
}

数据库

SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for sys_area_code
-- ----------------------------
DROP TABLE IF EXISTS `sys_area_code`;
CREATE TABLE `sys_area_code`  (
  `area_id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '地区id',
  `parent_id` bigint(20) NULL DEFAULT NULL COMMENT '父id',
  `ancetors` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '组级列表',
  `area_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '地区名称',
  `state` char(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT '0' COMMENT '0 正常 1禁用',
  `del_flag` char(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT '0' COMMENT '0存在 2删除',
  `create_by` bigint(20) NULL DEFAULT 1 COMMENT '创建人',
  `create_time` datetime(0) NULL DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP(0) COMMENT '创建时间',
  PRIMARY KEY (`area_id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 659005 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;

SET FOREIGN_KEY_CHECKS = 1;

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值