通过MySQL实现对HDFS文件变更的监控

通过MySQL实现对HDFS文件变更的监控

ReadFileToMysql


import java.io.*;
import java.sql.*;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.log4j.Logger;

public class ReadFileToMysql {
    static final String JDBC_DRIVER = "com.mysql.cj.jdbc.Driver";
    static final String DB_URL = "jdbc:mysql://127.0.0.1:3306/metastore3?useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=UTC";
    static final String USER = "metastore3";
    static final String PASS = "metastore3";
    private static final Logger logger = Logger.getLogger(ReadFileToMysql.class);

    //使用static执行
    public static void excuteCmd() {
        Tools tools = new Tools();
        tools.executeCmd("hdfs dfs -ls -R / > /test/hdfs_files.txt");
        tools.executeCmd("echo FINISH >> /test/hdfs_files.txt");
    }

    public static void main(String[] args) {
        excuteCmd();

        Connection conn = null;
        Statement stmt = null;
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");//yyyy-MM-dd HH:mm:ss
        Date date = new Date();
        // 库名、当日表名
        String db = "test";
        String createTabNam = db + "." + sdf.format(date).replaceAll("-", "_") + "_hdfs_files";
        // 前日表名
        Calendar calendar = Calendar.getInstance();
        calendar.setTime(date);
        calendar.add(Calendar.DAY_OF_MONTH, -1);
        date = calendar.getTime();
        String oldTabNam = db + "." + sdf.format(date).replaceAll("-", "_") + "_hdfs_files";

        // hdfs_files数据文件存放路径
        String rfp = args[0];

        try {
            Class.forName(JDBC_DRIVER);
            conn = DriverManager.getConnection(DB_URL, USER, PASS);
            stmt = conn.createStatement();

            BufferedInputStream fr = null;
            BufferedReader reader = null;
            // 创建当天文件表
            String createSql = "create table " + createTabNam + "(\n" +
                    "file_name VARCHAR(250),\n" +
                    "visit_time DATETIME\n" +
                    ")";
//            stmt.execute(createSql);
            //TODO 删除前天表数据???   例如今天是9号,删除7号的表

            try {
                fr = new BufferedInputStream(new FileInputStream(new File(rfp)));
                reader = new BufferedReader(new InputStreamReader(fr, "utf-8"), 5 * 1024 * 1024);
                boolean boo = true;
                String line = "";
                while (boo) {
                    line = reader.readLine();
                    if (line.equals("FINISH")) {
                        boo = false;
                    } else {
                        String[] strArr = line.split(" ");
                        int sas = strArr.length;
                        String visitDate = strArr[sas - 3] + " " + strArr[sas - 2];
                        // 判断目录
                        if (!strArr[0].matches("^d.+")) {
                            // 将当天最新文件导入mysql
                            String sql = "INSERT INTO " + createTabNam + "(file_name,visit_time) VALUES(\"" + strArr[sas - 1] + "\",\"" + visitDate + "\")";
                            stmt.execute(sql);
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                if (reader != null) {
                    try {
                        reader.close();
                        fr.close();
                    } catch (IOException e) {
                        logger.error("关闭读取流异常:" + e);
                    }
                }
            }

            // TODO 查询出现变动的文件(增删改)
            String querySql = "SELECT * FROM\n" +
                    "(\n" +
                    "SELECT h_9.file_name AS file_name,IF(h_10.visit_time IS NULL,h_9.visit_time,h_10.visit_time) AS visit_time,\n" +
                    "(\n" +
                    "\t\tCASE \n" +
                    "\t\t\tWHEN h_10.file_name IS NULL THEN '删除'\n" +
                    "\t\t\tWHEN h_10.file_name IS NOT NULL AND h_10.visit_time != h_9.visit_time THEN '更新'\n" +
                    "\t\t\tWHEN h_10.file_name IS NOT NULL AND h_10.visit_time = h_9.visit_time THEN '无变动'\n" +
                    "\t\t\tELSE NULL\n" +
                    "\t\tEND\n" +
                    "\t) AS statu\n" +
                    "FROM " + createTabNam + " AS h_10\n" +
                    "RIGHT JOIN " + oldTabNam + " AS h_9\n" +
                    "ON h_10.file_name = h_9.file_name\n" +
                    ") new_t\n" +
                    "WHERE new_t.statu != \"无变动\"";
            // 表存在再去对比
            boolean tabExi = false;
            stmt.execute("use test");
            ResultSet tables = stmt.executeQuery("show tables");
            while (tables.next()) {
                if (tables.getString(1).equals(oldTabNam)) {
                    tabExi = true;
                }
            }
            // 前一天的表存在则查询
            if (tabExi) {
                ResultSet rs = stmt.executeQuery(querySql);
                while (rs.next()) {
                    // 通过字段检索
                    int id = rs.getInt("id");
                    String file_name = rs.getString("file_name");
                    String visit_time = sdf.format(rs.getDate("visit_time"));
                    String statu = sdf.format(rs.getDate("statu"));

                    // TODO 输出数据
                    System.out.print("file_name: " + file_name);
                    System.out.print("visit_time: " + visit_time);
                    System.out.print("statu: " + statu);
                    System.out.print("\n");
                }
            }

            stmt.close();
            conn.close();
        } catch (SQLException se) {
            se.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (stmt != null) stmt.close();
            } catch (SQLException se2) {
            }
            try {
                if (conn != null) conn.close();
            } catch (SQLException se) {
                se.printStackTrace();
            }
        }

    }

}

Tools

class Tools {

  // 运行Linux命令
  def executeCmd(cmd: String): Unit = {
    import scala.sys.process._
    val result = s"${cmd}" ! //执行命令将结果赋值给result
  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值