ReadFileToMysql
import java.io.*;
import java.sql.*;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.log4j.Logger;
public class ReadFileToMysql {
static final String JDBC_DRIVER = "com.mysql.cj.jdbc.Driver";
static final String DB_URL = "jdbc:mysql://127.0.0.1:3306/metastore3?useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=UTC";
static final String USER = "metastore3";
static final String PASS = "metastore3";
private static final Logger logger = Logger.getLogger(ReadFileToMysql.class);
public static void excuteCmd() {
Tools tools = new Tools();
tools.executeCmd("hdfs dfs -ls -R / > /test/hdfs_files.txt");
tools.executeCmd("echo FINISH >> /test/hdfs_files.txt");
}
public static void main(String[] args) {
excuteCmd();
Connection conn = null;
Statement stmt = null;
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
Date date = new Date();
String db = "test";
String createTabNam = db + "." + sdf.format(date).replaceAll("-", "_") + "_hdfs_files";
Calendar calendar = Calendar.getInstance();
calendar.setTime(date);
calendar.add(Calendar.DAY_OF_MONTH, -1);
date = calendar.getTime();
String oldTabNam = db + "." + sdf.format(date).replaceAll("-", "_") + "_hdfs_files";
String rfp = args[0];
try {
Class.forName(JDBC_DRIVER);
conn = DriverManager.getConnection(DB_URL, USER, PASS);
stmt = conn.createStatement();
BufferedInputStream fr = null;
BufferedReader reader = null;
String createSql = "create table " + createTabNam + "(\n" +
"file_name VARCHAR(250),\n" +
"visit_time DATETIME\n" +
")";
try {
fr = new BufferedInputStream(new FileInputStream(new File(rfp)));
reader = new BufferedReader(new InputStreamReader(fr, "utf-8"), 5 * 1024 * 1024);
boolean boo = true;
String line = "";
while (boo) {
line = reader.readLine();
if (line.equals("FINISH")) {
boo = false;
} else {
String[] strArr = line.split(" ");
int sas = strArr.length;
String visitDate = strArr[sas - 3] + " " + strArr[sas - 2];
if (!strArr[0].matches("^d.+")) {
String sql = "INSERT INTO " + createTabNam + "(file_name,visit_time) VALUES(\"" + strArr[sas - 1] + "\",\"" + visitDate + "\")";
stmt.execute(sql);
}
}
}
} catch (IOException e) {
e.printStackTrace();
} catch (SQLException e) {
e.printStackTrace();
} finally {
if (reader != null) {
try {
reader.close();
fr.close();
} catch (IOException e) {
logger.error("关闭读取流异常:" + e);
}
}
}
String querySql = "SELECT * FROM\n" +
"(\n" +
"SELECT h_9.file_name AS file_name,IF(h_10.visit_time IS NULL,h_9.visit_time,h_10.visit_time) AS visit_time,\n" +
"(\n" +
"\t\tCASE \n" +
"\t\t\tWHEN h_10.file_name IS NULL THEN '删除'\n" +
"\t\t\tWHEN h_10.file_name IS NOT NULL AND h_10.visit_time != h_9.visit_time THEN '更新'\n" +
"\t\t\tWHEN h_10.file_name IS NOT NULL AND h_10.visit_time = h_9.visit_time THEN '无变动'\n" +
"\t\t\tELSE NULL\n" +
"\t\tEND\n" +
"\t) AS statu\n" +
"FROM " + createTabNam + " AS h_10\n" +
"RIGHT JOIN " + oldTabNam + " AS h_9\n" +
"ON h_10.file_name = h_9.file_name\n" +
") new_t\n" +
"WHERE new_t.statu != \"无变动\"";
boolean tabExi = false;
stmt.execute("use test");
ResultSet tables = stmt.executeQuery("show tables");
while (tables.next()) {
if (tables.getString(1).equals(oldTabNam)) {
tabExi = true;
}
}
if (tabExi) {
ResultSet rs = stmt.executeQuery(querySql);
while (rs.next()) {
int id = rs.getInt("id");
String file_name = rs.getString("file_name");
String visit_time = sdf.format(rs.getDate("visit_time"));
String statu = sdf.format(rs.getDate("statu"));
System.out.print("file_name: " + file_name);
System.out.print("visit_time: " + visit_time);
System.out.print("statu: " + statu);
System.out.print("\n");
}
}
stmt.close();
conn.close();
} catch (SQLException se) {
se.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (stmt != null) stmt.close();
} catch (SQLException se2) {
}
try {
if (conn != null) conn.close();
} catch (SQLException se) {
se.printStackTrace();
}
}
}
}
Tools
class Tools {
def executeCmd(cmd: String): Unit = {
import scala.sys.process._
val result = s"${cmd}" !
}
}