1、Excel数据存储HBase (Part 1: storing Excel data into HBase via Phoenix)
package com.educoder.policeCalculate;
import com.educoder.policeCalculate.util.Excel2003Reader;
import com.educoder.policeCalculate.util.IRowReader;
import java.io.IOException;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
public class Cs {
    /********** Begin **********/
    // All column names of the HBase table T_BKYJ_N_JBXX, filled by selectTable().
    static List<String> columns = new ArrayList<String>();

    /**
     * Reads /root/files/t_bkyj_n_jbxx.xls, matches its header row against the
     * columns of T_BKYJ_N_JBXX, and batch-upserts the matching cells into HBase
     * through the Phoenix thin client.
     */
    public static void main(String[] args) throws IOException {
        // Load the table's column names first so the Excel header can be matched.
        selectTable();
        // col:   upper-cased Excel headers that also exist in the table
        // list2: one inner list of cell values per Excel data row
        // li:    cell indexes (within an Excel row) of the shared columns
        // lis:   one "?" placeholder per shared column, for the UPSERT statement
        final List<String> col = new ArrayList<String>();
        final List<List> list2 = new ArrayList<List>();
        final List<Integer> li = new ArrayList<Integer>();
        final List<String> lis = new ArrayList<String>();
        // Parse the Excel file with the Excel2003Reader utility; rows are
        // delivered one at a time through the IRowReader callback.
        Excel2003Reader reader = new Excel2003Reader(new IRowReader() {
            // Called once per row with the sheet index, row number and cell values.
            public void getRows(int sheetIndex, int curRow, List<String> rowlist) {
                List<String> list1 = new ArrayList<String>();
                if (curRow == 0) {
                    // Row 0 is the Excel header; upper-case it so it can be
                    // compared with the HBase column names.
                    for (int i = 0; i < rowlist.size(); i++) {
                        col.add(rowlist.get(i).toUpperCase());
                    }
                    // Keep only the columns present in both the Excel file and the table.
                    col.retainAll(columns);
                    for (int m = 0; m < col.size(); m++) {
                        // NOTE(review): assumes the original Excel headers are
                        // lower-case, otherwise indexOf() returns -1 — confirm
                        // against the input file.
                        int i = rowlist.indexOf(col.get(m).toLowerCase());
                        li.add(i);
                        lis.add("?");
                    }
                } else {
                    // Data row: collect the cells belonging to the shared columns.
                    for (int n = 0; n < li.size(); n++) {
                        list1.add(rowlist.get(li.get(n)));
                    }
                    list2.add(list1);
                }
            }
        });
        reader.process("/root/files/t_bkyj_n_jbxx.xls");
        // List.toString() yields "[A, B, C]"; strip the brackets to get the
        // comma-separated fragments for the UPSERT statement.
        String a = col.toString().substring(1, col.toString().length() - 1);
        String b = lis.toString().substring(1, lis.toString().length() - 1);
        UpsertTable(a, b, list2);
    }

    // Shared Phoenix thin-client connection, opened once in the static initializer
    // and used by both selectTable() and UpsertTable().
    private static Connection connection = null;

    static {
        try {
            Class.forName("org.apache.phoenix.queryserver.client.Driver");
            connection = DriverManager.getConnection("jdbc:phoenix:thin:url=http://127.0.0.1:8765;serialization=PROTOBUF");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Queries T_BKYJ_N_JBXX and stores all of its column names in {@link #columns}.
     *
     * <p>BUGFIX: the original version closed the shared {@code connection} in its
     * {@code finally} block, so the subsequent {@code UpsertTable()} call always
     * failed on a closed connection. The connection is now left open here; only
     * the statement and result set (now locals instead of static fields) are closed.
     */
    public static void selectTable() {
        Statement stmt = null;
        ResultSet rs = null;
        try {
            stmt = connection.createStatement();
            rs = stmt.executeQuery("select * from T_BKYJ_N_JBXX");
            ResultSetMetaData metaData = rs.getMetaData();
            // JDBC column indexes are 1-based.
            for (int j = 0; j < metaData.getColumnCount(); j++) {
                columns.add(metaData.getColumnName(j + 1));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (rs != null) {
                try {
                    rs.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            // Deliberately NOT closing the connection: UpsertTable() still needs it.
        }
    }

    /**
     * Batch-upserts the Excel rows into T_BKYJ_N_JBXX and closes the shared
     * connection when done.
     *
     * @param a     comma-separated column list for the UPSERT statement (derived
     *              from table metadata, not raw user input)
     * @param b     matching comma-separated list of "?" placeholders
     * @param list2 one inner list of cell values per Excel data row
     */
    public static void UpsertTable(String a, String b, List<List> list2) {
        PreparedStatement ps = null;
        try {
            // Commit manually so the whole load is one transaction.
            connection.setAutoCommit(false);
            String sql = "UPSERT INTO T_BKYJ_N_JBXX(" + a + ") VALUES(" + b + ")";
            ps = connection.prepareStatement(sql);
            for (int i = 0; i < list2.size(); i++) {
                for (int j = 0; j < list2.get(i).size(); j++) {
                    ps.setString(j + 1, list2.get(i).get(j).toString());
                }
                ps.addBatch();
                // Flush every 10000 rows. Using (i + 1) avoids the original's
                // needless flush of a single-row batch at i == 0.
                if ((i + 1) % 10000 == 0) {
                    ps.executeBatch();
                    ps.clearBatch();
                }
            }
            // Flush the remaining rows and commit.
            ps.executeBatch();
            ps.clearBatch();
            connection.commit();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (ps != null) {
                try {
                    ps.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            // Safe to close here: this is the last use of the connection.
            if (connection != null) {
                try {
                    connection.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
    }
    /********** End **********/
}
2、文本文件数据存储HBase (Part 2: storing text/CSV file data into HBase via Spark)
package com.educoder.policeCalculate;
import com.univocity.parsers.csv.Csv;
import org.apache.parquet.io.ValidatingRecordConsumer;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class CsvToHbase {
    /**
     * Loads /root/files1/a.csv with Spark, keeps only the columns that also
     * exist in the Phoenix/HBase table T_BKYJ_N_JBXX, and writes them to the
     * table through the phoenix-spark connector.
     *
     * @param spark the active SparkSession used to read the CSV and write to Phoenix
     */
    public static void Csv(SparkSession spark) {
        Logger.getLogger("org").setLevel(Level.ERROR);
        /********** Begin **********/
        // Load the CSV; the first line is treated as the header.
        Dataset<Row> df = spark.read().option("header", "true").csv("/root/files1/a.csv");
        // Upper-case the CSV header names so they can be compared with the
        // table's column names.
        List<String> fileList = new ArrayList<String>();
        String[] fileColumns = df.columns();
        for (int i = 0; i < fileColumns.length; i++) {
            fileList.add(fileColumns[i].toUpperCase());
        }
        // Read the column names of T_BKYJ_N_JBXX through the phoenix-spark source.
        String[] tableColumns = spark.read().option("table", "T_BKYJ_N_JBXX").option("zkUrl", "127.0.0.1:2181")
                .format("org.apache.phoenix.spark").load().columns();
        // Arrays.asList() is a fixed-size view; copy it into a mutable,
        // properly-typed list (the original used raw List/ArrayList here).
        List<String> tableList = new ArrayList<String>(Arrays.asList(tableColumns));
        // Intersect: keep only the columns present in both CSV and table.
        fileList.retainAll(tableList);
        // Strip the surrounding brackets of List.toString() to build the SELECT list.
        String columns = fileList.toString().substring(1, fileList.toString().length() - 1);
        // Register the CSV data as a temporary view for SQL access.
        df.createOrReplaceTempView("t");
        // Select the shared columns and write them to the Phoenix table.
        // NOTE(review): phoenix-spark is documented to require SaveMode.Overwrite
        // while actually performing upserts — confirm against the connector version.
        spark.sql("select " + columns + " from t").write().mode(SaveMode.Overwrite)
                .format("org.apache.phoenix.spark").option("zkUrl", "127.0.0.1:2181")
                .option("table", "T_BKYJ_N_JBXX").save();
        /********** End **********/
    }
}