HBaseUtilss 类(HBase 写入工具类:单条写入与按线程批量写入)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
/**
 * @ClassName: HBaseUtilss
 * @author: zhl
 * @date: 2019/12/5 9:35
 */
/**
 * HBase write utility (singleton): single-record puts and per-thread batched
 * puts, plus a main() that bulk-loads one CSV file into table "stu5" using
 * three threads, each covering one third of the rows.
 */
class HBaseUtilss {
    /** Flush a thread's buffered Puts once it reaches this size. */
    private static final int BATCH_SIZE = 188;
    // Per-thread buffer of pending Puts; flushed when BATCH_SIZE is reached
    // or when flush(tableName) is called explicitly.
    ThreadLocal<List<Put>> threadLocal = new ThreadLocal<List<Put>>();
    HBaseAdmin admin = null;
    Connection conn = null;
    private static HBaseUtilss instance = null;

    /** Lazily creates and returns the process-wide singleton. */
    public static synchronized HBaseUtilss getInstance() {
        if (null == instance) instance = new HBaseUtilss();
        return instance;
    }

    private HBaseUtilss() {
        // HBaseConfiguration.create() loads hbase-default/hbase-site settings;
        // a bare new Configuration() (as before) would silently miss them.
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "zhl:2181");
        configuration.set("hbase.rootdir", "hdfs://zhl:9000/hbase");
        try {
            conn = ConnectionFactory.createConnection(configuration);
            // Deprecated constructor kept so the field type stays HBaseAdmin;
            // the field is never used in this class.
            admin = new HBaseAdmin(configuration);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns an HTable handle for the given table name, or null on failure.
     * Callers are responsible for closing the returned table.
     *
     * @param tableName HBase table name
     */
    public HTable getTable(String tableName) {
        HTable table = null;
        try {
            final TableName tname = TableName.valueOf(tableName);
            table = (HTable) conn.getTable(tname);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return table;
    }

    /**
     * Writes a single cell to an HBase table.
     *
     * @param tableName HBase table name
     * @param rowkey    row key
     * @param cf        column family
     * @param column    column qualifier
     * @param value     cell value
     */
    public void put(String tableName, String rowkey, String cf, String column, String value) {
        HTable table = getTable(tableName);
        if (table == null) return; // connection failure already logged
        Put put = new Put(Bytes.toBytes(rowkey));
        // addColumn replaces the deprecated Put.add
        put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value));
        try {
            table.put(put);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                table.close(); // original leaked the table handle
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Buffers one cell for batched writing. Each thread has its own buffer;
     * the buffer is written out once it reaches BATCH_SIZE. A thread must use
     * the same table for all of its bulkput calls, and MUST call
     * {@link #flush(String)} after its last record — otherwise the final
     * partial batch (&lt; BATCH_SIZE Puts) is never written.
     *
     * @param tableName HBase table name
     * @param rowkey    row key
     * @param cf        column family
     * @param column    column qualifier
     * @param value     cell value
     */
    public void bulkput(String tableName, String rowkey, String cf, String column, String value) {
        try {
            List<Put> list = threadLocal.get();
            if (list == null) {
                list = new ArrayList<Put>();
                threadLocal.set(list);
            }
            Put put = new Put(Bytes.toBytes(rowkey));
            put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value));
            list.add(put);
            if (list.size() >= BATCH_SIZE) {
                flushList(tableName, list);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes out any Puts still buffered for the calling thread. Call this
     * after the last bulkput(); the original code never flushed the tail
     * batch, silently dropping up to BATCH_SIZE-1 records per thread.
     *
     * @param tableName HBase table name the buffered Puts belong to
     */
    public void flush(String tableName) {
        List<Put> list = threadLocal.get();
        if (list == null || list.isEmpty()) return;
        try {
            flushList(tableName, list);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Writes the batch and empties it, always closing the table handle. */
    private void flushList(String tableName, List<Put> list) throws IOException {
        HTable table = getTable(tableName);
        try {
            table.put(list);
            list.clear();
        } finally {
            table.close();
        }
    }

    public static void main(String[] args) {
        // Load the CSV; row 0 is the header, remaining rows are data.
        final List<String> list = new CsvUtilData().readCsv("C:/scadadata/2019/10/TB001-10 00.csv");
        final long start = System.currentTimeMillis();
        final String[] head = list.get(0).split(",");
        final int n = list.size();
        // Three loader threads, each covering one third of the rows.
        startLoader(list, head, 1, n / 3, start);
        startLoader(list, head, n / 3, n / 3 * 2, start);
        startLoader(list, head, n / 3 * 2, n, start);
    }

    /** Starts a thread that writes CSV rows [from, to) into table "stu5". */
    private static void startLoader(final List<String> rows, final String[] head,
                                    final int from, final int to, final long start) {
        new Thread(new Runnable() {
            public void run() {
                for (int i = from; i < to; i++) {
                    String[] cols = rows.get(i).split(",");
                    for (int j = 0; j < cols.length; j++) {
                        // rowkey = value of column 3 plus the column index (as in original)
                        HBaseUtilss.getInstance().bulkput("stu5", cols[3] + j, "info", head[j], cols[j]);
                    }
                }
                // Flush the tail batch — without this, up to BATCH_SIZE-1
                // buffered Puts per thread were lost.
                HBaseUtilss.getInstance().flush("stu5");
                System.out.println(System.currentTimeMillis() - start);
            }
        }).start();
    }
}
CsvUtilData 类:逐行读取 CSV 文件并返回所有行
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
public class CsvUtilData
{
    /**
     * Reads a text/CSV file line by line.
     *
     * @param filepath path of the file to read
     * @return all lines of the file in order; an empty list if the file
     *         cannot be opened or read (the error is logged)
     */
    public ArrayList<String> readCsv(String filepath) {
        ArrayList<String> allString = new ArrayList<String>();
        // try-with-resources closes the reader on every path. The original
        // leaked the reader, and threw NullPointerException when the file did
        // not exist (br stayed null after the swallowed FileNotFoundException).
        // The pointless setReadable/setWritable calls were removed.
        try (BufferedReader br = new BufferedReader(new FileReader(filepath))) {
            String line;
            while ((line = br.readLine()) != null) {
                allString.add(line);
            }
        } catch (IOException e) { // also covers FileNotFoundException
            e.printStackTrace();
        }
        return allString;
    }
}
ExportHbase 类:根据 rowkey 列表从 HBase 批量读取数据
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* @ClassName: ExportHbase
* @author: zhl
* @date: 2019/12/6 14:36
*/
public class ExportHbase {
    public static Configuration conf = null;
    public static Connection connection = null;
    public static Admin admin = null;

    static {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "zhl:2181");
        // NOTE(review): 60010 is usually the HMaster web-UI port, not the RPC
        // port — confirm this setting is what the cluster expects.
        conf.set("hbase.master", "zhl:60010");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        // (removed the redundant second HBaseConfiguration.create(conf) call —
        //  it only re-wrapped the configuration created above)
        try {
            connection = ConnectionFactory.createConnection(conf);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Batch-gets the given rowkeys from a table in a single RPC and returns
     * every cell value found, in result order. Also prints the cell count.
     *
     * @param tableName  HBase table name
     * @param rowkeyList rowkeys to fetch
     * @return values of all cells of all matched rows
     * @throws IOException if the table cannot be opened or the batch get fails
     *                     (the original swallowed the open failure and then
     *                     NPE'd on the null table)
     */
    public static List<String> qurryTableTestBatch(String tableName, List<String> rowkeyList) throws IOException {
        List<Get> getList = new ArrayList<>(rowkeyList.size());
        for (String rowkey : rowkeyList) { // one Get per rowkey, batched below
            getList.add(new Get(Bytes.toBytes(rowkey)));
        }
        List<String> list = new ArrayList<>();
        // try-with-resources closes the table handle (original leaked it).
        try (Table table = connection.getTable(TableName.valueOf(tableName))) {
            Result[] results = table.get(getList); // single batched get
            for (Result result : results) {
                for (Cell kv : result.rawCells()) {
                    list.add(Bytes.toString(CellUtil.cloneValue(kv)));
                }
            }
        }
        System.out.println(list.size()); // cell count, as the original printed
        return list;
    }

    public static void main(String[] args) {
        List<String> rowkeyList = new ArrayList<String>();
        List<String> list = new ArrayList<String>();
        // Row 0 of the CSV is the header; column 3 of each data row is the rowkey.
        List<String> csvRows = new CsvUtilData().readCsv("C:/scadadata/2019/10/TB001-10 00.csv");
        for (int i = 1; i < csvRows.size(); i++) {
            rowkeyList.add(csvRows.get(i).split(",")[3]);
        }
        try {
            list = ExportHbase.qurryTableTestBatch("stu5", rowkeyList);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println(list.size()); // 0 if the batch get failed
    }
}
运行效率:插入 128000 条数据约耗时 6~7 秒,仍有可优化空间(例如调大批量大小、使用 BufferedMutator、关闭 WAL 或对表预分区)。