大数据从入门到实战 - HBase 开发:批量操作
叮嘟!这里是小啊呜的学习课程资料整理。好记性不如烂笔头,今天也是努力进步的一天。一起加油进阶吧!
一、关于此次实践
1、实战简介
在实际的 HBase 开发中,我们经常会使用批量操作,比如批量添加数据、批量删除数据。本次实训我们就来完成这些常用的批量操作。
实验环境:
Hadoop 2.7
JDK 8
HBase 2.1.1
2、全部任务
二、实践详解
1、第 1 关:批量获取数据
package step1;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.generated.rest.rest_jsp;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Step 1: fetch several rows from an HBase table in a single batched call.
 */
public class Task {

    /**
     * Reads rows {@code 2018} and {@code 2020} from table {@code step1_student}
     * and prints their cells to standard output.
     *
     * @throws Exception if the connection cannot be opened or the read fails
     */
    public void batchGet() throws Exception {
        /********* Begin *********/
        Configuration config = HBaseConfiguration.create();
        List<String> rows = new ArrayList<>();
        rows.add("2018");
        rows.add("2020");
        // Connection and Table are Closeable; release them even if the read fails.
        try (Connection connection = ConnectionFactory.createConnection(config);
             Table table = connection.getTable(TableName.valueOf("step1_student"))) {
            getData(table, rows);
        }
        /********* End *********/
    }

    /**
     * Issues one batched get for the given row keys, prints every cell found as
     * {@code family:qualifier<TAB>value}, and collects the cell values.
     *
     * @param table table to read from; the caller keeps ownership and closes it
     * @param rows  row keys to fetch
     * @return the values of all cells found, in the order they were printed
     * @throws Exception if the batched get fails
     */
    public List<String> getData(Table table, List<String> rows) throws Exception {
        List<Get> gets = new ArrayList<>(rows.size());
        for (String rowKey : rows) {
            gets.add(new Get(Bytes.toBytes(rowKey)));
        }
        List<String> values = new ArrayList<>();
        Result[] results = table.get(gets);
        for (Result result : results) {
            // A row that does not exist yields an empty Result whose rawCells()
            // may be null; skip it instead of failing with a NullPointerException.
            if (result == null || result.isEmpty()) {
                continue;
            }
            System.out.println("Row:" + Bytes.toString(result.getRow()));
            for (Cell cell : result.rawCells()) {
                String family = Bytes.toString(CellUtil.cloneFamily(cell));
                String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                values.add(value);
                System.out.println(family + ":" + qualifier + "\t" + value);
            }
        }
        return values;
    }
}
命令行:
点击测评之前,请先开启 Hadoop ( start-dfs.sh )和 HBase ( start-hbase.sh ),
并且需要等待 HBase 初始化完成( 20 秒左右),否则无法在 HBase 中创建表。
测评
2、第 2 关:批量删除数据
package step2;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Step 2: delete several rows from an HBase table with batched deletes.
 */
public class Task {

    /**
     * Deletes rows {@code row1}..{@code row5} and {@code row7}..{@code row10}
     * from table {@code step2_table}, as two separate batches. {@code row6} is
     * not touched.
     *
     * @throws Exception if the connection cannot be opened or a delete fails
     */
    public void batchDelete() throws Exception {
        /********* Begin *********/
        Configuration conf = HBaseConfiguration.create();
        // Connection and Table are Closeable; release them even if a delete fails.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("step2_table"))) {
            delete(table, rowKeys(1, 5));
            delete(table, rowKeys(7, 10));
        }
        /********* End *********/
    }

    /**
     * Deletes the given rows from the table in one batched call.
     *
     * @param table table to delete from; the caller keeps ownership and closes it
     * @param rows  row keys to delete
     * @throws IOException if the batched delete fails
     */
    public void delete(Table table, List<String> rows) throws IOException {
        List<Delete> deletes = new ArrayList<>(rows.size());
        for (String rowKey : rows) {
            deletes.add(new Delete(Bytes.toBytes(rowKey)));
        }
        table.delete(deletes);
    }

    /** Builds the row keys {@code "row<first>"} through {@code "row<last>"}, both inclusive. */
    private static List<String> rowKeys(int first, int last) {
        List<String> keys = new ArrayList<>();
        for (int i = first; i <= last; i++) {
            keys.add("row" + i);
        }
        return keys;
    }
}
测评
3、第 3 关:批量导入数据至 HBase
package step3;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Step 3: create a table and load several rows into it with one batched put.
 */
public class Task {

    /**
     * Creates table {@code stu} with column families {@code basic_info} and
     * {@code school_info}, then writes two student rows in a single batched put.
     *
     * @throws Exception if the connection cannot be opened, the table cannot be
     *                   created (for example because it already exists), or the
     *                   put fails
     */
    public void batchPut() throws Exception {
        /********* Begin *********/
        // HBaseConfiguration.create() layers the HBase resource files on top of
        // the Hadoop defaults; a bare "new Configuration()" does not.
        Configuration config = HBaseConfiguration.create();
        // Connection, Admin and Table are Closeable; release them on every path.
        try (Connection conn = ConnectionFactory.createConnection(config);
             Admin admin = conn.getAdmin()) {
            // Create the table with its two column families.
            TableName tableName = TableName.valueOf("stu");
            TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
            ColumnFamilyDescriptor family = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("basic_info")).build();
            ColumnFamilyDescriptor family2 = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("school_info")).build();
            builder.setColumnFamily(family);
            builder.setColumnFamily(family2);
            admin.createTable(builder.build());

            String[] rows = {"20181122", "20181123"};
            String[][] basic_infos = {{"阿克蒙德", "male", "1987-05-23", "tel:139********", "HUNan-ChangSha"}, {"萨格拉斯", "male", "1986-05-23", "tel:187********", "HUNan-ChangSha"}};
            String[] basic_colums = {"name", "gender", "birthday", "connect", "address"};
            String[][] school_infos = {{"ChengXing", "class 1 grade 2", "Software"}, {"ChengXing", "class 2 grade 2", "Software"}};
            String[] school_colums = {"college", "class", "object"};

            // Build one Put per row carrying every column of both families.
            List<Put> puts = new ArrayList<>(rows.length);
            for (int x = 0; x < rows.length; x++) {
                Put put = new Put(Bytes.toBytes(rows[x]));
                addColumns(put, "basic_info", basic_colums, basic_infos[x]);
                addColumns(put, "school_info", school_colums, school_infos[x]);
                puts.add(put);
            }

            try (Table table = conn.getTable(tableName)) {
                table.put(puts);
            }
        }
        /********* End *********/
    }

    /**
     * Adds one cell per qualifier to {@code put} under the given column family.
     * The loop is bounded by the number of columns, not the number of rows, so
     * that every declared column is written.
     *
     * @throws IllegalArgumentException if qualifiers and values differ in length
     */
    private static void addColumns(Put put, String family, String[] qualifiers, String[] values) {
        if (qualifiers.length != values.length) {
            throw new IllegalArgumentException("Column family " + family + ": "
                    + qualifiers.length + " qualifiers but " + values.length + " values");
        }
        byte[] familyBytes = Bytes.toBytes(family);
        for (int i = 0; i < qualifiers.length; i++) {
            put.addColumn(familyBytes, Bytes.toBytes(qualifiers[i]), Bytes.toBytes(values[i]));
        }
    }
}
测评
Ending!
更多课程知识学习记录随后再来吧!
就酱,嘎啦!
注:
人生在勤,不索何获。