第1关:HBase-shell命令
任务描述
使用HBase shell
命令创建表:exam_tb1
,向表中添加数据,表的数据与结构如下:
#启动HBAse
start-hbase.sh
hbase shell
#创建表
create 'exam_tb1', {NAME=>'user_info'},{NAME=>'class_info'}
#添加数据
put 'exam_tb1','row-1','user_info:name','jack'
put 'exam_tb1','row-1','user_info:age','32'
put 'exam_tb1','row-1','class_info:class_name','software'
put 'exam_tb1','row-1','class_info:class_id','201801'
put 'exam_tb1','row-2','user_info:name','rose'
put 'exam_tb1','row-2','user_info:age','28'
put 'exam_tb1','row-2','class_info:class_name','hardware'
put 'exam_tb1','row-2','class_info:class_id','201802'
第2关:Hbase Java API
任务描述
在右侧编写代码,禁用表step2_tb0
,删除表step2_tb1
,创建表emp_tb1
,emp_tb1
表结构与数据如下图:
package step2;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.util.*;
public class Task {
public void operatorTables()throws Exception{
/********* Begin *********/
Configuration conf = HBaseConfiguration.create(); //使用create()静态方法就可以得到Configuration对象
Connection conn = ConnectionFactory.createConnection(conf); //config为前文的配置对象
Admin admin = conn.getAdmin(); //使用连接对象获取Admin对象
TableName tableName = TableName.valueOf("emp_tb1");//定义表名
HTableDescriptor htd = new HTableDescriptor(tableName);//定义表对象
HColumnDescriptor hcd1 = new HColumnDescriptor("emp_info");//定义列族对象
HColumnDescriptor hcd2 = new HColumnDescriptor("dept_info");//定义列族对象
htd.addFamily(hcd1); //添加
htd.addFamily(hcd2); //添加
admin.createTable(htd);//创建表
// 停用表
admin.disableTable(TableName.valueOf("step2_tb0"));
// 停用表
admin.disableTable(TableName.valueOf("step2_tb1"));
// 删除表
admin.deleteTable(TableName.valueOf("step2_tb1"));
// 获取一个操作指定表的table对象,进行DML操作
Table table = conn.getTable(TableName.valueOf("emp_tb1"));
// 构造要插入的数据为一个Put类型(一个put对象只能对应一个rowkey)的对象
Put put = new Put(Bytes.toBytes("201101"));
put.addColumn(Bytes.toBytes("emp_info"), Bytes.toBytes("emp_name"), Bytes.toBytes("lucy"));
put.addColumn(Bytes.toBytes("emp_info"), Bytes.toBytes("emp_id"), Bytes.toBytes("1"));
put.addColumn(Bytes.toBytes("dept_info"), Bytes.toBytes("gender"), Bytes.toBytes("man"));
put.addColumn(Bytes.toBytes("dept_info"), Bytes.toBytes("dept_id"), Bytes.toBytes("2001"));
put.addColumn(Bytes.toBytes("dept_info"), Bytes.toBytes("dept_name"), Bytes.toBytes("finance"));
Put put2 = new Put(Bytes.toBytes("201102"));
put2.addColumn(Bytes.toBytes("emp_info"), Bytes.toBytes("emp_name"), Bytes.toBytes("alpha"));
put2.addColumn(Bytes.toBytes("emp_info"), Bytes.toBytes("emp_id"), Bytes.toBytes("2"));
put2.addColumn(Bytes.toBytes("dept_info"), Bytes.toBytes("gender"), Bytes.toBytes("woman"));
put2.addColumn(Bytes.toBytes("dept_info"), Bytes.toBytes("dept_id"), Bytes.toBytes("2003"));
put2.addColumn(Bytes.toBytes("dept_info"), Bytes.toBytes("dept_name"), Bytes.toBytes("techenology"));
Put put3 = new Put(Bytes.toBytes("201103"));
put3.addColumn(Bytes.toBytes("emp_info"), Bytes.toBytes("emp_name"), Bytes.toBytes("linus"));
put3.addColumn(Bytes.toBytes("emp_info"), Bytes.toBytes("emp_id"), Bytes.toBytes("3"));
put3.addColumn(Bytes.toBytes("dept_info"), Bytes.toBytes("gender"), Bytes.toBytes("man"));
put3.addColumn(Bytes.toBytes("dept_info"), Bytes.toBytes("dept_id"), Bytes.toBytes("3002"));
put3.addColumn(Bytes.toBytes("dept_info"), Bytes.toBytes("dept_name"), Bytes.toBytes("logistics"));
ArrayList<Put> puts = new ArrayList<>();
puts.add(put);
puts.add(put2);
puts.add(put3);
// 插进去
table.put(puts);
table.close();
conn.close();
/********* End *********/
}
}
第3关:HBase扫描
任务描述
在右侧sanTable(String tablename)
方法中扫描表中的数据并输出(tablename
为要扫描的表名),扫描起止行要求从row-10
开始至row-50
,且只扫描info:name
列。
package step3;
import java.io.IOException;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.*;
public class Task {
public void scanTable(String tableName)throws Exception{
/********* Begin *********/
Configuration config = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(config);
Admin admin = conn.getAdmin();
TableName tablename=TableName.valueOf(tableName);
Table table=conn.getTable(tablename);
Scan scan = new Scan();
scan.addColumn(Bytes.toBytes("info"),Bytes.toBytes("name"));
scan.setStartRow(Bytes.toBytes("row-10"));
scan.setStopRow(Bytes.toBytes("row-50"));
ResultScanner scanner = table.getScanner(scan);
for(Result result:scanner){
System.out.println(Bytes.toString(result.getRow()));
for(Cell cell:result.listCells()){
String family = Bytes.toString(CellUtil.cloneFamily(cell));
String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
String value = Bytes.toString(CellUtil.cloneValue(cell));
System.out.println("\t"+family+ ":" + qualifier + " " + value);
}
}
/********* End *********/
}
}
第4关:HBase过滤器
任务描述
在右侧query(String tName)
中编写代码,使用过滤器查询出如下数据:
- 查询行健大于等于
row20
且data:phone
列的值是正确的手机号码的数据并输出该行的所有数据;
手机号的规则为:1
开头,第二位是[3,4,5,7,8]
中的任意一位,第三位到十一位都为0-9
的数字。
package step4;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.cli.util.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.util.*;
public class Task {
public void query(String tName) throws Exception {
/********* Begin *********/
Configuration conf = HBaseConfiguration.create();
Connection connection = ConnectionFactory.createConnection(conf);
Admin admin = connection.getAdmin();
TableName tablename = TableName.valueOf(tName);
Table table = connection.getTable(tablename);
//行键大于20
Filter equalFilter1 = new RowFilter(CompareOperator.GREATER_OR_EQUAL,
new BinaryComparator(Bytes.toBytes("row20")));
//单列值过滤器 电话号码
SingleColumnValueFilter valueFilter =new SingleColumnValueFilter(Bytes.toBytes("data"),Bytes.toBytes("phone"),CompareOperator.EQUAL,new RegexStringComparator("^1[3|4|5|7|8][0-9]{9}$"));
List<Filter> list =new ArrayList<>();
list.add(valueFilter);
list.add(equalFilter1);
FilterList filterList1 =new FilterList(FilterList.Operator.MUST_PASS_ALL,list);
Scan scan1 = new Scan();
scan1.setFilter(filterList1);
ResultScanner scanner1 = table.getScanner(scan1);
for (Result result : scanner1) {
System.out.println(Bytes.toString(result.getRow()));
for(Cell cell : result.listCells()){
String family = Bytes.toString(CellUtil.cloneFamily(cell));
String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
String value = Bytes.toString(CellUtil.cloneValue(cell));
System.out.println("\t" + family + ":" + qualifier + " " + value);
}
}
scanner1.close();
connection.close();
/********* End *********/
}
}