3.2HBase部分
3.2.1通过Hbase的shell命令创建HBase列式存储数据表格,其中每一行的数据格式如下:
行键(number) | 列簇1(information) | 列簇2(score) | 列簇3(stat_score) | |||||
列名(name) | 列名(sex) | 列名(age) | 列名(123001) | 列名(123002) | 列名(123003) | 列名(sum) | 列名 (avg) | |
学号 | 姓名 | 性别 | 年龄 | 成绩 | 成绩 | 成绩 | 总成绩 | 平均成绩 |
3.2.2请使用HBASE提供的API编程,实现向1)创建的HBase表中插入类似于下表中的数据(完整数据在附录中),列簇3部分先用”NILL”补充。
学生表(Student)
学号(S_No) | 姓名(S_Name) | 性别(S_Sex) | 年龄(S_Age) |
2015001 | Zhangsan | male | 23 |
2015002 | Mary | female | 22 |
2015003 | Lisi | male | 24 |
… | … | … | … |
选课表(SC)
学号(SC_Sno) | 课程号(SC_Cno) | 成绩(SC_Score) |
2015001 | 123001 | 86 |
2015001 | 123002 | 75 |
2015001 | 123003 | 69 |
2015002 | 123001 | 77 |
2015002 | 123002 | 99 |
2015002 | 123003 | 98 |
2015003 | 123001 | 98 |
2015003 | 123002 | 95 |
2015003 | 123003 | 94 |
… | … | … |
3.2.3请使用Spark编程实现对每个学生所有课程总成绩与平均成绩的统计聚合,并将聚合结果存储到1)中创建的HBase表。(可以考虑将聚合后的结果先存入HDFS文件,再从HDFS文件载入数据到HBase,也可以使用API将聚合结果直接插入到HBase表。)
3.2.4请使用Hbase提供的API编程,完成以下指定功能:
- 先添加一个学生用户,再使用addRecord(String tableName, String row, String[] fields, String[] values);向表tableName、行row(用S_Name表示)和字符串数组files指定的单元格中添加对应的数据values。其中fields中每个元素如果对应的列族下还有相应的列限定符的话,用“columnFamily:column”表示。例如,同时向“123001”、“123002”、“123003”三列添加成绩时,字符串数组fields为{“score: 123001”,”score;123002”,”score: 123003”},数组values存储这三门课的成绩。
- scanColumn(String tableName, String column);浏览表tableName某一列的数据,如果某一行记录中该列数据不存在,则返回null。要求当参数column为某一列族名称时,如果底下有若干个列限定符,则要列出每个列限定符代表的列的数据;当参数column为某一列具体名称(例如“Score:123002”)时,只需要列出该列的数据。
- deleteRow(String tableName, String row);删除表tableName中row指定的行的记录。
声明:以下代码是基于topduke学长的代码进行部分修改
/*
* 创建一个students表,并进行相关操作
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
public class HBaseJavaAPI {
// 声明静态配置
private static Configuration conf = null;
static {
conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "192.168.56.121");
conf.set("hbase.zookeeper.property.clientPort", "2181");
}
//判断表是否存在
private static boolean isExist(String tableName) throws IOException {
HBaseAdmin hAdmin = new HBaseAdmin(conf);
return hAdmin.tableExists(tableName);
}
// 创建数据库表
public static void createTable(String tableName, String[] columnFamilys)
throws Exception {
// 新建一个数据库管理员
HBaseAdmin hAdmin = new HBaseAdmin(conf);
if (hAdmin.tableExists(tableName)) {
System.out.println("表 "+tableName+" 已存在!");
//System.exit(0);
} else {
// 新建一个students表的描述
HTableDescriptor tableDesc = new HTableDescriptor(tableName);
// 在描述里添加列族
for (String columnFamily : columnFamilys) {
tableDesc.addFamily(new HColumnDescriptor(columnFamily));
}
// 根据配置好的描述建表
hAdmin.createTable(tableDesc);
System.out.println("创建表 "+tableName+" 成功!");
}
}
// 删除数据库表
public static void deleteTable(String tableName) throws Exception {
// 新建一个数据库管理员
HBaseAdmin hAdmin = new HBaseAdmin(conf);
if (hAdmin.tableExists(tableName)) {
// 关闭一个表
hAdmin.disableTable(tableName);
hAdmin.deleteTable(tableName);
System.out.println("删除表 "+tableName+" 成功!");
} else {
System.out.println("删除的表 "+tableName+" 不存在!");
System.exit(0);
}
}
// 添加一条数据
public static void addRecord(String tableName, String row,
String columnFamily, String column, String value) throws Exception {
HTable table = new HTable(conf, tableName);
Put put = new Put(Bytes.toBytes(row));// 指定行
// 参数分别:列族、列、值
put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(column),
Bytes.toBytes(value));
table.put(put);
}
// 添加多条数据
public static void addRecords(String tableName, String row,String[] fields, String[] values) throws Exception {
int length = fields.length;
for(int i = 0;i < length; i++) {
String[] tmp = fields[i].split(":");
String columnFamily = tmp[0];
String column = tmp[1];
String value = values[i];
addRecord(tableName,row,columnFamily,column,value);
}
}
// 删除一条(行)数据
public static void deleteRow(String tableName, String row) throws Exception {
HTable table = new HTable(conf, tableName);
Delete del = new Delete(Bytes.toBytes(row));
table.delete(del);
}
// 获取指定数据
public static void scanColumn(String tableName, String column) throws Exception {
HTable table = new HTable(conf, tableName);
Scan scan = new Scan();
ResultScanner results = table.getScanner(scan);
String[] tmp = column.split(":");
String family = tmp[0];
String qualifier = tmp[1];
// 输出结果,raw方法返回所有keyvalue数组
for (Result result : results) {
for (KeyValue rowKV : result.raw()) {
String Family = new String(rowKV.getFamily());
String Qualifier = new String(rowKV.getQualifier());
if(Family.equals(family) && Qualifier.equals(qualifier)) {
System.out.print("行名:" + new String(rowKV.getRow()) + " ");
System.out.print("时间戳:" + rowKV.getTimestamp() + " ");
System.out.print("列族名:" + new String(rowKV.getFamily()) + " ");
System.out.print("列名:" + new String(rowKV.getQualifier()) + " ");
System.out.println("值:" + new String(rowKV.getValue()));
}
}
}
}
// 获取所有数据
public static void getAllRows(String tableName) throws Exception {
HTable table = new HTable(conf, tableName);
Scan scan = new Scan();
ResultScanner results = table.getScanner(scan);
// 输出结果
for (Result result : results) {
for (KeyValue rowKV : result.raw()) {
System.out.print("行名:" + new String(rowKV.getRow()) + " ");
System.out.print("时间戳:" + rowKV.getTimestamp() + " ");
System.out.print("列族名:" + new String(rowKV.getFamily()) + " ");
System.out.print("列名:" + new String(rowKV.getQualifier()) + " ");
System.out.println("值:" + new String(rowKV.getValue()));
}
}
}
// 计算并插入成绩
public static void addScore(String tableName, String row) throws Exception {
HTable table = new HTable(conf, tableName);
Get get = new Get(Bytes.toBytes(row));
Result result = table.get(get);
int S = 0;
int A = 0;
for (KeyValue rowKV : result.raw()) {
String Target = "score";
String Family = new String(rowKV.getFamily());
if (Target.equals(Family)) {
String str = new String(rowKV.getValue());
S = S + Integer.valueOf(str);
}
}
A = S / 3;
String Sum = String.valueOf(S);
String Avg = String.valueOf(A);
HBaseJavaAPI.addRecord(tableName, row, "stat_score", "sum",Sum);
HBaseJavaAPI.addRecord(tableName, row, "stat_score", "avg",Avg);
}
// 主函数
public static void main(String[] args) {
try {
String tableName = "student";
// 创建数据库表:“student”
String[] columnFamilys = { "information", "score", "stat_score" };
HBaseJavaAPI.createTable(tableName, columnFamilys);
// 向数据表的添加数据
if (isExist(tableName)) {
// 添加第一行数据
HBaseJavaAPI.addRecord(tableName, "2015001", "information", "name", "Zhangsan");
HBaseJavaAPI.addRecord(tableName, "2015001", "information", "age", "23");
HBaseJavaAPI.addRecord(tableName, "2015001", "information", "sex", "male");
HBaseJavaAPI.addRecord(tableName, "2015001", "score", "123001","86");
HBaseJavaAPI.addRecord(tableName, "2015001", "score", "123002","75");
HBaseJavaAPI.addRecord(tableName, "2015001", "score", "123003","69");
HBaseJavaAPI.addRecord(tableName, "2015001", "stat_score", "sum","NULL");
HBaseJavaAPI.addRecord(tableName, "2015001", "stat_score", "avg","NULL");
HBaseJavaAPI.addScore(tableName, "2015001");
// 添加第二行数据
HBaseJavaAPI.addRecord(tableName, "2015002", "information", "name", "Mary");
HBaseJavaAPI.addRecord(tableName, "2015002", "information", "age", "22");
HBaseJavaAPI.addRecord(tableName, "2015002", "information", "sex","female");
HBaseJavaAPI.addRecord(tableName, "2015002", "score", "123001","77");
HBaseJavaAPI.addRecord(tableName, "2015002", "score", "123002","99");
HBaseJavaAPI.addRecord(tableName, "2015002", "score", "123003","98");
HBaseJavaAPI.addRecord(tableName, "2015002", "stat_score", "sum","NULL");
HBaseJavaAPI.addRecord(tableName, "2015002", "stat_score", "avg","NULL");
HBaseJavaAPI.addScore(tableName, "2015002");
// 添加第三行数据
HBaseJavaAPI.addRecord(tableName, "2015003", "information", "name", "Lisi");
HBaseJavaAPI.addRecord(tableName, "2015003", "information", "age", "24");
HBaseJavaAPI.addRecord(tableName, "2015003", "information", "sex","male");
HBaseJavaAPI.addRecord(tableName, "2015003", "score", "123001","98");
HBaseJavaAPI.addRecord(tableName, "2015003", "score", "123002","95");
HBaseJavaAPI.addRecord(tableName, "2015003", "score", "123003","94");
HBaseJavaAPI.addRecord(tableName, "2015003", "stat_score", "sum","NULL");
HBaseJavaAPI.addRecord(tableName, "2015003", "stat_score", "avg","NULL");
HBaseJavaAPI.addScore(tableName, "2015003");
// 添加第四行数据
String[] fields = { "information:name", "information:age", "information:sex",
"score:123001", "score:123002", "score:123003" };
String[] values = { "zouyi", "20", "male", "77", "88", "99" };
HBaseJavaAPI.addRecords(tableName, "2015004",fields,values);
HBaseJavaAPI.addScore(tableName, "2015004");
// 获取指定数据
//System.out.println("**************获取(score:123002)数据*************");
//HBaseJavaAPI.scanColumn(tableName, "score:123002");
// 获取所有数据
System.out.println("**************获取所有数据***************");
HBaseJavaAPI.getAllRows(tableName);
// 删除一条数据
System.out.println("************删除一条(2015001)数据************");
HBaseJavaAPI.deleteRow(tableName, "2015001");
HBaseJavaAPI.getAllRows(tableName);
// 删除数据库
//System.out.println("***************删除数据库表**************");
//HBaseJavaAPI.deleteTable(tableName);
//System.out.println("表"+tableName+"存在吗?"+isExist(tableName));
} else {
System.out.println(tableName + "此数据库表不存在!");
}
} catch (Exception e) {
e.printStackTrace();
}
}
}