实验材料及说明
在Ubuntu系统的/学号(每个人使用自己的学号)/salesInfo目录下,有买家的购买记录文件Sales。该文件记录了买家的id,购买商品的id以及购买日期,文件名为Sales。Sales包含:买家ID、商品ID、购买日期三个字段,数据以“\t”进行分割,样本数据及格式如下:
买家ID 商品ID 购买日期
1000181 1000481 2021-04-04 16:54:31
2000001 1001597 2021-04-07 15:07:52
2000001 1001560 2021-04-07 15:08:27
2000042 1001368 2021-04-08 08:20:30
2000067 1002061 2021-04-08 16:45:33
2000056 1003289 2021-04-12 10:50:55
2000056 1003290 2021-04-12 11:57:35
2000056 1003292 2021-04-12 12:05:29
2000054 1002420 2021-04-14 15:24:12
2000055 1001679 2021-04-14 19:46:04
2000054 1010675 2021-04-14 15:23:53
2000054 1002429 2021-04-14 17:52:45
2000076 1002427 2021-04-14 19:35:39
2000054 1003326 2021-04-20 12:54:44
2000056 1002420 2021-04-15 11:24:49
2000064 1002422 2021-04-15 11:35:54
2000056 1003066 2021-04-15 11:43:01
2000056 1003055 2021-04-15 11:43:06
2000056 1010183 2021-04-15 11:45:24
2000056 1002422 2021-04-15 11:45:49
2000056 1003100 2021-04-15 11:45:54
2000056 1003094 2021-04-15 11:45:57
2000056 1003064 2021-04-15 11:46:04
2000056 1010178 2021-04-15 16:15:20
2000076 1003101 2021-04-15 16:37:27
2000076 1003103 2021-04-15 16:37:05
2000076 1003100 2021-04-15 16:37:18
2000076 1003066 2021-04-15 16:37:31
要求根据实验内容撰写实验报告,实验报告需要包括实验原理、算法设计思路、代码、代码调试说明、实验过程中碰到的问题和代码改进建议等内容。实验报告文件命名规则:HadoopLabX-学号-姓名.doc(X=1,2,3)。具体而言,实验报告需要包括以下内容:
实验目的
掌握HBase的安装、写入和查询操作。即,要求在HBase中创建Sales表;创建PutData类,将Sales文件中的所有数据写入Sales表中;并创建GetData类,查询Sales表中rowkey为10010的数据。
1.在hbase中创建Sales表
package shiyan;
import java.io.IOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.conf.Configuration;
public class CreateTable {
public static void main(String[] args) throws IOException {
// Instantiating configuration class 初始化配置文件
Configuration con = HBaseConfiguration.create();
// Instantiating HbaseAdmin class 初始化HbaseAdmin
HBaseAdmin admin = new HBaseAdmin(con);
// Instantiating table descriptor class 设置表名
HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf("Sales"));
// Adding column families to table descriptor 设置列族名(可设置多个)
tableDescriptor.addFamily(new HColumnDescriptor("buyerID"));
tableDescriptor.addFamily(new HColumnDescriptor("goodsID"));
tableDescriptor.addFamily(new HColumnDescriptor("time"));
admin.createTable(tableDescriptor);
System.out.println("created Table success!");
}
2.从本地批量上传数据到hbase中的表中
package shiyan;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
public class putdata{
    // NOTE(review): CreateTable in this report creates table "Sales" with
    // families buyerID/goodsID/time, while this class writes to "sales" with
    // families goodsid/buytime. HBase table and family names are
    // case-sensitive, so these two listings target different schemas —
    // confirm which one is intended before running both.
    private static final String TABLE_NAME = "sales";
    public static final String FAMILY_NAME_1 = "goodsid";
    public static final String FAMILY_NAME_2 = "buytime";

    /**
     * Bulk-loads the local Sales file into the HBase table.
     *
     * Each input line becomes one Put keyed by the buyer id, storing the
     * goods id and the purchase time in their own column families.
     * NOTE(review): using the buyer id as rowkey means a buyer with several
     * purchases keeps only the latest cell per family — confirm this is
     * acceptable for the lab.
     */
    public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "localhost");
        conf.set("zookeeper.znode.parent", "/hbase");
        List<Put> list = new ArrayList<Put>();
        File file = new File("/1863710117/salesInfo/Sales");
        BufferedReader reader = null;
        String lineString = null;
        Connection connection = null;
        Table table = null;
        try {
            connection = ConnectionFactory.createConnection(conf);
            table = connection.getTable(TableName.valueOf(TABLE_NAME));
            reader = new BufferedReader(new FileReader(file));
            while ((lineString = reader.readLine()) != null) {
                // BUG FIX: the Sales file is tab-separated ("\t"); the old
                // split(" ") also cut the timestamp ("2021-04-04 16:54:31")
                // in half, so lines[2] lost the time-of-day part.
                String[] lines = lineString.split("\t");
                if (lines.length < 3) {
                    continue; // skip the header row and malformed lines
                }
                Put put = new Put(lines[0].getBytes());
                put.addColumn(FAMILY_NAME_1.getBytes(), null, lines[1].getBytes());
                put.addColumn(FAMILY_NAME_2.getBytes(), null, lines[2].getBytes());
                list.add(put);
            }
            table.put(list); // one batched RPC for all rows
            System.out.println("success!");
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close in reverse order of acquisition; each close is guarded
            // so one failure does not leak the remaining resources.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (table != null) {
                try {
                    table.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (connection != null) {
                try {
                    connection.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
3.使用一个程序实现创建、上传数据、查询数据
package shiyan;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
public class addRowData{
    private static final String TABLE_NAME = "sales";
    public static final String FAMILY_NAME_1 = "goodsid";
    public static final String FAMILY_NAME_2 = "buytime";

    /** Builds an HBase configuration pointing at the local pseudo-cluster. */
    private static Configuration getHBaseConfiguration() {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "localhost");
        conf.set("zookeeper.znode.parent", "/hbase");
        return conf;
    }

    /** Builds one Put: rowkey = buyer id, goods id and buy time as cells. */
    private static Put makePut(String buyerId, String goodsId, String buyTime) {
        Put put = new Put(Bytes.toBytes(buyerId));
        put.addColumn(Bytes.toBytes(FAMILY_NAME_1), Bytes.toBytes("gid"), Bytes.toBytes(goodsId));
        put.addColumn(Bytes.toBytes(FAMILY_NAME_2), Bytes.toBytes("btime"), Bytes.toBytes(buyTime));
        return put;
    }

    /**
     * Creates the table if absent, inserts five sample rows from the Sales
     * data, reads one row back by rowkey, then scans a rowkey range.
     *
     * @param conf cluster configuration from {@link #getHBaseConfiguration()}
     * @throws IOException if any HBase operation fails
     */
    private static void createTable(Configuration conf) throws IOException {
        Connection connection = null;
        Table table = null;
        try {
            connection = ConnectionFactory.createConnection(conf);
            // Create the table with its two column families, unless it exists.
            Admin admin = connection.getAdmin();
            try {
                if (!admin.tableExists(TableName.valueOf(TABLE_NAME))) {
                    HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf(TABLE_NAME));
                    tableDescriptor.addFamily(new HColumnDescriptor(Bytes.toBytes(FAMILY_NAME_1)));
                    tableDescriptor.addFamily(new HColumnDescriptor(Bytes.toBytes(FAMILY_NAME_2)));
                    admin.createTable(tableDescriptor);
                } else {
                    System.err.println("table is exists!!!!!");
                }
            } finally {
                admin.close(); // BUG FIX: admin was never closed
            }

            // Put the first five sample records (matching the Sales file).
            table = connection.getTable(TableName.valueOf(TABLE_NAME));
            List<Put> puts = new LinkedList<Put>();
            puts.add(makePut("1000181", "1000481", "2021-04-04 16:54:31"));
            puts.add(makePut("2000001", "1001597", "2021-04-07 15:07:52"));
            puts.add(makePut("2000001", "1001560", "2021-04-07 15:08:27"));
            // BUG FIX: the time below had a stray leading space
            // (" 2021-04-08 08:20:30") not present in the source data.
            puts.add(makePut("2000042", "1001368", "2021-04-08 08:20:30"));
            puts.add(makePut("2000067", "1002061", "2021-04-08 16:45:33"));
            table.put(puts); // one batched call instead of five

            // Read one row back by rowkey and print its two cells.
            Get getbuyrokey = new Get(Bytes.toBytes("1000181"));
            byte[] ss = table.get(getbuyrokey).getValue(Bytes.toBytes(FAMILY_NAME_1), Bytes.toBytes("gid"));
            System.out.print("读出rowkey为 “1000181” 的goodsiD:gid: " + new String(ss) + " ");
            byte[] ss1 = table.get(getbuyrokey).getValue(Bytes.toBytes(FAMILY_NAME_2), Bytes.toBytes("btime"));
            System.out.println("buytime:btime: " + new String(ss1));

            // Scan the rowkey range [1000181, 2000067) for the gid column.
            Scan scan = new Scan();
            scan.setStartRow(Bytes.toBytes("1000181"));
            scan.setStopRow(Bytes.toBytes("2000067")); // stop row is exclusive
            scan.addColumn(Bytes.toBytes(FAMILY_NAME_1), Bytes.toBytes("gid"));
            scan.setCaching(100); // fetch up to 100 rows per RPC
            ResultScanner results = table.getScanner(scan);
            try {
                for (Result result : results) {
                    // BUG FIX: this loop body was commented out, leaving a
                    // loop that advanced the cursor but printed nothing.
                    while (result.advance()) {
                        Cell cell = result.current();
                        System.out.println("goodid :" + Bytes.toString(CellUtil.cloneValue(cell)));
                    }
                }
            } finally {
                results.close(); // BUG FIX: scanner was never closed
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (table != null) table.close();
            if (connection != null) connection.close();
        }
    }

    public static void main(String[] args) {
        Configuration conf = getHBaseConfiguration();
        try {
            createTable(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
4.从hbase中查询数据
package shiyan;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueExcludeFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.conf.Configuration;
import com.sun.corba.se.pept.transport.Connection;
import net.spy.memcached.ConnectionFactory;
public class GetDataTest {
    // NOTE(review): the import section of this listing pulls in
    // com.sun.corba.se.pept.transport.Connection and
    // net.spy.memcached.ConnectionFactory — both are wrong and shadow the
    // HBase client classes of the same names; they should be replaced by
    // org.apache.hadoop.hbase.client.Connection / ConnectionFactory.
    private static Configuration config = null;
    static {
        config = HBaseConfiguration.create();
        // NOTE(review): "*.*.*.*" is a placeholder — set the real ZooKeeper
        // quorum host(s) before running.
        config.set("hbase.zookeeper.quorum", "*.*.*.*");
        config.set("hbase.zookeeper.property.clientPort", "2181");
        config = HBaseConfiguration.create(config);
    }

    /**
     * Fetches a single row by rowkey and prints every cell in it
     * (row, family, qualifier, timestamp and value).
     *
     * @param tableName name of the HBase table to read from
     * @param rowKey    rowkey of the row to fetch
     * @throws IOException if the cluster cannot be reached
     */
    public static void getOneRecord(String tableName, String rowKey) throws IOException {
        HTable table = new HTable(config, tableName);
        try {
            Get get = new Get(rowKey.getBytes());
            Result rs = table.get(get);
            for (KeyValue kv : rs.raw()) {
                // BUG FIX: the pattern was "yyyy-MM-dd HH:MM:ss" — 'MM' is
                // month-of-year; minutes must be lowercase 'mm'.
                String timestampFormat =
                        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date(kv.getTimestamp()));
                System.out.println("===:" + timestampFormat + " ==timestamp: " + kv.getTimestamp());
                System.out.println("\nKeyValue: " + kv);
                System.out.println("key: " + kv.getKeyString());
                System.out.println(new String(kv.getRow()) + " " + new String(kv.getFamily()) + ":"
                        + new String(kv.getQualifier()) + " " + kv.getTimestamp() + " " + new String(kv.getValue()));
            }
        } finally {
            table.close(); // BUG FIX: the table was never closed
        }
    }

    public static void main(String[] args) {
        // BUG FIX: the lab requirement is to query the Sales table for
        // rowkey 10010; the original queried "TestTable" / "1000181".
        String tableName = "Sales";
        try {
            GetDataTest.getOneRecord(tableName, "10010");
            System.out.print("success!");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}