package com.shujia;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
/**
 * JUnit test class that connects to an HBase cluster and creates the
 * {@code dian_xin} table.
 *
 * <p>A fresh connection and {@link Admin} handle are opened before every test
 * in {@link #init()} and released afterwards in {@link #close()}.
 */
public class Demo03DianXIN {
    Connection conn;
    Admin admin;
    TableName dianXinName;

    /**
     * Opens the HBase connection and prepares the handles used by the tests.
     *
     * @throws IOException if the connection or the admin handle cannot be created
     */
    @Before
    public void init() throws IOException {
        // Build the HBase client configuration.
        Configuration conf = HBaseConfiguration.create();
        // Point the client at the ZooKeeper quorum that the HBase cluster uses.
        conf.set("hbase.zookeeper.quorum","master:2181,node1:2181,node2:2181");
        // Establish the connection.
        conn = ConnectionFactory.createConnection(conf);
        // Admin handle for table-level (DDL) operations such as exists/create.
        admin = conn.getAdmin();
        dianXinName = TableName.valueOf("dian_xin");
    }

    /**
     * Creates the {@code dian_xin} table with a single column family
     * {@code cf1} that keeps up to 5 versions per cell, unless the table
     * already exists.
     *
     * <p>Intended use (per the original author's note): rows are written with
     * {@code start_time} as the cell timestamp, so that, given an mdn, the
     * user's 5 most recent positions (longitude/latitude) can be read back.
     *
     * <p>The confirmation message is printed whether the table was created by
     * this call or was already present.
     *
     * @throws IOException if the existence check or the table creation fails
     */
    @Test
    public void createTable() throws IOException {
        if (!admin.tableExists(dianXinName)) {
            admin.createTable(new HTableDescriptor(dianXinName)
                    .addFamily(new HColumnDescriptor("cf1").setMaxVersions(5)));
        }
        System.out.println("表已创建");
    }

    /**
     * Releases the admin handle and the connection.
     *
     * <p>Both fields are null-checked because JUnit still runs {@code @After}
     * methods when {@link #init()} fails part-way, and the connection is
     * closed in a {@code finally} block so that a failure while closing the
     * admin handle cannot leak it.
     *
     * @throws IOException if closing either resource fails
     */
    @After
    public void close() throws IOException {
        try {
            if (admin != null) {
                admin.close();
            }
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }
}
hbase(main):003:0> list
TABLE
dian_xin
ns1:tb3
stu
tb1
tb1_tt1
tb1_v
tb2
tb4
8 row(s) in 0.0090 seconds
=> ["dian_xin", "ns1:tb3", "stu", "tb1", "tb1_tt1", "tb1_v", "tb2", "tb4"]
hbase(main):004:0> desc 'dian_xin'
Table dian_xin is ENABLED
dian_xin
COLUMN FAMILIES DESCRIPTION
{NAME => 'cf1', BLOOMFILTER => 'ROW', VERSIONS => '5', IN_MEMORY
=> 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING
=> 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_VERSIONS
=> '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION
_SCOPE => '0'}
1 row(s) in 0.0260 seconds
package com.shujia;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
/**
 * JUnit test class that creates the {@code dian_xin} HBase table, bulk-loads
 * it from a CSV file and reads back the most recent positions stored for one
 * mdn.
 *
 * <p>A fresh connection and {@link Admin} handle are opened before every test
 * in {@link #init()} and released afterwards in {@link #close()}.
 */
public class Demo03DianXIN {
    /** Column family holding the position columns. */
    private static final byte[] FAMILY = Bytes.toBytes("cf1");
    /** Qualifier of the longitude column. */
    private static final byte[] LONGITUDE_QUALIFIER = Bytes.toBytes("lg");
    /** Qualifier of the latitude column. */
    private static final byte[] LATITUDE_QUALIFIER = Bytes.toBytes("lat");

    Connection conn;
    Admin admin;
    TableName dianXinName;

    /**
     * Opens the HBase connection and prepares the handles used by the tests.
     *
     * @throws IOException if the connection or the admin handle cannot be created
     */
    @Before
    public void init() throws IOException {
        // Build the HBase client configuration.
        Configuration conf = HBaseConfiguration.create();
        // Point the client at the ZooKeeper quorum that the HBase cluster uses.
        conf.set("hbase.zookeeper.quorum","master:2181,node1:2181,node2:2181");
        // Establish the connection.
        conn = ConnectionFactory.createConnection(conf);
        // Admin handle for table-level (DDL) operations such as exists/create.
        admin = conn.getAdmin();
        dianXinName = TableName.valueOf("dian_xin");
    }

    /**
     * Creates the {@code dian_xin} table with a single column family
     * {@code cf1} that keeps up to 5 versions per cell, unless the table
     * already exists.
     *
     * <p>Rows are written with {@code start_time} as the cell timestamp (see
     * {@link #putAll()}), so that, given an mdn, the user's 5 most recent
     * positions (longitude/latitude) can be read back.
     *
     * <p>The confirmation message is printed whether the table was created by
     * this call or was already present.
     *
     * @throws IOException if the existence check or the table creation fails
     */
    @Test
    public void createTable() throws IOException {
        if (!admin.tableExists(dianXinName)) {
            admin.createTable(new HTableDescriptor(dianXinName)
                    .addFamily(new HColumnDescriptor("cf1").setMaxVersions(5)));
        }
        System.out.println("表已创建");
    }

    /**
     * Loads {@code data/DIANXIN.csv} into the {@code dian_xin} table.
     *
     * <p>Each non-blank line is split on commas. Field 0 is used as the row
     * key (mdn), field 1 is parsed as a {@code long} and used as the cell
     * timestamp (start_time), and fields 4 and 5 are stored as the
     * {@code cf1:lg} and {@code cf1:lat} values respectively. Puts are sent
     * in batches of 1000 to limit round trips.
     *
     * @throws IOException if the file cannot be read or a write to HBase fails
     * @throws NumberFormatException if a start_time field is not a valid long
     */
    @Test
    public void putAll() throws IOException {
        int batchSize = 1000;
        ArrayList<Put> puts = new ArrayList<>(batchSize);
        // try-with-resources: the reader and the table are closed even when a
        // line fails to parse or a batch write throws.
        try (BufferedReader br = new BufferedReader(new FileReader("data/DIANXIN.csv"));
             Table dianXin = conn.getTable(dianXinName)) {
            String line;
            while ((line = br.readLine()) != null) {
                // Tolerate blank lines (e.g. a trailing newline at end of file).
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] splits = line.split(",");
                String mdn = splits[0];
                String startTime = splits[1];
                String longitude = splits[4];
                String latitude = splits[5];
                // start_time is the cell timestamp, so versions order by event time.
                long timestamp = Long.parseLong(startTime);
                // mdn is the row key.
                Put put = new Put(Bytes.toBytes(mdn));
                put.addColumn(FAMILY, LONGITUDE_QUALIFIER, timestamp, Bytes.toBytes(longitude));
                put.addColumn(FAMILY, LATITUDE_QUALIFIER, timestamp, Bytes.toBytes(latitude));
                puts.add(put);
                if (puts.size() >= batchSize) {
                    dianXin.put(puts);
                    puts.clear();
                }
            }
            // Flush the final, partially filled batch.
            if (!puts.isEmpty()) {
                dianXin.put(puts);
            }
        }
    }

    /**
     * Prints up to the 5 most recent longitude/latitude pairs stored for a
     * fixed mdn.
     *
     * <p>Only as many rows are printed as there are stored versions of both
     * columns; a missing row prints just the header.
     *
     * @throws IOException if the read from HBase fails
     */
    @Test
    public void getPostionWithMdn() throws IOException {
        int maxVersion = 5;
        String mdn = "47BE1E866CFC071DB19D5E1C056BE28AE24C16E7";
        ArrayList<String> lgArr = new ArrayList<>();
        ArrayList<String> latArr = new ArrayList<>();
        try (Table dianXin = conn.getTable(dianXinName)) {
            Get get = new Get(Bytes.toBytes(mdn));
            get.setMaxVersions(maxVersion);
            Result rs = dianXin.get(get);
            // listCells() is null for a row that does not exist, so check first.
            if (!rs.isEmpty()) {
                // Multi-version values are read cell by cell via CellUtil.
                for (Cell cell : rs.listCells()) {
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    String qua = Bytes.toString(CellUtil.cloneQualifier(cell));
                    if ("lg".equals(qua)) {
                        lgArr.add(value);
                    } else if ("lat".equals(qua)) {
                        latArr.add(value);
                    }
                }
            }
        }
        System.out.println("经度\t\t纬度");
        // Fewer than maxVersion versions may be stored; never index past either list.
        int rows = Math.min(maxVersion, Math.min(lgArr.size(), latArr.size()));
        for (int i = 0; i < rows; i++) {
            System.out.println(lgArr.get(i) + "\t" + latArr.get(i));
        }
    }

    /**
     * Releases the admin handle and the connection.
     *
     * <p>Both fields are null-checked because JUnit still runs {@code @After}
     * methods when {@link #init()} fails part-way, and the connection is
     * closed in a {@code finally} block so that a failure while closing the
     * admin handle cannot leak it.
     *
     * @throws IOException if closing either resource fails
     */
    @After
    public void close() throws IOException {
        try {
            if (admin != null) {
                admin.close();
            }
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }
}