hbase查看hfile

原文地址:https://www.cnblogs.com/ilifeilong/p/10846883.html

原文地址:https://www.cnblogs.com/quchunhui/p/7611565.html

===流程===

1. 创建一张表

2. 插入10条数据

3. 查看HFile

===操作===

1.创建表

复制代码

package api;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.regionserver.BloomType;

/**
 * Sample program that creates the HBase table {@code TEST1} with two column
 * families, using deliberately small flush/split thresholds so that HFiles
 * are produced quickly and can be inspected.
 */
public class create_table_sample1 {
    /** Explicit charset so column-family bytes do not depend on the platform default. */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Builds the table descriptor and creates the table on the cluster.
     *
     * @param args unused
     * @throws Exception if the connection cannot be established or the table cannot be created
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.80,192.168.1.81,192.168.1.82");

        HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("TEST1"));
        desc.setMemStoreFlushSize(2097152L);          // 2 MB (default is 128 MB)
        desc.setMaxFileSize(10485760L);               // 10 MB (default is 10 GB)
        desc.setDurability(Durability.SYNC_WAL);      // WAL durability: synchronous write

        HColumnDescriptor family1 = new HColumnDescriptor(constants.COLUMN_FAMILY_DF.getBytes(UTF_8));
        family1.setTimeToLive(2 * 60 * 60 * 24);     // expiry (TTL): 2 days, expressed in seconds
        family1.setMaxVersions(2);                   // max number of versions kept
        family1.setBlockCacheEnabled(false);
        desc.addFamily(family1);

        HColumnDescriptor family2 = new HColumnDescriptor(constants.COLUMN_FAMILY_EX.getBytes(UTF_8));
        family2.setTimeToLive(3 * 60 * 60 * 24);     // expiry (TTL): 3 days, expressed in seconds
        family2.setMinVersions(2);                   // min number of versions kept
        family2.setMaxVersions(3);                   // max number of versions kept
        family2.setBloomFilterType(BloomType.ROW);   // bloom filter type: ROW
        family2.setBlocksize(1024);
        family2.setBlockCacheEnabled(false);
        desc.addFamily(family2);

        // try-with-resources closes admin first, then connection, even when
        // createTable throws (e.g. the table already exists).
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            admin.createTable(desc);
        }
    }
}

复制代码

2.插入10条数据

复制代码

package api;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

/**
 * Sample program that writes 10 rows with random UUID-based row keys into the
 * table named by {@code constants.TABLE_NAME} through a {@link BufferedMutator}.
 */
public class table_put_sample4 {
    /** Explicit charset so row keys and qualifiers do not depend on the platform default. */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Builds 10 {@link Put}s (two columns in each of the two families) and
     * sends them to the cluster in one batch.
     *
     * @param args unused
     * @throws Exception if the connection cannot be established or the write fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.80,192.168.1.81,192.168.1.82");
        conf.set("hbase.client.write.buffer", "1048576"); // 1 MB

        // Column-family bytes are loop-invariant, so convert them once.
        byte[] familyDf = constants.COLUMN_FAMILY_DF.getBytes(UTF_8);
        byte[] familyEx = constants.COLUMN_FAMILY_EX.getBytes(UTF_8);

        // try-with-resources closes the mutator first, then the connection,
        // even when mutate/flush throws.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             BufferedMutator mutator =
                     connection.getBufferedMutator(TableName.valueOf(constants.TABLE_NAME))) {
            List<Put> puts = new ArrayList<>();
            for (int i = 0; i < 10; i++) {
                Put put = new Put(("row" + UUID.randomUUID()).getBytes(UTF_8));
                put.addColumn(familyDf, "name".getBytes(UTF_8), random.getName());
                put.addColumn(familyDf, "sex".getBytes(UTF_8), random.getSex());
                put.addColumn(familyEx, "height".getBytes(UTF_8), random.getHeight());
                put.addColumn(familyEx, "weight".getBytes(UTF_8), random.getWeight());
                puts.add(put);
            }

            mutator.mutate(puts);
            // Explicit flush so the buffered puts are sent before the mutator is closed.
            mutator.flush();
        }
    }
}

复制代码

3. 查看HFile

命令:hbase hfile -v -p -m -f hdfs://ns/hbase/data/default/TEST1/5cd31c374a3b30bb859175495cbd6905/df/9df89dc0db7f401e943c5ded6d49d956

复制代码

Scanning -> hdfs://ns/hbase/data/default/TEST1/5cd31c374a3b30bb859175495cbd6905/df/9df89dc0db7f401e943c5ded6d49d956
2017-09-29 03:53:57,233 INFO  [main] hfile.CacheConfig: Created cacheConfig: CacheConfig:disabled
K: row0324f6ce-dec9-474a-b3fd-202b0c482756/df:name/1506670800587/Put/vlen=7/seqid=8 V: wang wu
K: row0324f6ce-dec9-474a-b3fd-202b0c482756/df:sex/1506670800587/Put/vlen=3/seqid=8 V: men
K: row284986a4-66c3-4ac6-96f1-76cbf66ec0b0/df:name/1506670800410/Put/vlen=7/seqid=4 V: wei liu
K: row284986a4-66c3-4ac6-96f1-76cbf66ec0b0/df:sex/1506670800410/Put/vlen=3/seqid=4 V: men
K: row5b3796d7-0d95-4114-b8fe-15a194b87172/df:name/1506670800559/Put/vlen=5/seqid=7 V: li si
K: row5b3796d7-0d95-4114-b8fe-15a194b87172/df:sex/1506670800559/Put/vlen=3/seqid=7 V: men
K: row620c7f4b-cb20-4175-b12b-5f71349ca52e/df:name/1506670800699/Put/vlen=7/seqid=12 V: wang wu
K: row620c7f4b-cb20-4175-b12b-5f71349ca52e/df:sex/1506670800699/Put/vlen=5/seqid=12 V: women
K: row91963615-e76f-4911-be04-fcfb1e47cf64/df:name/1506670800733/Put/vlen=7/seqid=13 V: wei liu
K: row91963615-e76f-4911-be04-fcfb1e47cf64/df:sex/1506670800733/Put/vlen=5/seqid=13 V: women
K: row98e7aeea-bd63-45f3-ad28-690256303b6a/df:name/1506670800677/Put/vlen=7/seqid=11 V: wang wu
K: row98e7aeea-bd63-45f3-ad28-690256303b6a/df:sex/1506670800677/Put/vlen=3/seqid=11 V: men
K: rowa0d3ac08-188a-4869-8dcd-43cd874ae34e/df:name/1506670800476/Put/vlen=7/seqid=5 V: wang wu
K: rowa0d3ac08-188a-4869-8dcd-43cd874ae34e/df:sex/1506670800476/Put/vlen=3/seqid=5 V: men
K: rowd0584d40-bf2c-4f07-90c9-394470cc54c7/df:name/1506670800611/Put/vlen=7/seqid=9 V: wei liu
K: rowd0584d40-bf2c-4f07-90c9-394470cc54c7/df:sex/1506670800611/Put/vlen=5/seqid=9 V: women
K: rowd5e46f02-7d22-444a-a086-f0936ca81728/df:name/1506670800652/Put/vlen=7/seqid=10 V: wang wu
K: rowd5e46f02-7d22-444a-a086-f0936ca81728/df:sex/1506670800652/Put/vlen=3/seqid=10 V: men
K: rowf17bfb40-f658-4b4b-a9da-82abf455f4e6/df:name/1506670800531/Put/vlen=5/seqid=6 V: li si
K: rowf17bfb40-f658-4b4b-a9da-82abf455f4e6/df:sex/1506670800531/Put/vlen=3/seqid=6 V: men
Block index size as per heapsize: 432
reader=hdfs://ns/hbase/data/default/TEST1/5cd31c374a3b30bb859175495cbd6905/df/9df89dc0db7f401e943c5ded6d49d956,
    compression=none,
    cacheConf=CacheConfig:disabled,
    firstKey=row0324f6ce-dec9-474a-b3fd-202b0c482756/df:name/1506670800587/Put,
    lastKey=rowf17bfb40-f658-4b4b-a9da-82abf455f4e6/df:sex/1506670800531/Put,
    avgKeyLen=56,
    avgValueLen=5,
    entries=20,
    length=6440
Trailer:
    fileinfoOffset=1646,
    loadOnOpenDataOffset=1502,
    dataIndexCount=1,
    metaIndexCount=0,
    totalUncomressedBytes=6313,
    entryCount=20,
    compressionCodec=NONE,
    uncompressedDataIndexSize=70,
    numDataIndexLevels=1,
    firstDataBlockOffset=0,
    lastDataBlockOffset=0,
    comparatorClassName=org.apache.hadoop.hbase.KeyValue$KeyComparator,
    encryptionKey=NONE,
    majorVersion=3,
    minorVersion=0
Fileinfo:
    BLOOM_FILTER_TYPE = ROW
    DELETE_FAMILY_COUNT = \x00\x00\x00\x00\x00\x00\x00\x00
    EARLIEST_PUT_TS = \x00\x00\x01^\xCC\x93\xEE\x1A
    KEY_VALUE_VERSION = \x00\x00\x00\x01
    LAST_BLOOM_KEY = rowf17bfb40-f658-4b4b-a9da-82abf455f4e6
    MAJOR_COMPACTION_KEY = \x00
    MAX_MEMSTORE_TS_KEY = \x00\x00\x00\x00\x00\x00\x00\x0D
    MAX_SEQ_ID_KEY = 15
    TIMERANGE = 1506670800410....1506670800733
    hfile.AVG_KEY_LEN = 56
    hfile.AVG_VALUE_LEN = 5
    hfile.CREATE_TIME_TS = \x00\x00\x01^\xCC\x9B\xAD\xCF
    hfile.LASTKEY = \x00'rowf17bfb40-f658-4b4b-a9da-82abf455f4e6\x02dfsex\x00\x00\x01^\xCC\x93\xEE\x93\x04
Mid-key: \x00'row0324f6ce-dec9-474a-b3fd-202b0c482756\x02dfname\x00\x00\x01^\xCC\x93\xEE\xCB\x04
Bloom filter:
    BloomSize: 16
    No of Keys in bloom: 10
    Max Keys for bloom: 13
    Percentage filled: 77%
    Number of chunks: 1
    Comparator: RawBytesComparator
Delete Family Bloom filter:
    Not present
Scanned kv count -> 20

复制代码

===Tips===

1. HFile放在哪里了?

查看方式一:

可以通过HBase的web页面查看HFile名称及路径。步骤如下:

① 打开Web管理页面,选择表

② 选择HRegion Server

③ 选择Region

④ 查看HFile路径

HFile是以列族为单位的,我建立的表有两个列族,所以这里就有两个HFile

查看方式二:

直接使用hdfs命令,逐层查看

命令样例:hadoop fs -ls /hbase/data/default

2. 为什么能scan到数据,却没有hfile?

通过程序向HBase插入数据之后,能够scan到数据,不过hdfs上却没有hfile。

如下图所示:scan 'TEST1' 能够看到表中有数据。

从Web页面上却看不到hfile

原因:

插入的数据在memstore(写缓存)中,还没有flush到hdfs上。

解决办法:

手动flush。在hbase shell环境下,有一个flush命令,可以手动刷某张表,例如:flush 'TEST1'

flush之后,就可以看到hfile了

--END--

emp表数据结构

hbase(main):098:0> scan 'emp'
ROW                                                              COLUMN+CELL                                                                                                                                                                                 
 row1                                                            column=mycf:depart, timestamp=1555846776542, value=research                                                                                                                                 
 row1                                                            column=mycf:id, timestamp=1555846776590, value=7876                                                                                                                                         
 row1                                                            column=mycf:job, timestamp=1555846776566, value=clerk                                                                                                                                       
 row1                                                            column=mycf:locate, timestamp=1555846776618, value=dallas                                                                                                                                   
 row1                                                            column=mycf:name, timestamp=1555846776511, value=adams                                                                                                                                      
 row2                                                            column=mycf:depart, timestamp=1555846776687, value=sales                                                                                                                                    
 row2                                                            column=mycf:id, timestamp=1555846776736, value=7499                                                                                                                                         
 row2                                                            column=mycf:job, timestamp=1555846776712, value=salesman                                                                                                                                    
 row2                                                            column=mycf:locate, timestamp=1555846776770, value=chicago                                                                                                                                  
 row2                                                            column=mycf:name, timestamp=1555846776662, value=allen                                                                                                                                      
 row3                                                            column=mycf:depart, timestamp=1555846776838, value=sales                                                                                                                                    
 row3                                                            column=mycf:id, timestamp=1555846776887, value=7698                                                                                                                                         
 row3                                                            column=mycf:job, timestamp=1555846776863, value=manager                                                                                                                                     
 row3                                                            column=mycf:locate, timestamp=1555846776912, value=chicago                                                                                                                                  
 row3                                                            column=mycf:name, timestamp=1555846776806, value=blake                                                                                                                                      
 row4                                                            column=mycf:depart, timestamp=1555846776976, value=accounting                                                                                                                               
 row4                                                            column=mycf:id, timestamp=1555846777027, value=7782                                                                                                                                         
 row4                                                            column=mycf:job, timestamp=1555846777002, value=manager                                                                                                                                     
 row4                                                            column=mycf:locate, timestamp=1555846777086, value=new york                                                                                                                                 
 row4                                                            column=mycf:name, timestamp=1555846776952, value=clark                                                                                                                                      
 row5                                                            column=mycf:depart, timestamp=1555846777146, value=research                                                                                                                                 
 row5                                                            column=mycf:id, timestamp=1555846777193, value=7902                                                                                                                                         
 row5                                                            column=mycf:job, timestamp=1555846777169, value=analyst                                                                                                                                     
 row5                                                            column=mycf:locate, timestamp=1555846777218, value=dallas                                                                                                                                   
 row5                                                            column=mycf:name, timestamp=1555846777121, value=ford                                                                                                                                       
 row6                                                            column=mycf:depart, timestamp=1555846777277, value=sales                                                                                                                                    
 row6                                                            column=mycf:id, timestamp=1555846777324, value=7900                                                                                                                                         
 row6                                                            column=mycf:job, timestamp=1555846777301, value=clerk                                                                                                                                       
 row6                                                            column=mycf:locate, timestamp=1555846777355, value=chicago                                                                                                                                  
 row6                                                            column=mycf:name, timestamp=1555846777253, value=james                                                                                                                                      
 row7                                                            column=mycf:depart, timestamp=1555846777416, value=research                                                                                                                                 
 row7                                                            column=mycf:id, timestamp=1555846777465, value=7566                                                                                                                                         
 row7                                                            column=mycf:job, timestamp=1555846777441, value=manager                                                                                                                                     
 row7                                                            column=mycf:locate, timestamp=1555846777491, value=dallas                                                                                                                                   
 row7                                                            column=mycf:name, timestamp=1555846777390, value=jones                                                                                                                                      
 row8                                                            column=mycf:depart, timestamp=1555846777556, value=accounting                                                                                                                               
 row8                                                            column=mycf:id, timestamp=1555846777604, value=7839                                                                                                                                         
 row8                                                            column=mycf:job, timestamp=1555846777581, value=president                                                                                                                                   
 row8                                                            column=mycf:locate, timestamp=1555846777628, value=new york                                                                                                                                 
 row8                                                            column=mycf:name, timestamp=1555846777526, value=king                                                                                                                                       
8 row(s) in 0.0490 seconds

工具

org.apache.hadoop.hbase.io.hfile.HFile

# hbase org.apache.hadoop.hbase.io.hfile.HFile
usage: HFile [-a] [-b] [-e] [-f <arg>] [-k] [-m] [-p] [-r <arg>] [-s] [-v]
 -a,--checkfamily    Enable family check
 -b,--printblocks    Print block index meta data
 -e,--printkey       Print keys
 -f,--file <arg>     File to scan. Pass full-path; e.g.
                     hdfs://a:9000/hbase/.META./12/34
 -k,--checkrow       Enable row order check; looks for out-of-order keys
 -m,--printmeta      Print meta data of file
 -p,--printkv        Print key/value pairs
 -r,--region <arg>   Region to scan. Pass region name; e.g. '.META.,,1'
 -s,--stats          Print statistics
 -v,--verbose        Verbose output; emits file and meta data delimiters

或者

# hbase hfile
usage: HFile [-a] [-b] [-e] [-f <arg>] [-k] [-m] [-p] [-r <arg>] [-s] [-v]
 -a,--checkfamily    Enable family check
 -b,--printblocks    Print block index meta data
 -e,--printkey       Print keys
 -f,--file <arg>     File to scan. Pass full-path; e.g.
                     hdfs://a:9000/hbase/.META./12/34
 -k,--checkrow       Enable row order check; looks for out-of-order keys
 -m,--printmeta      Print meta data of file
 -p,--printkv        Print key/value pairs
 -r,--region <arg>   Region to scan. Pass region name; e.g. '.META.,,1'
 -s,--stats          Print statistics
 -v,--verbose        Verbose output; emits file and meta data delimiters
# hbase org.apache.hadoop.hbase.io.hfile.HFile -f /hbase/emp/2dddf0f7140e120718b6d4356dfcee85/mycf/cab01eb30627452e8e38defad2144996 -e -p -m -s
19/05/10 21:39:27 INFO hfile.CacheConfig: Allocating LruBlockCache with maximum size 511.0m
K: row1/mycf:depart/1555846776542/Put/vlen=8 V: research
K: row1/mycf:id/1555846776590/Put/vlen=4 V: 7876
K: row1/mycf:job/1555846776566/Put/vlen=5 V: clerk
K: row1/mycf:locate/1555846776618/Put/vlen=6 V: dallas
K: row1/mycf:name/1555846776511/Put/vlen=5 V: adams
K: row2/mycf:depart/1555846776687/Put/vlen=5 V: sales
K: row2/mycf:id/1555846776736/Put/vlen=4 V: 7499
K: row2/mycf:job/1555846776712/Put/vlen=8 V: salesman
K: row2/mycf:locate/1555846776770/Put/vlen=7 V: chicago
K: row2/mycf:name/1555846776662/Put/vlen=5 V: allen
K: row3/mycf:depart/1555846776838/Put/vlen=5 V: sales
K: row3/mycf:id/1555846776887/Put/vlen=4 V: 7698
K: row3/mycf:job/1555846776863/Put/vlen=7 V: manager
K: row3/mycf:locate/1555846776912/Put/vlen=7 V: chicago
K: row3/mycf:name/1555846776806/Put/vlen=5 V: blake
K: row4/mycf:depart/1555846776976/Put/vlen=10 V: accounting
K: row4/mycf:id/1555846777027/Put/vlen=4 V: 7782
K: row4/mycf:job/1555846777002/Put/vlen=7 V: manager
K: row4/mycf:locate/1555846777086/Put/vlen=8 V: new york
K: row4/mycf:name/1555846776952/Put/vlen=5 V: clark
K: row5/mycf:depart/1555846777146/Put/vlen=8 V: research
K: row5/mycf:id/1555846777193/Put/vlen=4 V: 7902
K: row5/mycf:job/1555846777169/Put/vlen=7 V: analyst
K: row5/mycf:locate/1555846777218/Put/vlen=6 V: dallas
K: row5/mycf:name/1555846777121/Put/vlen=4 V: ford
K: row6/mycf:depart/1555846777277/Put/vlen=5 V: sales
K: row6/mycf:id/1555846777324/Put/vlen=4 V: 7900
K: row6/mycf:job/1555846777301/Put/vlen=5 V: clerk
K: row6/mycf:locate/1555846777355/Put/vlen=7 V: chicago
K: row6/mycf:name/1555846777253/Put/vlen=5 V: james
K: row7/mycf:depart/1555846777416/Put/vlen=8 V: research
K: row7/mycf:id/1555846777465/Put/vlen=4 V: 7566
K: row7/mycf:job/1555846777441/Put/vlen=7 V: manager
K: row7/mycf:locate/1555846777491/Put/vlen=6 V: dallas
K: row7/mycf:name/1555846777390/Put/vlen=5 V: jones
K: row8/mycf:depart/1555846777556/Put/vlen=10 V: accounting
K: row8/mycf:id/1555846777604/Put/vlen=4 V: 7839
K: row8/mycf:job/1555846777581/Put/vlen=9 V: president
K: row8/mycf:locate/1555846777628/Put/vlen=8 V: new york
K: row8/mycf:name/1555846777526/Put/vlen=4 V: king
Block index size as per heapsize: 416
reader=/hbase/emp/2dddf0f7140e120718b6d4356dfcee85/mycf/cab01eb30627452e8e38defad2144996,
    compression=none,
    cacheConf=CacheConfig:enabled [cacheDataOnRead=true] [cacheDataOnWrite=false] [cacheIndexesOnWrite=false] [cacheBloomsOnWrite=false] [cacheEvictOnClose=false] [cacheCompressed=false],
    firstKey=row1/mycf:depart/1555846776542/Put,
    lastKey=row8/mycf:name/1555846777526/Put,
    avgKeyLen=24,
    avgValueLen=5,
    entries=40,
    length=2155
Trailer:
    fileinfoOffset=1678,
    loadOnOpenDataOffset=1591,
    dataIndexCount=1,
    metaIndexCount=0,
    totalUncomressedBytes=2092,
    entryCount=40,
    compressionCodec=NONE,
    uncompressedDataIndexSize=39,
    numDataIndexLevels=1,
    firstDataBlockOffset=0,
    lastDataBlockOffset=0,
    comparatorClassName=org.apache.hadoop.hbase.KeyValue$KeyComparator,
    version=2
Fileinfo:
    KEY_VALUE_VERSION = \x00\x00\x00\x01
    MAJOR_COMPACTION_KEY = \x00
    MAX_MEMSTORE_TS_KEY = \x00\x00\x00\x00\x00\x00\x00\x00
    MAX_SEQ_ID_KEY = 7099
    TIMERANGE = 1555846776511....1555846777628
    hfile.AVG_KEY_LEN = 24
    hfile.AVG_VALUE_LEN = 5
    hfile.LASTKEY = \x00\x04row8\x04mycfname\x00\x00\x01j?\xB1\xCA\xB6\x04
Mid-key: \x00\x04row1\x04mycfdepart\x00\x00\x01j?\xB1\xC6\xDE\x04
Bloom filter:
    Not present
Stats:
Key length: count: 40    min: 22    max: 26    mean: 24.2
Val length: count: 40    min: 4    max: 10    mean: 5.975
Row size (bytes): count: 8    min: 187    max: 196    mean: 190.875
Row size (columns): count: 8    min: 5    max: 5    mean: 5.0
Key of biggest row: row8
Scanned kv count -> 40

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值