1 在集群中认证hbase.keytab
1.1 认证kerberos
[root@linux5 ~]# kinit -kt /sga/keytab/hadoop.keytab hbase/linux5
1.2 验证认证结果
1.2.1 登录hbase
[root@linux5 ~]# hbase shell
Java HotSpot(TM) 64-Bit Server VM warning: Using incremental CMS is deprecated and will likely be removed in a future release
19/05/15 10:21:39 INFO Configuration.deprecation: hadoop.native.lib is deprecated. Instead, use io.native.lib.available
HBase Shell; enter 'help<RETURN>' for list of supported commands.
Type "exit<RETURN>" to leave the HBase Shell
Version 1.2.0-cdh5.14.2, rUnknown, Tue Mar 27 13:32:17 PDT 2018
1.2.2 查询hbase表
hbase(main):001:0> list
TABLE
KYLIN_FL8VZSKL4R
kylin_metadata
kylin_metadata_acl
kylin_metadata_user
4 row(s) in 0.2280 seconds
=> ["KYLIN_FL8VZSKL4R", "kylin_metadata", "kylin_metadata_acl", "kylin_metadata_user"]
可以查询到hbase的表,认证通过
2 Hbase表备份
2.1 备份数据
2.1.1 全量数据备份
[root@linux5 ~]# hbase org.apache.hadoop.hbase.mapreduce.Export kylin_metadata hdfs://linux1:8020/hbase/bak/.hbase-data/kylin_metadata
Java HotSpot(TM) 64-Bit Server VM warning: Using incremental CMS is deprecated and will likely be removed in a future release
19/05/15 10:24:01 INFO mapreduce.Export: versions=1, starttime=0, endtime=9223372036854775807, keepDeletedCells=false
19/05/15 10:24:02 INFO zookeeper.RecoverableZooKeeper: Process identifier=hconnection-0x3401a114 connecting to ZooKeeper ensemble=linux3:2181,linux5:2181,linux4:2181
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:zookeeper.version=3.4.5-cdh5.14.2--1, built on 03/27/2018 20:39 GMT
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:host.name=linux5
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:java.version=1.8.0_141
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:java.vendor=Oracle Corporation
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:java.home=/usr/java/jdk1.8.0_141-cloudera/jre
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:java.class.path=(一些jar包的调用,省略)
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:java.library.path=/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/lib/hadoop/lib/native:/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/lib/hbase/bin/../lib/native/Linux-amd64-64
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:java.io.tmpdir=/tmp
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:java.compiler=<NA>
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:os.name=Linux
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:os.arch=amd64
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:os.version=2.6.32-642.el6.x86_64
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:user.name=root
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:user.home=/root
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Client environment:user.dir=/root
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=linux3:2181,linux5:2181,linux4:2181 sessionTimeout=60000 watcher=hconnection-0x3401a1140x0, quorum=linux3:2181,linux5:2181,linux4:2181, baseZNode=/hbase
19/05/15 10:24:02 INFO zookeeper.Login: Client successfully logged in.
19/05/15 10:24:02 INFO zookeeper.Login: TGT refresh thread started.
19/05/15 10:24:02 INFO zookeeper.Login: TGT valid starting at: Wed May 15 10:20:55 CST 2019
19/05/15 10:24:02 INFO zookeeper.Login: TGT expires: Thu May 16 10:20:55 CST 2019
19/05/15 10:24:02 INFO zookeeper.Login: TGT refresh sleeping until: Thu May 16 06:25:59 CST 2019
19/05/15 10:24:02 INFO client.ZooKeeperSaslClient: Client will use GSSAPI as SASL mechanism.
19/05/15 10:24:02 INFO zookeeper.ClientCnxn: Opening socket connection to server linux3/192.168.0.203:2181. Will attempt to SASL-authenticate using Login Context section 'Client'
19/05/15 10:24:02 INFO zookeeper.ClientCnxn: Socket connection established, initiating session, client: /192.168.0.205:46916, server: linux3/192.168.0.203:2181
19/05/15 10:24:02 INFO zookeeper.ClientCnxn: Session establishment complete on server linux3/192.168.0.203:2181, sessionid = 0x16ab566151f0585, negotiated timeout = 60000
19/05/15 10:24:02 INFO zookeeper.RecoverableZooKeeper: Process identifier=TokenUtil-getAuthToken connecting to ZooKeeper ensemble=linux3:2181,linux5:2181,linux4:2181
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=linux3:2181,linux5:2181,linux4:2181 sessionTimeout=60000 watcher=TokenUtil-getAuthToken0x0, quorum=linux3:2181,linux5:2181,linux4:2181, baseZNode=/hbase
19/05/15 10:24:02 INFO client.ZooKeeperSaslClient: Client will use GSSAPI as SASL mechanism.
19/05/15 10:24:02 INFO zookeeper.ClientCnxn: Opening socket connection to server linux3/192.168.0.203:2181. Will attempt to SASL-authenticate using Login Context section 'Client'
19/05/15 10:24:02 INFO zookeeper.ClientCnxn: Socket connection established, initiating session, client: /192.168.0.205:46920, server: linux3/192.168.0.203:2181
19/05/15 10:24:02 INFO zookeeper.ClientCnxn: Session establishment complete on server linux3/192.168.0.203:2181, sessionid = 0x16ab566151f0586, negotiated timeout = 60000
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Session: 0x16ab566151f0586 closed
19/05/15 10:24:02 INFO zookeeper.ClientCnxn: EventThread shut down
19/05/15 10:24:02 INFO client.ConnectionManager$HConnectionImplementation: Closing zookeeper sessionid=0x16ab566151f0585
19/05/15 10:24:02 INFO zookeeper.ZooKeeper: Session: 0x16ab566151f0585 closed
19/05/15 10:24:02 INFO zookeeper.ClientCnxn: EventThread shut down
19/05/15 10:24:03 INFO client.RMProxy: Connecting to ResourceManager at linux5/192.168.0.205:8032
19/05/15 10:24:03 INFO hdfs.DFSClient: Created token for hbase: HDFS_DELEGATION_TOKEN owner=hbase/linux5@HADOOP.COM, renewer=yarn, realUser=, issueDate=1557887043244, maxDate=1558491843244, sequenceNumber=200, masterKeyId=15 on 192.168.0.201:8020
19/05/15 10:24:03 INFO security.TokenCache: Got dt for hdfs://linux1:8020; Kind: HDFS_DELEGATION_TOKEN, Service: 192.168.0.201:8020, Ident: (token for hbase: HDFS_DELEGATION_TOKEN owner=hbase/linux5@HADOOP.COM, renewer=yarn, realUser=, issueDate=1557887043244, maxDate=1558491843244, sequenceNumber=200, masterKeyId=15)
19/05/15 10:24:03 INFO hdfs.DFSClient: Created token for hbase: HDFS_DELEGATION_TOKEN owner=hbase/linux5@HADOOP.COM, renewer=yarn, realUser=, issueDate=1557887043415, maxDate=1558491843415, sequenceNumber=28, masterKeyId=6 on 192.168.0.205:8020
19/05/15 10:24:03 INFO security.TokenCache: Got dt for hdfs://linux5:8020; Kind: HDFS_DELEGATION_TOKEN, Service: 192.168.0.205:8020, Ident: (token for hbase: HDFS_DELEGATION_TOKEN owner=hbase/linux5@HADOOP.COM, renewer=yarn, realUser=, issueDate=1557887043415, maxDate=1558491843415, sequenceNumber=28, masterKeyId=6)
19/05/15 10:24:05 INFO zookeeper.RecoverableZooKeeper: Process identifier=hconnection-0x1d01dfa5 connecting to ZooKeeper ensemble=linux3:2181,linux5:2181,linux4:2181
19/05/15 10:24:05 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=linux3:2181,linux5:2181,linux4:2181 sessionTimeout=60000 watcher=hconnection-0x1d01dfa50x0, quorum=linux3:2181,linux5:2181,linux4:2181, baseZNode=/hbase
19/05/15 10:24:05 INFO client.ZooKeeperSaslClient: Client will use GSSAPI as SASL mechanism.
19/05/15 10:24:05 INFO zookeeper.ClientCnxn: Opening socket connection to server linux5/192.168.0.205:2181. Will attempt to SASL-authenticate using Login Context section 'Client'
19/05/15 10:24:05 INFO zookeeper.ClientCnxn: Socket connection established, initiating session, client: /192.168.0.205:53414, server: linux5/192.168.0.205:2181
19/05/15 10:24:05 INFO zookeeper.ClientCnxn: Session establishment complete on server linux5/192.168.0.205:2181, sessionid = 0x26ab56619c405ac, negotiated timeout = 60000
19/05/15 10:24:05 INFO util.RegionSizeCalculator: Calculating region sizes for table "KYLIN_FL8VZSKL4R".
19/05/15 10:24:05 INFO client.ConnectionManager$HConnectionImplementation: Closing master protocol: MasterService
19/05/15 10:24:05 INFO client.ConnectionManager$HConnectionImplementation: Closing zookeeper sessionid=0x26ab56619c405ac
19/05/15 10:24:05 INFO zookeeper.ZooKeeper: Session: 0x26ab56619c405ac closed
19/05/15 10:24:05 INFO zookeeper.ClientCnxn: EventThread shut down
19/05/15 10:24:05 INFO mapreduce.JobSubmitter: number of splits:1
19/05/15 10:24:05 INFO Configuration.deprecation: io.bytes.per.checksum is deprecated. Instead, use dfs.bytes-per-checksum
19/05/15 10:24:06 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1557885460403_0001
19/05/15 10:24:06 INFO mapreduce.JobSubmitter: Kind: HDFS_DELEGATION_TOKEN, Service: 192.168.0.201:8020, Ident: (token for hbase: HDFS_DELEGATION_TOKEN owner=hbase/linux5@HADOOP.COM, renewer=yarn, realUser=, issueDate=1557887043244, maxDate=1558491843244, sequenceNumber=200, masterKeyId=15)
19/05/15 10:24:06 INFO mapreduce.JobSubmitter: Kind: HDFS_DELEGATION_TOKEN, Service: 192.168.0.205:8020, Ident: (token for hbase: HDFS_DELEGATION_TOKEN owner=hbase/linux5@HADOOP.COM, renewer=yarn, realUser=, issueDate=1557887043415, maxDate=1558491843415, sequenceNumber=28, masterKeyId=6)
19/05/15 10:24:06 INFO mapreduce.JobSubmitter: Kind: HBASE_AUTH_TOKEN, Service: 71f83408-fd3d-4a52-a1f4-d8d71aa707b3, Ident: (org.apache.hadoop.hbase.security.token.AuthenticationTokenIdentifier@0)
19/05/15 10:24:06 INFO impl.YarnClientImpl: Submitted application application_1557885460403_0001
19/05/15 10:24:06 INFO mapreduce.Job: The url to track the job: http://linux5:8088/proxy/application_1557885460403_0001/
19/05/15 10:24:06 INFO mapreduce.Job: Running job: job_1557885460403_0001
19/05/15 10:24:17 INFO mapreduce.Job: Job job_1557885460403_0001 running in uber mode : false
19/05/15 10:24:17 INFO mapreduce.Job: map 0% reduce 0%
19/05/15 10:24:44 INFO mapreduce.Job: map 100% reduce 0%
19/05/15 10:24:44 INFO mapreduce.Job: Job job_1557885460403_0001 completed successfully
19/05/15 10:24:44 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=189434
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=131
HDFS: Number of bytes written=114
HDFS: Number of read operations=4
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters
Launched map tasks=1
Data-local map tasks=1
Total time spent by all maps in occupied slots (ms)=23974
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=23974
Total vcore-milliseconds taken by all map tasks=23974
Total megabyte-milliseconds taken by all map tasks=24549376
Map-Reduce Framework
Map input records=0
Map output records=0
Input split bytes=131
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=0
CPU time spent (ms)=11720
Physical memory (bytes) snapshot=414326784
Virtual memory (bytes) snapshot=2806788096
Total committed heap usage (bytes)=610271232
File Input Format Counters
Bytes Read=0
File Output Format Counters
Bytes Written=114
日志中出现 "Job job_xxxx completed successfully" 字样表示备份成功
2.1.2 增量数据备份
获取时间戳
[root@linux5 ~]# date +%s
增量开始时间戳
[root@linux5 ~]# date -d '05/17/2019 00:00:00' +"%s"
增量结束时间戳
[root@linux5 ~]# date -d '05/18/2019 00:00:00' +"%s"
增量备份命令
[root@linux5 ~]# hbase org.apache.hadoop.hbase.mapreduce.Export emp hdfs://linux1:8020/hbase/bak/.hbase-data/emp_1 1 1557987579726
(参数依次为:版本数=1、开始时间戳;如需限定增量的结束时间,可在命令末尾再追加结束时间戳)
2.2 备份快照
2.2.1 生成表的快照
hbase(main):003:0> snapshot 'kylin_metadata', 'kylin_metadata'
0 row(s) in 0.6880 seconds
2.2.2 查看快照
hbase(main):001:0> list_snapshots
SNAPSHOT TABLE + CREATION TIME
KYLIN_2U5W9QDFNN KYLIN_2U5W9QDFNN (Tue May 14 09:26:08 +0800 2019)
1 row(s) in 0.2920 seconds
=> ["KYLIN_2U5W9QDFNN"]
2.2.3 拷贝快照到备份集群
[root@linux1 ~]# hadoop distcp hdfs://linux5:8020/hbase/.hbase-snapshot/kylin_metadata hdfs://linux1:8020/hbase/bak/.hbase-snapshot/
2.3备份表结构
hbase(main):002:0> describe 'kylin_metadata'
Table kylin_metadata is ENABLED
kylin_metadata, {TABLE_ATTRIBUTES => {METADATA => {'UUID' => '19bf91ff-c78c-4a73-a579-b5b5c25ce56f'}}
COLUMN FAMILIES DESCRIPTION
{NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'true', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_VERSIONS =
> '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
1 row(s) in 0.1340 seconds
保存建表语句
create 'kylin_metadata3', {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'true', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
注意:建表时需去掉 describe 结果中的表级属性(TABLE_ATTRIBUTES 一行)以及列族属性中的 TTL => 'FOREVER',可对照上面保存的建表语句
3 验证备份
3.1 查看备份数据
[root@linux5 ~]# hdfs dfs -ls hdfs://linux1:8020/hbase/bak/.hbase-data
Found 1 items
drwxr-xr-x - hbase supergroup 0 2019-05-16 11:40 hdfs://linux1:8020/hbase/bak/.hbase-data/kylin_metadata
3.2 查看备份快照
[root@linux1 ~]# hdfs dfs -ls hdfs://linux1:8020/hbase/bak/.hbase-snapshot
Found 1 items
drwxr-xr-x - hbase supergroup 0 2019-05-16 11:33 hdfs://linux1:8020/hbase/bak/.hbase-snapshot/kylin_metadata
4 恢复备份
4.1 从Hfile中恢复数据
[root@linux5 ~]# hbase org.apache.hadoop.hbase.mapreduce.Import kylin_metadata3 hdfs://linux1:8020/hbase/bak/.hbase-data/kylin_metadata
Java HotSpot(TM) 64-Bit Server VM warning: Using incremental CMS is deprecated and will likely be removed in a future release
19/05/16 11:50:27 INFO zookeeper.RecoverableZooKeeper: Process identifier=hconnection-0x23fb172e connecting to ZooKeeper ensemble=linux3:2181,linux5:2181,linux4:2181
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:zookeeper.version=3.4.5-cdh5.14.2--1, built on 03/27/2018 20:39 GMT
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:host.name=linux5
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:java.version=1.8.0_141
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:java.vendor=Oracle Corporation
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:java.home=/usr/java/jdk1.8.0_141-cloudera/jre
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:java.library.path=/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/lib/hadoop/lib/native:/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/lib/hbase/bin/../lib/native/Linux-amd64-64
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:java.io.tmpdir=/tmp
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:java.compiler=<NA>
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:os.name=Linux
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:os.arch=amd64
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:os.version=2.6.32-642.el6.x86_64
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:user.name=root
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:user.home=/root
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Client environment:user.dir=/root
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=linux3:2181,linux5:2181,linux4:2181 sessionTimeout=60000 watcher=hconnection-0x23fb172e0x0, quorum=linux3:2181,linux5:2181,linux4:2181, baseZNode=/hbase
19/05/16 11:50:27 INFO zookeeper.Login: Client successfully logged in.
19/05/16 11:50:27 INFO zookeeper.Login: TGT refresh thread started.
19/05/16 11:50:27 INFO zookeeper.Login: TGT valid starting at: Thu May 16 09:05:30 CST 2019
19/05/16 11:50:27 INFO zookeeper.Login: TGT expires: Fri May 17 09:05:30 CST 2019
19/05/16 11:50:27 INFO zookeeper.Login: TGT refresh sleeping until: Fri May 17 05:08:27 CST 2019
19/05/16 11:50:27 INFO client.ZooKeeperSaslClient: Client will use GSSAPI as SASL mechanism.
19/05/16 11:50:27 INFO zookeeper.ClientCnxn: Opening socket connection to server linux5/192.168.0.205:2181. Will attempt to SASL-authenticate using Login Context section 'Client'
19/05/16 11:50:27 INFO zookeeper.ClientCnxn: Socket connection established, initiating session, client: /192.168.0.205:34542, server: linux5/192.168.0.205:2181
19/05/16 11:50:27 INFO zookeeper.ClientCnxn: Session establishment complete on server linux5/192.168.0.205:2181, sessionid = 0x26aba53bbf40681, negotiated timeout = 60000
19/05/16 11:50:27 INFO zookeeper.RecoverableZooKeeper: Process identifier=TokenUtil-getAuthToken connecting to ZooKeeper ensemble=linux3:2181,linux5:2181,linux4:2181
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=linux3:2181,linux5:2181,linux4:2181 sessionTimeout=60000 watcher=TokenUtil-getAuthToken0x0, quorum=linux3:2181,linux5:2181,linux4:2181, baseZNode=/hbase
19/05/16 11:50:27 INFO client.ZooKeeperSaslClient: Client will use GSSAPI as SASL mechanism.
19/05/16 11:50:27 INFO zookeeper.ClientCnxn: Opening socket connection to server linux4/192.168.0.204:2181. Will attempt to SASL-authenticate using Login Context section 'Client'
19/05/16 11:50:27 INFO zookeeper.ClientCnxn: Socket connection established, initiating session, client: /192.168.0.205:44105, server: linux4/192.168.0.204:2181
19/05/16 11:50:27 INFO zookeeper.ClientCnxn: Session establishment complete on server linux4/192.168.0.204:2181, sessionid = 0x36aba53b5070657, negotiated timeout = 60000
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Session: 0x36aba53b5070657 closed
19/05/16 11:50:27 INFO zookeeper.ClientCnxn: EventThread shut down
19/05/16 11:50:27 INFO client.ConnectionManager$HConnectionImplementation: Closing zookeeper sessionid=0x26aba53bbf40681
19/05/16 11:50:27 INFO zookeeper.ZooKeeper: Session: 0x26aba53bbf40681 closed
19/05/16 11:50:27 INFO zookeeper.ClientCnxn: EventThread shut down
19/05/16 11:50:27 INFO client.RMProxy: Connecting to ResourceManager at linux5/192.168.0.205:8032
19/05/16 11:50:28 INFO hdfs.DFSClient: Created token for hbase: HDFS_DELEGATION_TOKEN owner=hbase/linux5@HADOOP.COM, renewer=yarn, realUser=, issueDate=1557978628184, maxDate=1558583428184, sequenceNumber=44, masterKeyId=8 on 192.168.0.205:8020
19/05/16 11:50:28 INFO security.TokenCache: Got dt for hdfs://linux5:8020; Kind: HDFS_DELEGATION_TOKEN, Service: 192.168.0.205:8020, Ident: (token for hbase: HDFS_DELEGATION_TOKEN owner=hbase/linux5@HADOOP.COM, renewer=yarn, realUser=, issueDate=1557978628184, maxDate=1558583428184, sequenceNumber=44, masterKeyId=8)
19/05/16 11:50:29 INFO hdfs.DFSClient: Created token for hbase: HDFS_DELEGATION_TOKEN owner=hbase/linux5@HADOOP.COM, renewer=yarn, realUser=, issueDate=1557978629661, maxDate=1558583429661, sequenceNumber=216, masterKeyId=17 on 192.168.0.201:8020
19/05/16 11:50:29 INFO security.TokenCache: Got dt for hdfs://linux1:8020; Kind: HDFS_DELEGATION_TOKEN, Service: 192.168.0.201:8020, Ident: (token for hbase: HDFS_DELEGATION_TOKEN owner=hbase/linux5@HADOOP.COM, renewer=yarn, realUser=, issueDate=1557978629661, maxDate=1558583429661, sequenceNumber=216, masterKeyId=17)
19/05/16 11:50:29 INFO input.FileInputFormat: Total input paths to process : 1
19/05/16 11:50:29 INFO mapreduce.JobSubmitter: number of splits:1
19/05/16 11:50:30 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1557904315077_0013
19/05/16 11:50:30 INFO mapreduce.JobSubmitter: Kind: HDFS_DELEGATION_TOKEN, Service: 192.168.0.205:8020, Ident: (token for hbase: HDFS_DELEGATION_TOKEN owner=hbase/linux5@HADOOP.COM, renewer=yarn, realUser=, issueDate=1557978628184, maxDate=1558583428184, sequenceNumber=44, masterKeyId=8)
19/05/16 11:50:30 INFO mapreduce.JobSubmitter: Kind: HDFS_DELEGATION_TOKEN, Service: 192.168.0.201:8020, Ident: (token for hbase: HDFS_DELEGATION_TOKEN owner=hbase/linux5@HADOOP.COM, renewer=yarn, realUser=, issueDate=1557978629661, maxDate=1558583429661, sequenceNumber=216, masterKeyId=17)
19/05/16 11:50:30 INFO mapreduce.JobSubmitter: Kind: HBASE_AUTH_TOKEN, Service: 71f83408-fd3d-4a52-a1f4-d8d71aa707b3, Ident: (org.apache.hadoop.hbase.security.token.AuthenticationTokenIdentifier@3)
19/05/16 11:50:30 INFO impl.YarnClientImpl: Submitted application application_1557904315077_0013
19/05/16 11:50:30 INFO mapreduce.Job: The url to track the job: http://linux5:8088/proxy/application_1557904315077_0013/
19/05/16 11:50:30 INFO mapreduce.Job: Running job: job_1557904315077_0013
19/05/16 11:50:40 INFO mapreduce.Job: Job job_1557904315077_0013 running in uber mode : false
19/05/16 11:50:40 INFO mapreduce.Job: map 0% reduce 0%
19/05/16 11:50:46 INFO mapreduce.Job: map 100% reduce 0%
19/05/16 11:50:47 INFO mapreduce.Job: Job job_1557904315077_0013 completed successfully
19/05/16 11:50:47 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=188948
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=531035
HDFS: Number of bytes written=0
HDFS: Number of read operations=3
HDFS: Number of large read operations=0
HDFS: Number of write operations=0
Job Counters
Launched map tasks=1
Rack-local map tasks=1
Total time spent by all maps in occupied slots (ms)=4351
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=4351
Total vcore-milliseconds taken by all map tasks=4351
Total megabyte-milliseconds taken by all map tasks=4455424
Map-Reduce Framework
Map input records=102
Map output records=102
Input split bytes=133
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=0
CPU time spent (ms)=2550
Physical memory (bytes) snapshot=385519616
Virtual memory (bytes) snapshot=2801315840
Total committed heap usage (bytes)=634912768
File Input Format Counters
Bytes Read=530902
File Output Format Counters
Bytes Written=0
19/05/16 11:50:47 INFO mapreduce.Job: Running job: job_1557904315077_0013
19/05/16 11:50:47 INFO mapreduce.Job: Job job_1557904315077_0013 running in uber mode : false
19/05/16 11:50:47 INFO mapreduce.Job: map 100% reduce 0%
19/05/16 11:50:47 INFO mapreduce.Job: Job job_1557904315077_0013 completed successfully
19/05/16 11:50:47 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=188948
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=531035
HDFS: Number of bytes written=0
HDFS: Number of read operations=3
HDFS: Number of large read operations=0
HDFS: Number of write operations=0
Job Counters
Launched map tasks=1
Rack-local map tasks=1
Total time spent by all maps in occupied slots (ms)=4351
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=4351
Total vcore-milliseconds taken by all map tasks=4351
Total megabyte-milliseconds taken by all map tasks=4455424
Map-Reduce Framework
Map input records=102
Map output records=102
Input split bytes=133
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=0
CPU time spent (ms)=2550
Physical memory (bytes) snapshot=385519616
Virtual memory (bytes) snapshot=2801315840
Total committed heap usage (bytes)=634912768
File Input Format Counters
Bytes Read=530902
File Output Format Counters
Bytes Written=0
4.2 查看验证数据
hbase(main):003:0> scan 'kylin_metadata3'
……
……
102 row(s) in 2.6470 seconds
具体验证步骤待完善
5 定时备份
为方便管理,在linux中创建脚本进行全量与增量备份,并设置定时任务。
全量:每周日0点进行全量备份
增量:除周日每天0点进行增量备份
5.1 定时任务文件
每周日零点执行全量备份脚本,对表进行全量备份
周一至周六零点执行增量备份脚本,备份前一天的数据
0 0 * * 7 root ( bash /sga/hbasebak/full.sh )
0 0 * * 1-6 root ( bash /sga/hbasebak/increase.sh )
5.2 备份脚本
5.2.1 增量脚本
#!/bin/bash
# Incremental HBase backup: for every table listed in /sga/hbasebak/table,
# run one Export MapReduce job covering the window [yesterday 00:00, now).
# Output: hdfs://linux1:8020/hbase/bak/.hbase-data/inc/<table>_inc_<run_ts>
# Logs:   /sga/hbasebak/logs/<table>_inc.log
set -u

# Start of the window: 00:00 of the previous day, in milliseconds.
# The original $year$month$preday concatenation produced an invalid or wrong
# date on the 1st of each month (e.g. 20190631); GNU date's 'yesterday 00:00'
# handles month/year boundaries correctly.
starttimestamp=$(date -d 'yesterday 00:00' +%s)000
# End of the window: now, in milliseconds (HBase timestamps are ms).
endtimestamp=$(date +%s)000
# Run timestamp, makes every export directory unique.
run_ts=$(date +%Y%m%d%H%M%S)

# Authenticate with the keytab so the Export job can talk to HBase/HDFS.
kinit -kt /sga/keytab/hadoop.keytab hbase/linux5

# Back up each listed table; a failure on one table must not stop the rest.
while IFS= read -r table || [[ -n "$table" ]]; do
  [[ -n "$table" ]] || continue
  log="/sga/hbasebak/logs/${table}_inc.log"
  echo "============================This is the Golden Split Line============================" >>"$log"
  # Export usage: <table> <outputdir> [versions [starttime [endtime]]]
  hbase org.apache.hadoop.hbase.mapreduce.Export "$table" \
    "hdfs://linux1:8020/hbase/bak/.hbase-data/inc/${table}_inc_${run_ts}" \
    1 "$starttimestamp" "$endtimestamp" >>"$log" 2>&1 \
    || echo "incremental backup of ${table} failed, see $log" >&2
done < /sga/hbasebak/table
5.2.2 全量脚本:
#!/bin/bash
# Full HBase backup: for every table listed in /sga/hbasebak/table, run one
# Export MapReduce job over the table's entire contents.
# Output: hdfs://linux1:8020/hbase/bak/.hbase-data/full/<table>_full_<run_ts>
# Logs:   /sga/hbasebak/logs/<table>_full.log
set -u

# Upper bound of the export window: now, in milliseconds.
endtimestamp=$(date +%s)000
# Run timestamp, makes every export directory unique.
run_ts=$(date +%Y%m%d%H%M%S)

# Authenticate with the keytab so the Export job can talk to HBase/HDFS.
kinit -kt /sga/keytab/hadoop.keytab hbase/linux5

# Back up each listed table; a failure on one table must not stop the rest.
while IFS= read -r table || [[ -n "$table" ]]; do
  [[ -n "$table" ]] || continue
  log="/sga/hbasebak/logs/${table}_full.log"
  echo "============================This is the Golden Split Line============================" >>"$log"
  # NOTE(review): the trailing arguments '0 $endtimestamp' are kept from the
  # original script. Export usage is <table> <outputdir> [versions [starttime
  # [endtime]]], so this passes versions=0 and starttime=$endtimestamp —
  # verify this is really intended for a full export; a plain
  # 'Export <table> <outputdir>' may be what is wanted.
  hbase org.apache.hadoop.hbase.mapreduce.Export "$table" \
    "hdfs://linux1:8020/hbase/bak/.hbase-data/full/${table}_full_${run_ts}" \
    0 "$endtimestamp" >>"$log" 2>&1 \
    || echo "full backup of ${table} failed, see $log" >&2
done < /sga/hbasebak/table
5.3 备份日志
对每一次备份生成相应日志,日志按照表名和全量增量进行区分
[root@linux5 logs]# ll
总用量 936
-rw-r--r-- 1 root root 101708 5月 17 12:11 emp_full.log
-rw-r--r-- 1 root root 54533 5月 17 13:52 emp_inc.log
-rw-r--r-- 1 root root 101756 5月 17 12:11 kylin_metadata2_full.log
-rw-r--r-- 1 root root 54541 5月 17 13:53 kylin_metadata2_inc.log
-rw-r--r-- 1 root root 101756 5月 17 12:11 kylin_metadata3_full.log
-rw-r--r-- 1 root root 54541 5月 17 13:53 kylin_metadata3_inc.log
-rw-r--r-- 1 root root 101768 5月 17 12:11 kylin_metadata_acl_full.log
-rw-r--r-- 1 root root 54549 5月 17 13:54 kylin_metadata_acl_inc.log
-rw-r--r-- 1 root root 101752 5月 17 12:11 kylin_metadata_full.log
-rw-r--r-- 1 root root 54546 5月 17 13:53 kylin_metadata_inc.log
-rw-r--r-- 1 root root 101772 5月 17 12:12 kylin_metadata_user_full.log
-rw-r--r-- 1 root root 54545 5月 17 13:55 kylin_metadata_user_inc.log