大数据最难源码 HBase 源码(三)之 HBase 创建表流程源码分析

Connection、Admin 和 HTable 的理解

一个标准的 HBase 客户端程序的写法,来看具体实现:

// 第一步:获取配置
Configuration conf = HBaseConfiguration.create();
// 第二步:获取链接
Connection connection = ConnectionFactory.createConnection(conf);
// 第三步:获取操作对象,Admin 用于 DDL 操作
Admin admin = connection.getAdmin();
// DML 数据处理操作
HTable table = connection.getTable();
// 第四步:具体数据操作
admin.createTable();
table.put(Put put);
table.delete(Delete delete);
Result result = table.get(Get get);
ResultScanner rs = table.getScanner(Scan scan);
// 第五步:结果

在第二步中,会创建 RpcClient。来看具体实现:

ConnectionFactory.createConnection(conf){
// 获取链接实现类
String className = conf.get(ClusterConnection.HBASE_CLIENT_CONNECTION_IMPL, ConnectionImplementation.class.getName());
// 通过反射创建链接实例
Constructor<?> constructor = clazz.getDeclaredConstructor(Configuration.class, ExecutorService.class, User.class);
(Connection) constructor.newInstance(conf, pool, user){
// 构造方法
ConnectionImplementation(Configuration conf, ExecutorService pool, User user) throws IOException {
// 1、批量提交处理器
this.asyncProcess = new AsyncProcess(this, conf, rpcCallerFactory, rpcControllerFactory);
// 2、缓存组件
this.metaCache = new MetaCache(this.metrics);
// 3、 ZK 客户端 
this.registry = ConnectionRegistryFactory.getRegistry(conf){
//  存储了所有的 znode 节点路径
this.znodePaths = new ZNodePaths(conf);
//  ZK 客户端
this.zk = new ReadOnlyZKClient(conf);
}
 // 4、 RPC 客户端
this.rpcClient = RpcClientFactory.createClient(this.conf, this.clusterId, this.metrics);
}
}
}

客户端的核心四大组件:
AsyncProcess,异步处理器,负责提交请求
MetaCache,存在于客户端用于缓存 Meta 表的相关信息(一个 Map + 两个方法)
ConnectionRegistry(由 ConnectionRegistryFactory 创建),HBase 的 ZK 客户端,客户端会跟 ZK 打交道获取 Meta 的信息
RpcClient,RPC 客户端

Procedure 和 ProcedureExecutor 详解

当 HMaster 接收到一个 createTable() 的 RPC 请求时,会将其封装成一个 CreateTableProcedure 并提交给 ProcedureExecutor。
ProcedureExecutor 在 HMaster 启动的时候完成初始化并启动。下面分三个部分来看:
ProcedureExecutor 创建和初始化
ProcedureExecutor 启动
ProcedureExecutor 接收 Procedure 执行处理

// 当 HMaster 选举成为 Active Master 之后,执行初始化
HMaster.finishActiveMasterInitialization(){
// 创建 ProcedureExecutor 实例,内部创建了默认至少 16 个线程
createProcedureExecutor(){
// 获取 ProcedureScheduler
MasterProcedureEnv procEnv = new MasterProcedureEnv(this);
MasterProcedureScheduler procedureScheduler = procEnv.getProcedureScheduler();
// 用于持久化 procedure 的状态
procedureStore = new RegionProcedureStore();
// 创建 ProcedureExecutor
procedureExecutor = new ProcedureExecutor<>(conf, procEnv, procedureStore, procedureScheduler);
// RegionProcedureStore 启动
procedureStore.start(numThreads);
// ProcedureExecutor 初始化
procedureExecutor.init(numThreads, abortOnCorruption){
// 初始化工作线程
workerThreads = new CopyOnWriteArrayList<>();
for(int i = 0; i < corePoolSize; ++i) {
workerThreads.add(new WorkerThread(threadGroup));
} 
// 表示 ProcedureScheduler 已经启动
scheduler.start();
// 加载 old Procedure
load(abortOnCorruption);
}
}
}
HMaster.finishActiveMasterInitialization(){
// 启动 ProcedureExecutor
// 其实内部,就是启动 WorkerThread 工作线程
startServiceThreads(){
startProcedureExecutor(){
procedureExecutor.startWorkers(){
// WorkerThread 线程启动
for(WorkerThread worker : workerThreads) {
worker.start();
}
}
}
}
}
class WorkerThread{
public void run() {
// 获取 Procedure
Procedure<TEnvironment> proc = scheduler.poll(keepAliveTime, TimeUnit.MILLISECONDS){
poll(unit.toNanos(timeout)){
// 获取 Procedure
final Procedure pollResult = dequeue(){
Procedure<?> pollResult = doPoll(metaRunQueue);
if(pollResult == null) {
pollResult = doPoll(serverRunQueue);
} if(pollResult == null) {
pollResult = doPoll(peerRunQueue);
} if(pollResult == null) {
pollResult = doPoll(tableRunQueue);
} return pollResult;
} return pollResult;
}
} this.activeProcedure = proc;
// 执行 Procedure
executeProcedure(proc);
}
}

HMaster.createTable(TableDescriptor tableDescriptor, final byte[][] splitKeys, final long nonceGroup, final long nonce){
// 创建 RegionInfo
TableDescriptor desc = getMasterCoprocessorHost().preCreateTableRegionsInfos(tableDescriptor);
String namespace = desc.getTableName().getNamespaceAsString();
RegionInfo[] newRegions = ModifyRegionUtils.createRegionInfos(desc, splitKeys);
// 提交 CreateTableProcedure 给 ProcedureExecutor
submitProcedure(new CreateTableProcedure(procedureExecutor.getEnvironment(), desc, newRegions, latch)){
getProcedureExecutor().submitProcedure(proc, nonceKey){
prepareProcedure(proc);
store.insert(proc, null);
//
return pushProcedure(proc){
// procedure 注册
procedures.put(currentProcId, proc);
// 将 Procedure 加入队列
scheduler.addBack(proc){
// 内部实现:不同的 Procedure 加入不同的队列
enqueue(procedure, addFront){
if(isMetaProcedure(proc)) {
doAdd(metaRunQueue, getMetaQueue(), proc, addFront);
} else if(isTableProcedure(proc)) {
doAdd(tableRunQueue, getTableQueue(getTableName(proc)), proc, addFront);
} else if(isServerProcedure(proc)) {
ServerProcedureInterface spi = (ServerProcedureInterface) proc;
doAdd(serverRunQueue, getServerQueue(spi.getServerName(), spi), proc, addFront);
} else if(isPeerProcedure(proc)) {
doAdd(peerRunQueue, getPeerQueue(getPeerId(proc)), proc, addFront);
} else {
throw new UnsupportedOperationException("");
}
}
}
class WorkerThread{
public void run() {
// 从队列中获取 CreateTableProcedure
Procedure<TEnvironment> proc = scheduler.poll(keepAliveTime, TimeUnit.MILLISECONDS);
// 执行 Procedure
executeProcedure(proc){
execProcedure(procStack, proc){
procedure.doExecute(getEnvironment()){
execute(env){
// 状态机执行
stateFlow = executeFromState(env, state){
CreateTableProcedure.executeFromState(final MasterProcedureEnv env, final CreateTableState state);
}
}
}

DDL 创建表服务端处理,CreateTableProcedure

CreateTableProcedure 核心逻辑 executeFromState

CreateTableProcedure.executeFromState(final MasterProcedureEnv env, final CreateTableState state){
switch(state) {
case CREATE_TABLE_PRE_OPERATION:
boolean exists = !prepareCreate(env);
releaseSyncLatch();
if(exists) {
assert isFailed() : "the delete should have an exception here";
return Flow.NO_MORE_STATE;
} preCreate(env);
setNextState(CreateTableState.CREATE_TABLE_WRITE_FS_LAYOUT);
break;
case CREATE_TABLE_WRITE_FS_LAYOUT:
DeleteTableProcedure.deleteFromFs(env, getTableName(), newRegions, true);
newRegions = createFsLayout(env, tableDescriptor, newRegions);
env.getMasterServices().getTableDescriptors().update(tableDescriptor, true);
setNextState(CreateTableState.CREATE_TABLE_ADD_TO_META);
break;
case CREATE_TABLE_ADD_TO_META:
newRegions = addTableToMeta(env, tableDescriptor, newRegions);
setNextState(CreateTableState.CREATE_TABLE_ASSIGN_REGIONS);
break;
case CREATE_TABLE_ASSIGN_REGIONS:
setEnablingState(env, getTableName());
addChildProcedure(env.getAssignmentManager().createRoundRobinAssignProcedures(...));
setNextState(CreateTableState.CREATE_TABLE_UPDATE_DESC_CACHE);
break;
case CREATE_TABLE_UPDATE_DESC_CACHE:
setEnabledState(env, getTableName());
setNextState(CreateTableState.CREATE_TABLE_POST_OPERATION);
break;
case CREATE_TABLE_POST_OPERATION:
postCreate(env);
return Flow.NO_MORE_STATE;
default:
throw new UnsupportedOperationException("unhandled state=" + state);
}
}

Meta 表初始化 InitMetaProcedure

如果 HBase 集群是第一次启动,初始化 Meta 表

HMaster.finishActiveMasterInitialization(){
// 初始化 Meta 表的 Procedure
InitMetaProcedure temp = new InitMetaProcedure();
// 提交 Procedure
procedureExecutor.submitProcedure(temp);
}
InitMetaProcedure.executeFromState(MasterProcedureEnv env, InitMetaState state){
switch(state) {
case INIT_META_WRITE_FS_LAYOUT:
// TODO_MA 注释: 重点,创建 HRegion
TableDescriptor td = writeFsLayout(rootDir, conf);
setNextState(InitMetaState.INIT_META_ASSIGN_META);
return Flow.HAS_MORE_STATE;
case INIT_META_ASSIGN_META:
addChildProcedure(env.getAssignmentManager().createAssignProcedures(....));
return Flow.NO_MORE_STATE;
default:
throw new UnsupportedOperationException("unhandled state=" + state);
}
}

InitMetaProcedure.writeFsLayout(){
FileSystem fs = rootDir.getFileSystem(conf);
// /rootDir/data/namespace/tableName
Path tableDir = CommonFSUtils.getTableDir(rootDir, TableName.META_TABLE_NAME);
//  如果存在就删除
if(fs.exists(tableDir) && !fs.delete(tableDir, true)) {
LOG.warn("Can not delete partial created meta table, continue...");
} // 获取表定义
TableDescriptor metaDescriptor = FSTableDescriptors.tryUpdateAndGetMetaTableDescriptor(conf, fs, rootDir);
// 创建 Meta 的 region
HRegion.createHRegion(RegionInfoBuilder.FIRST_META_REGIONINFO, rootDir, conf, metaDescriptor, null){
// 创建 HDFS 上的 region 的目录
createRegionDir(conf, info, rootDir);
// 创建 HRegion 对象
HRegion region = HRegion.newHRegion(tableDir, wal, fs, conf, info, hTableDescriptor, rsRpcServices);
if(initialize) {
region.initialize(null){
initializeRegionInternals(reporter, status){
// 初始化 HStore
initializeStores(reporter, status){
// 遍历每个 ColumnFamily, 创建 HStore
for(final ColumnFamilyDescriptor family : htableDescriptor.getColumnFamilies()) {
completionService.submit(new Callable<HStore>() {
@Override
public HStore call() throws IOException {
return instantiateHStore(family, warmup){
if(family.isMobEnabled()) {
if(HFile.getFormatVersion(this.conf) >= HFile.MIN_FORMAT_VERSION_WITH_TAGS) {
return new HMobStore(this, family, this.conf, warmup);
}
} // 初始化 HStore
return new HStore(this, family, this.conf, warmup){
this.dataBlockEncoder = new HFileDataBlockEncoderImpl(....);
// 初始化 MemStore
this.memstore = getMemstore(){
MemStore ms = null;
switch(inMemoryCompaction) {
case NONE:
ms = DefaultMemStore.class;
break;
default:
ms = CompactingMemStore.class;
} return ms;
} this.storeEngine = createStoreEngine(....);
List<HStoreFile> hStoreFiles = loadStoreFiles(warmup);
}
}
}
});


}
} // 恢复日志
if(!isRestoredRegion && ServerRegionReplicaUtil.shouldReplayRecoveredEdits(this)) {
Collection<HStore> stores = this.stores.values();
stores.forEach(HStore::startReplayingFromWAL);
replayRecoveredEditsIfAny(maxSeqIdInStores, reporter, status);
loadRecoveredHFilesIfAny(stores);
stores.forEach(HStore::stopReplayingFromWAL);
} // RegionSplit 策略
this.splitPolicy = RegionSplitPolicy.create(this, conf);
splitRestriction = RegionSplitRestriction.create(getTableDescriptor(), conf);
this.flushPolicy = FlushPolicyFactory.create(this, conf);
}
}
}
} return metaDescriptor;
}

env.getAssignmentManager().createAssignProcedures(){
new TransitRegionStateProcedure(env, region, targetServer, forceNewPlan, TransitionType.ASSIGN);
}
// TransitRegionStateProcedure 的详细逻辑:
TransitRegionStateProcedure.executeFromState(MasterProcedureEnv env, RegionStateTransitionState state){
switch(state) {
case REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE:
// 由 AssignmentManager 完成 Region 分配
queueAssign(env, regionNode);
return Flow.HAS_MORE_STATE;
case REGION_STATE_TRANSITION_OPEN:
// 提交执行 OpenRegionProcedure
openRegion(env, regionNode);
return Flow.HAS_MORE_STATE;
case REGION_STATE_TRANSITION_CONFIRM_OPENED:
// 确认打开
return confirmOpened(env, regionNode);
case REGION_STATE_TRANSITION_CLOSE:
// 关闭
closeRegion(env, regionNode);
return Flow.HAS_MORE_STATE;
case REGION_STATE_TRANSITION_CONFIRM_CLOSED:
// 确认关闭
return confirmClosed(env, regionNode);
default:
throw new UnsupportedOperationException("unhandled state=" + state);
}
}

从来不完美,一直不放弃,先模仿

  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值