Ceph版本:14.2.22
Server::handle_client_unlink
文件路径:ceph/src/mds/Server.cc
当客户端向MDS服务端发送删除文件请求时,MDS会根据请求类型执行handle_client_unlink。该函数不仅处理删除文件的操作,删除目录(rmdir)以及取消硬链接或软链接的操作也由它处理。该函数主要完成以下内容:
- 检查当前操作是否是删除目录操作,并设置删除目录操作标志;
- 获取被删除目录/文件的相对路径;
- 根据被删除目录/文件的文件名解析出对应的CDentry;
- 创建被删除目录/文件对应的系统临时目录/文件的CDentry;
- 对被删除目录/文件、被删除目录/文件的上一级目录、被删除目录/文件对应的系统临时文件相关结构上锁;
- 调用_unlink_local函数做进一步处理;如果该dentry是remote链接且对应inode的权威不在当前MDS上,则改为调用_link_remote。
// Handle a client request that removes a directory entry.
//
// The same entry point serves both operations: CEPH_MDS_OP_RMDIR sets the
// local `rmdir` flag, any other op reaching here is treated as a plain unlink.
//
// Steps, in order:
//   1. Validate the request path (non-empty, last component not "." / "..").
//   2. Traverse the path to obtain the dentry trace and the target inode.
//   3. Reject snapshot paths (-EROFS); forward if this MDS is not auth for
//      the dentry.
//   4. Check rmdir/unlink against the inode type (-ENOTEMPTY, -EISDIR,
//      -ENOTDIR).
//   5. For a primary linkage, prepare a stray dentry for the inode.
//   6. Build the lock vector and acquire all locks.
//   7. Re-check emptiness under locks, check write access on the parent dir.
//   8. Prepare the snaprealm node stored in mdr->more()->desti_srnode.
//   9. For directories with a subtree-root dirfrag, prepare witnesses on the
//      replica MDS ranks and wait for them.
//  10. Dispatch to _link_remote() or _unlink_local().
//
// The function returns void. Every early `return` either follows a
// respond_to_request() call or follows a helper that returned a "not done"
// status; presumably such helpers arrange for the request to be retried or
// forwarded -- confirm against the helper implementations.
//
// @param mdr  the in-flight MDS request; supplies the client request and
//             client id, and its straydn and more() state are updated here.
void Server::handle_client_unlink(MDRequestRef &mdr)
{
// The client request message carried by this MDS request.
const MClientRequest::const_ref &req = mdr->client_request;
// Identifier of the requesting client (used below to pick the linkage view).
client_t client = mdr->get_client();
// rmdir or unlink?
// `rmdir` is true when the client op is CEPH_MDS_OP_RMDIR.
bool rmdir = false;
if (req->get_op() == CEPH_MDS_OP_RMDIR)
rmdir = true;
// Path of the entry to remove, as supplied in the request.
const filepath &refpath = req->get_filepath();
// A path with zero components names no dentry: invalid argument.
if (refpath.depth() == 0)
{
respond_to_request(mdr, -EINVAL);
return;
}
// Refuse to remove "." or ".." (checked on the last path component, per the
// helper's name); this is answered with -ENOTEMPTY.
if (refpath.is_last_dot_or_dotdot())
{
respond_to_request(mdr, -ENOTEMPTY);
return;
}
// traverse to path
vector<CDentry *> trace;
CInode *in;
CF_MDS_MDRContextFactory cf(mdcache, mdr);
// Resolve the path; on success `trace` holds the dentries along the path and
// `in` the target inode.
int r = mdcache->path_traverse(mdr, cf, refpath, &trace, &in, MDS_TRAVERSE_FORWARD);
// r > 0: nothing more to do now and no reply is sent here.
// NOTE(review): presumably the traversal deferred or forwarded the request -- confirm.
if (r > 0)
return;
if (r < 0)
{
// -ESTALE is not reported to the client immediately: ask peers for the inode
// and hand the request to C_MDS_TryFindInode.
if (r == -ESTALE)
{
dout(10) << "FAIL on ESTALE but attempting recovery" << dendl;
mdcache->find_ino_peers(refpath.get_ino(), new C_MDS_TryFindInode(this, mdr));
return;
}
// Any other traversal error is returned to the client as-is.
respond_to_request(mdr, r);
return;
}
// The path resolved inside a snapshot: reply read-only filesystem.
if (mdr->snapid != CEPH_NOSNAP)
{
respond_to_request(mdr, -EROFS);
return;
}
// The last dentry of the trace is the one being removed.
CDentry *dn = trace.back();
ceph_assert(dn);
// If this MDS is not authoritative for the dentry, forward the request to
// the rank reported by dn->authority().
if (!dn->is_auth())
{
mdcache->request_forward(mdr, dn->authority().first);
return;
}
// Inode of the directory that contains the dentry (the parent directory).
CInode *diri = dn->get_dir()->get_inode();
// Linkage of the dentry as seen by this client/request; it must not be null.
CDentry::linkage_t *dnl = dn->get_linkage(client, mdr);
ceph_assert(!dnl->is_null());
if (rmdir)
{
dout(7) << "handle_client_rmdir on " << *dn << dendl;
}
else
{
dout(7) << "handle_client_unlink on " << *dn << dendl;
}
dout(7) << "dn links to " << *in << dendl;
// rmdir vs is_dir
// The operation must match the inode type: rmdir only on directories,
// unlink only on non-directories.
if (in->is_dir())
{
// Directory target with an rmdir request.
if (rmdir)
{
// do empty directory checks
// Early emptiness check made before any locks are taken; a non-empty
// directory is rejected with -ENOTEMPTY. The check is repeated after
// acquire_locks() below.
if (_dir_is_nonempty_unlocked(mdr, in))
{
respond_to_request(mdr, -ENOTEMPTY);
return;
}
}
else
{ // plain unlink on a directory is rejected with -EISDIR
dout(7) << "handle_client_unlink on dir " << *in << ", returning error" << dendl;
respond_to_request(mdr, -EISDIR);
return;
}
}
else
{ // target is not a directory, so an rmdir request is rejected with -ENOTDIR
if (rmdir)
{
// unlink
dout(7) << "handle_client_rmdir on non-dir " << *in << ", returning error" << dendl;
respond_to_request(mdr, -ENOTDIR);
return;
}
}
// -- create stray dentry? --
// A stray dentry is only needed when the dentry being removed is the
// inode's primary linkage.
// NOTE(review): presumably the stray directory is the MDS-internal holding
// area for unlinked inodes -- confirm against prepare_stray_dentry().
CDentry *straydn = NULL;
// Primary linkage: the dentry links the inode directly (as opposed to a
// remote linkage, handled at the end of this function).
if (dnl->is_primary())
{
// Get the stray dentry for this inode; a null result ends this pass
// without a reply (presumably the request is retried later -- confirm).
straydn = prepare_stray_dentry(mdr, dnl->get_inode());
if (!straydn)
return;
dout(10) << " straydn is " << *straydn << dendl;
}
else if (mdr->straydn)
{
// Not a primary linkage but the request still holds a stray dentry:
// unpin it and clear it.
mdr->unpin(mdr->straydn);
mdr->straydn = NULL;
}
// Locks requested below:
//   - rdlock on every ancestor dentry in the trace (all but the last),
//   - xlock on the target dentry,
//   - wrlock on the parent directory inode's filelock and nestlock,
//   - xlock on the target inode's linklock and snaplock,
//   - if a stray dentry is used: wrlock on the stray directory inode's
//     filelock and nestlock, and xlock on the stray dentry,
//   - snap rdlocks for the parent directory,
//   - for directories: rdlock on the target inode's filelock.
// lock
MutationImpl::LockOpVec lov;
for (int i = 0; i < (int)trace.size() - 1; i++)
lov.add_rdlock(&trace[i]->lock);
lov.add_xlock(&dn->lock);
lov.add_wrlock(&diri->filelock);
lov.add_wrlock(&diri->nestlock);
lov.add_xlock(&in->linklock);
if (straydn)
{
lov.add_wrlock(&straydn->get_dir()->inode->filelock);
lov.add_wrlock(&straydn->get_dir()->inode->nestlock);
lov.add_xlock(&straydn->lock);
}
mds->locker->include_snap_rdlocks(diri, lov);
lov.add_xlock(&in->snaplock);
if (in->is_dir())
lov.add_rdlock(&in->filelock); // to verify it's empty
// If the locks cannot all be acquired now, return without replying
// (presumably the locker re-drives the request later -- confirm).
if (!mds->locker->acquire_locks(mdr, lov))
return;
// Second emptiness check, now made after the locks were acquired.
if (in->is_dir() && _dir_is_nonempty(mdr, in))
{
respond_to_request(mdr, -ENOTEMPTY);
return;
}
// Check write permission on the parent directory, but only while no witness
// has been recorded for this request yet.
// NOTE(review): presumably this avoids re-checking on a retry after
// witnesses were prepared -- confirm.
if ((!mdr->has_more() || mdr->more()->witnessed.empty()))
{
if (!check_access(mdr, diri, MAY_WRITE))
return;
}
// Set the stray dentry's `first` snapid to one past the newest sequence
// number of the global snaprealm.
if (straydn)
straydn->first = mdcache->get_global_snaprealm()->get_newest_seq() + 1;
// Prepare the destination snaprealm node once per request; it is cached in
// mdr->more()->desti_srnode.
if (!mdr->more()->desti_srnode)
{
if (in->is_projected_snaprealm_global())
{
sr_t *new_srnode = in->prepare_new_srnode(0);
in->record_snaprealm_parent_dentry(new_srnode, nullptr, dn, dnl->is_primary());
// dropping the last linkage or dropping the last remote linkage,
// detach the inode from global snaprealm
auto nlink = in->get_projected_inode()->nlink;
if (nlink == 1 || (nlink == 2 && !dnl->is_primary() && !in->get_projected_parent_dir()->inode->is_stray()))
in->clear_snaprealm_global(new_srnode);
mdr->more()->desti_srnode = new_srnode;
}
else if (dnl->is_primary())
{
// prepare snaprealm blob for slave request
SnapRealm *realm = in->find_snaprealm();
snapid_t follows = realm->get_newest_seq();
// A new srnode is prepared only if the inode already has its own snaprealm
// or follows + 1 exceeds in->get_oldest_snap(). straydn is non-null here
// because this branch requires a primary linkage.
if (in->snaprealm || follows + 1 > in->get_oldest_snap())
{
sr_t *new_srnode = in->prepare_new_srnode(follows);
in->record_snaprealm_past_parent(new_srnode, straydn->get_dir()->inode->find_snaprealm());
mdr->more()->desti_srnode = new_srnode;
}
}
}
// yay!
// Removing a directory that has a subtree-root dirfrag requires every MDS
// rank holding a replica of the inode to witness the operation.
if (in->is_dir() && in->has_subtree_root_dirfrag())
{
// subtree root auths need to be witnesses
set<mds_rank_t> witnesses;
in->list_replicas(witnesses);
dout(10) << " witnesses " << witnesses << ", have " << mdr->more()->witnessed << dendl;
for (set<mds_rank_t>::iterator p = witnesses.begin(); p != witnesses.end(); ++p)
{
if (mdr->more()->witnessed.count(*p))
{
dout(10) << " already witnessed by mds." << *p << dendl;
}
else if (mdr->more()->waiting_on_slave.count(*p))
{
dout(10) << " already waiting on witness mds." << *p << dendl;
}
else
{
// Neither witnessed nor already pending: prepare this rank as a witness;
// give up on this pass if that fails.
if (!_rmdir_prepare_witness(mdr, *p, trace, straydn))
return;
}
}
if (!mdr->more()->waiting_on_slave.empty())
return; // we're waiting for a witness.
}
// ok!
// Remote linkage whose inode is not authoritative on this MDS goes through
// _link_remote() (called with `false` as its second argument); everything
// else is handled by _unlink_local().
if (dnl->is_remote() && !dnl->get_inode()->is_auth())
_link_remote(mdr, false, dn, dnl->get_inode());
else
_unlink_local(mdr, dn, straydn);
}
由于时间问题,文章还在努力完善中…