glance支持延迟删除镜像的功能,个人觉得挺实用的,特别是在误删除的情况下。从某种程度来说,也算是对数据的一种另类保护吧。


大致实现原理是:配置项delayed_delete是延迟删除的开关。如果为True(且镜像有location),删除镜像时只会把镜像的状态置为pending_delete,并记录删除时刻(即deleted_at,下文记作delete_time);后台进程scrubber每隔一段时间(wakeup_time秒)检查一次是否有pending_delete的镜像需要清除,判断标准是:delete_time + scrub_time <= time.time(),其中scrub_time是镜像从被删除到真正被擦除需要等待的秒数。


开启delayed_delete

[root@controller2 ~(keystone_admin)]# vim /etc/glance/glance-api.conf
delayed_delete = True


来看glance api端删除镜像时判断是否开启了delayed_delete的代码

# v1的api
glance/api/v1/images.py
@utils.mutating
    def delete(self, req, id):
        """
        Deletes the p_w_picpath and all its chunks from the Glance
        :param req: The WSGI/Webob Request object
        :param id: The opaque p_w_picpath identifier
        :raises: HttpBadRequest if p_w_picpath registry is invalid
        :raises: HttpNotFound if p_w_picpath or any chunk is not available
        :raises: HttpUnauthorized if p_w_picpath or any chunk is not
                deleteable by the requesting user
        """
        self._enforce(req, 'delete_p_w_picpath')
        p_w_picpath = self.get_p_w_picpath_meta_or_404(req, id)
        if p_w_picpath['protected']:
            msg = _("Image is protected")
            LOG.warn(msg)
            raise HTTPForbidden(explanation=msg,
                                request=req,
                                content_type="text/plain")
        if p_w_picpath['status'] == 'pending_delete':
            msg = (_("Forbidden to delete a %s p_w_picpath.") %
                   p_w_picpath['status'])
            LOG.warn(msg)
            raise HTTPForbidden(explanation=msg,
                                request=req,
                                content_type="text/plain")
        elif p_w_picpath['status'] == 'deleted':
            msg = _("Image %s not found.") % id
            LOG.warn(msg)
            raise HTTPNotFound(explanation=msg, request=req,
                               content_type="text/plain")
        if p_w_picpath['location'] and CONF.delayed_delete:      # 这里做了判断
            status = 'pending_delete'
        else:
            status = 'deleted'
。。。。。。            
            

# v2的api         
glance/api/v2/images.py
@utils.mutating
    def delete(self, req, p_w_picpath_id):
        """
        Delete an image through the v2 API.

        Fetches the image from the repo obtained via ``self.gateway``, calls
        the image's ``delete()`` and then passes it to the repo's
        ``remove()``. Store and domain exceptions raised along the way are
        translated into webob HTTP errors.

        :param req: request object; only ``req.context`` is used here
        :param p_w_picpath_id: identifier of the image to delete
        :raises: HTTPForbidden, HTTPNotFound, HTTPConflict, HTTPBadRequest
                 or HTTPUnauthorized, depending on the underlying failure
        """
        p_w_picpath_repo = self.gateway.get_repo(req.context)
        try:
            p_w_picpath = p_w_picpath_repo.get(p_w_picpath_id)
            p_w_picpath.delete()    # follow this call to see where the status is decided
            p_w_picpath_repo.remove(p_w_picpath)
        except (glance_store.Forbidden, exception.Forbidden) as e:
            # Caller is not allowed to delete this image -> 403.
            LOG.debug("User not permitted to delete p_w_picpath '%s'", p_w_picpath_id)
            raise webob.exc.HTTPForbidden(explanation=e.msg)
        except (glance_store.NotFound, exception.NotFound) as e:
            # Image (or its data) does not exist -> 404.
            msg = (_("Failed to find p_w_picpath %(p_w_picpath_id)s to delete") %
                   {'p_w_picpath_id': p_w_picpath_id})
            LOG.warn(msg)
            raise webob.exc.HTTPNotFound(explanation=msg)
        except glance_store.exceptions.InUseByStore as e:
            # Backend reports the image is still in use -> 409.
            msg = (_("Image %(id)s could not be deleted "
                     "because it is in use: %(exc)s") %
                   {"id": p_w_picpath_id,
                    "exc": e.msg})
            LOG.warn(msg)
            raise webob.exc.HTTPConflict(explanation=msg)
        except glance_store.exceptions.HasSnapshot as e:
            # Backend reports remaining snapshots -> 409.
            raise webob.exc.HTTPConflict(explanation=e.msg)
        except exception.InvalidImageStatusTransition as e:
            # The requested status change is not permitted -> 400.
            raise webob.exc.HTTPBadRequest(explanation=e.msg)
        except exception.NotAuthenticated as e:
            # Missing/invalid credentials -> 401.
            raise webob.exc.HTTPUnauthorized(explanation=e.msg)

            
glance/domain/proxy.py
class Image(object):
    """Proxy wrapper around a ``base`` image object.

    Every public attribute below is declared through ``_proxy('base', ...)``;
    presumably each one forwards reads/writes to the same-named attribute of
    ``self.base`` (``_proxy`` is defined outside this excerpt -- TODO
    confirm). ``delete()`` is forwarded explicitly.
    """

    def __init__(self, base, member_repo_proxy_class=None,
                 member_repo_proxy_kwargs=None):
        # The wrapped object that all proxied attributes refer to.
        self.base = base
        # Helper built from the member-repo proxy class/kwargs; its use is
        # not visible in this excerpt.
        self.helper = Helper(member_repo_proxy_class,
                             member_repo_proxy_kwargs)

    name = _proxy('base', 'name')
    p_w_picpath_id = _proxy('base', 'p_w_picpath_id')
    status = _proxy('base', 'status')
    created_at = _proxy('base', 'created_at')
    updated_at = _proxy('base', 'updated_at')
    visibility = _proxy('base', 'visibility')
    min_disk = _proxy('base', 'min_disk')
    min_ram = _proxy('base', 'min_ram')
    protected = _proxy('base', 'protected')
    locations = _proxy('base', 'locations')
    checksum = _proxy('base', 'checksum')
    owner = _proxy('base', 'owner')
    disk_format = _proxy('base', 'disk_format')
    container_format = _proxy('base', 'container_format')
    size = _proxy('base', 'size')
    virtual_size = _proxy('base', 'virtual_size')
    extra_properties = _proxy('base', 'extra_properties')
    tags = _proxy('base', 'tags')

    def delete(self):
        """Forward the delete request to the wrapped object."""
        self.base.delete()   # per the author, `base` comes from glance/domain/__init__.py

glance/domain/__init__.py
class Image(object):
    """Domain object describing one image and its lifecycle status.

    Required fields are passed positionally; every other field is an
    optional keyword argument with a default. Unknown keyword arguments
    are rejected with ``TypeError``.
    """

    # Maps each "current" status to the statuses that may follow it.
    # NOTE(flwang): In v2, we are deprecating the 'killed' status, so it's
    # allowed to restore an image from 'saving' to 'queued' so that upload
    # can be retried.
    valid_state_targets = dict(
        queued=('saving', 'active', 'deleted'),
        saving=('active', 'killed', 'deleted', 'queued'),
        active=('pending_delete', 'deleted', 'deactivated'),
        killed=('deleted',),
        pending_delete=('deleted',),
        deleted=(),
        deactivated=('active', 'deleted'),
    )

    def __init__(self, p_w_picpath_id, status, created_at, updated_at, **kwargs):
        """Populate the image from required fields plus optional keywords.

        :param p_w_picpath_id: identifier of the image
        :param status: initial status string
        :param created_at: creation timestamp
        :param updated_at: last-update timestamp
        :param kwargs: optional fields (name, visibility, min_disk, min_ram,
                       protected, locations, checksum, owner, disk_format,
                       container_format, size, virtual_size,
                       extra_properties, tags)
        :raises: TypeError if an unrecognised keyword argument is supplied
        """
        self.p_w_picpath_id = p_w_picpath_id
        self.status = status
        self.created_at = created_at
        self.updated_at = updated_at

        # (attribute to set, keyword to consume, default). The table is built
        # per call so the mutable default ([]) is never shared between
        # instances.
        optional_fields = (
            ('name', 'name', None),
            ('visibility', 'visibility', 'private'),
            ('min_disk', 'min_disk', 0),
            ('min_ram', 'min_ram', 0),
            ('protected', 'protected', False),
            ('locations', 'locations', []),
            ('checksum', 'checksum', None),
            ('owner', 'owner', None),
            ('_disk_format', 'disk_format', None),
            ('_container_format', 'container_format', None),
            ('size', 'size', None),
            ('virtual_size', 'virtual_size', None),
        )
        for attr_name, keyword, default in optional_fields:
            setattr(self, attr_name, kwargs.pop(keyword, default))

        self.extra_properties = ExtraProperties(
            kwargs.pop('extra_properties', {}))
        self.tags = kwargs.pop('tags', [])

        # Anything still left in kwargs was not a recognised field.
        if kwargs:
            message = _("__init__() got unexpected keyword argument '%s'")
            raise TypeError(message % next(iter(kwargs)))

    def delete(self):
        """Mark the image as deleted (this is the method ``base.delete()`` reaches).

        The status becomes 'pending_delete' when ``CONF.delayed_delete`` is
        enabled and the image has locations; otherwise it becomes 'deleted'.

        :raises: exception.ProtectedImageDelete if the image is protected
        """
        if self.protected:
            raise exception.ProtectedImageDelete(p_w_picpath_id=self.p_w_picpath_id)
        delay_requested = CONF.delayed_delete and self.locations
        self.status = 'pending_delete' if delay_requested else 'deleted'
            
# v2 api有gateway、proxy、domain这些概念,留个悬念,下次弄清楚。

这里是官方对gateway、domain、proxy的介绍:http://docs.openstack.org/developer/glance/domain_model.html


修改glance-scrubber.conf文件

[root@controller2 ~(keystone_admin)]# egrep -v "^$|^#" /etc/glance/glance-scrubber.conf
[DEFAULT]
scrub_time=300
delayed_delete=true
send_identity_headers=true
wakeup_time=60
daemon=True
admin_user=glance
admin_password=glance
admin_tenant_name=service
auth_url=http://controller2:35357/v2.0
auth_region=RegionOne
registry_host=controller2
registry_port=9191
[database]
connection = mysql+pymysql://glance:glance@controller2/glance?charset=utf8
[oslo_concurrency]
[oslo_policy]
[glance_store]
default_store = rbd
stores = rbd,http,cinder
rbd_store_pool = images
rbd_store_user = glance
rbd_store_ceph_conf = /etc/ceph/ceph.conf
rbd_store_chunk_size = 8
filesystem_store_datadirs = /var/lib/glance/images


启动glance-scrubber服务

[root@controller2 ~(keystone_admin)]# service openstack-glance-scrubber start


接下来看glance scrubber的启动过程

glance/cmd/scrubber.py
def main():
    """Entry point of the glance-scrubber command.

    Registers the scrubber options, parses configuration, sets up logging
    and the glance_store backends, then runs the scrubber either as a
    periodic daemon (``CONF.daemon``) or once. A ``RuntimeError`` during
    start-up terminates the process with an error message.
    """
    CONF.register_cli_opts(scrubber.scrubber_cmd_cli_opts)
    CONF.register_opts(scrubber.scrubber_cmd_opts)
    try:
        config.parse_args()
        logging.setup(CONF, 'glance')
        glance_store.register_opts(config.CONF)
        glance_store.create_stores(config.CONF)  # per the author: calls create_stores in glance_store/backend.py, which initialises SCHEME_TO_CLS_MAP
        glance_store.verify_default_store()
        app = scrubber.Scrubber(glance_store) # glance_store is passed in as the Scrubber's store_api
        if CONF.daemon:     # keep glance-scrubber running as a daemon
            server = scrubber.Daemon(CONF.wakeup_time)
            server.start(app)
            server.wait()
        else:
            # One-shot mode: a single scrub pass, then return.
            app.run()
    except RuntimeError as e:
        sys.exit("ERROR: %s" % e)

if __name__ == '__main__':
    main()


Daemon类

glance/scrubber.py
class Daemon(object):
    """Re-runs an application every ``wakeup_time`` seconds using eventlet.

    ``start()`` kicks off the first run; each run schedules the next one.
    ``wait()`` blocks the caller on an eventlet event until interrupted.
    """

    def __init__(self, wakeup_time=300, threads=100):
        """
        :param wakeup_time: seconds between two consecutive runs
        :param threads: size of the green pool used to launch runs
        """
        LOG.info(_LI("Starting Daemon: wakeup_time=%(wakeup_time)s "
                     "threads=%(threads)s"),
                 dict(wakeup_time=wakeup_time, threads=threads))
        self.wakeup_time = wakeup_time
        self.event = eventlet.event.Event()
        # Pool in which each periodic invocation of the application runs.
        self.daemon_pool = eventlet.greenpool.GreenPool(threads)

    def start(self, application):
        """Launch the first run of ``application`` (an object with ``run``)."""
        self._run(application)

    def wait(self):
        """Block on the daemon's event; Ctrl-C is logged instead of raised."""
        try:
            self.event.wait()
        except KeyboardInterrupt:
            LOG.info(_LI("Daemon Shutdown on KeyboardInterrupt"))

    def _run(self, application):
        """Run ``application`` once in the pool and schedule the next run."""
        LOG.debug("Running application")
        # The application's run() receives the daemon's event as argument.
        self.daemon_pool.spawn_n(application.run, self.event)
        # Re-arm: this same method fires again after wakeup_time seconds.
        eventlet.spawn_after(self.wakeup_time, self._run, application)
        LOG.debug("Next run scheduled in %s seconds", self.wakeup_time)


Scrubber类

class Scrubber(object):
    def __init__(self, store_api):
        """Set up the registry client, admin context, scrub queue and pool.

        :param store_api: object used later to delete image data from the
                          backend (the caller passes the glance_store module)
        """
        LOG.info(_LI("Initializing scrubber with configuration: %s"),
                 six.text_type({'registry_host': CONF.registry_host,
                                'registry_port': CONF.registry_port}))
        self.store_api = store_api
        registry.configure_registry_client()
        registry.configure_registry_admin_creds()  # per the author: in glance/registry/client/{v1,v2}/api.py; prepares _CLIENT_CREDS, needed to obtain a registry client
        # Here we create a request context with credentials to support
        # delayed delete when using multi-tenant backend storage
        admin_user = CONF.admin_user
        admin_tenant = CONF.admin_tenant_name  # must be configured; used for authorization when obtaining the registry client
        if CONF.send_identity_headers:      # author's note: authorization kept failing until send_identity_headers was enabled -- possible pitfall
            # When registry is operating in trusted-auth mode
            roles = [CONF.admin_role]
            self.admin_context = context.RequestContext(user=admin_user,
                                                        tenant=admin_tenant,
                                                        auth_token=None,
                                                        roles=roles)
            self.registry = registry.get_registry_client(self.admin_context)
        else:
            # Obtain a client with an empty context first, then reuse the
            # client's auth token to build the admin context.
            ctxt = context.RequestContext()
            self.registry = registry.get_registry_client(ctxt)
            auth_token = self.registry.auth_token
            self.admin_context = context.RequestContext(user=admin_user,
                                                        tenant=admin_tenant,
                                                        auth_token=auth_token)
        # Source of pending scrub jobs, and the pool that executes them.
        self.db_queue = get_scrub_queue()
        self.pool = eventlet.greenpool.GreenPool(CONF.scrub_pool_size)
        
# 每隔wakeup_time秒就会执行这个run函数        
    def run(self, event=None):
        delete_jobs = self._get_delete_jobs()

        if delete_jobs:
            list(self.pool.starmap(self._scrub_p_w_picpath, delete_jobs.items()))  # 对后面可迭代对象迭代执行_scrub_p_w_picpath函数
         
# _scrub_p_w_picpath函数          
    def _scrub_p_w_picpath(self, p_w_picpath_id, delete_jobs):
        if len(delete_jobs) == 0:
            return

        LOG.info(_LI("Scrubbing p_w_picpath %(id)s from %(count)d locations."),
                 {'id': p_w_picpath_id, 'count': len(delete_jobs)})

        success = True
        for img_id, loc_id, uri in delete_jobs:
            try:
                self._delete_p_w_picpath_location_from_backend(img_id, loc_id, uri)
            except Exception:
                success = False

        if success:
            p_w_picpath = self.registry.get_p_w_picpath(p_w_picpath_id)
            if p_w_picpath['status'] == 'pending_delete':
                self.registry.update_p_w_picpath(p_w_picpath_id, {'status': 'deleted'}) # 利用上面获得的registry client更新p_w_picpath的状态,registry是跟数据库打交道的
            LOG.info(_LI("Image %s has been scrubbed successfully"), p_w_picpath_id)
        else:
            LOG.warn(_LW("One or more p_w_picpath locations couldn't be scrubbed "
                         "from backend. Leaving p_w_picpath '%s' in 'pending_delete'"
                         " status") % p_w_picpath_id)

# _delete_p_w_picpath_location_from_backend函数                                                 
    def _delete_p_w_picpath_location_from_backend(self, p_w_picpath_id, loc_id, uri):
        """Delete one image location from the backend and mark it in the DB.

        :param p_w_picpath_id: identifier of the image owning the location
        :param loc_id: location id; the literal '-' skips the DB update
        :param uri: location URI (encrypted when a metadata encryption key
                    is configured)
        :raises: any exception other than the store's NotFound is logged
                 and re-raised
        """
        if CONF.metadata_encryption_key:
            uri = crypt.urlsafe_decrypt(CONF.metadata_encryption_key, uri) # the uri is stored encrypted, so decrypt it first
        try:
            LOG.debug("Scrubbing p_w_picpath %s from a location.", p_w_picpath_id)
            try:
                self.store_api.delete_from_backend(uri, self.admin_context)  # per the author, store_api is glance_store/__init__.py
            except store_exceptions.NotFound:
                # Data already missing in the backend: not an error, carry on
                # and still mark the location deleted below.
                LOG.info(_LI("Image location for p_w_picpath '%s' not found in "
                             "backend; Marking p_w_picpath location deleted in "
                             "db."), p_w_picpath_id)

            if loc_id != '-':
                # Record the location as 'deleted' in the database.
                db_api.get_api().p_w_picpath_location_delete(self.admin_context,
                                                       p_w_picpath_id,
                                                       int(loc_id),
                                                       'deleted')
            LOG.info(_LI("Image %s is scrubbed from a location."), p_w_picpath_id)
        except Exception as e:
            # Log the failure and let the caller decide what to do with it.
            LOG.error(_LE("Unable to scrub p_w_picpath %(id)s from a location. "
                          "Reason: %(exc)s ") %
                      {'id': p_w_picpath_id,
                       'exc': encodeutils.exception_to_unicode(e)})
            raise


# _get_delete_jobs函数,获取要删除的镜像的dict           
    def _get_delete_jobs(self):
        try:
            records = self.db_queue.get_all_locations()  # ScrubDBQueue类的get_all_locations函数
        except Exception as err:
            LOG.error(_LE("Can not get scrub jobs from queue: %s") %
                      encodeutils.exception_to_unicode(err))
            return {}

        delete_jobs = {}
        for p_w_picpath_id, loc_id, loc_uri in records:
            if p_w_picpath_id not in delete_jobs:
                delete_jobs[p_w_picpath_id] = []
            delete_jobs[p_w_picpath_id].append((p_w_picpath_id, loc_id, loc_uri))
        return delete_jobs
        

# ScrubDBQueue类的get_all_locations函数 
    def get_all_locations(self):
        """Returns a list of p_w_picpath id and location tuple from scrub queue.

        :returns: a list of p_w_picpath id, location id and uri tuple from
            scrub queue

        """
        ret = []

        for p_w_picpath in self._get_all_p_w_picpaths():
            deleted_at = p_w_picpath.get('deleted_at')
            if not deleted_at:
                continue

            # NOTE: Strip off microseconds which may occur after the last '.,'
            # Example: 2012-07-07T19:14:34.974216
            date_str = deleted_at.rsplit('.', 1)[0].rsplit(',', 1)[0]
            delete_time = calendar.timegm(time.strptime(date_str,
                                                        "%Y-%m-%dT%H:%M:%S"))

            if delete_time + self.scrub_time > time.time():  # 判断是否到了清除的时间
                continue

            for loc in p_w_picpath['location_data']:
                if loc['status'] != 'pending_delete':   # 判断是否是pending_delete状态
                    continue

                if self.metadata_encryption_key:        # 判断镜像uri是否加密
                    uri = crypt.urlsafe_encrypt(self.metadata_encryption_key,
                                                loc['url'], 64)
                else:
                    uri = loc['url']

                ret.append((p_w_picpath['id'], loc['id'], uri))
        return ret


下面都是关于glance_store的内容。glance_store算是glance的子项目,专门负责和后端的实际存储打交道。

glance_store/__init__.py
from .backend import *  # noqa
from .driver import *  # noqa
from .exceptions import *  # noqa

# 来看store_api.delete_from_backend函数
glance_store/backend.py
def delete_from_backend(uri, context=None):
    """Remove the data that ``uri`` points to from its backend store.

    :param uri: location URI of the data to delete
    :param context: optional request context passed through to the store
    :returns: whatever the selected store's ``delete`` returns
    """
    # Parse the URI into a location object first, then pick the store
    # responsible for the URI and let it perform the deletion.
    parsed_location = location.get_location_from_uri(uri, conf=CONF)
    backend = get_store_from_uri(uri)
    return backend.delete(parsed_location, context=context)

# get_store_from_uri函数
def get_store_from_uri(uri):
    """
    Given a URI, return the store object that would handle
    operations on the URI.

    :param uri: URI to analyze, e.g. ``file:///path`` or ``rbd://...``
    :returns: the store registered for the URI's scheme
    :raises: whatever ``get_store_from_scheme`` raises for an unknown scheme
    """
    # The scheme is everything before the first ':' (e.g. "file", "rbd").
    # Splitting on the colon avoids computing an offset from the first '/',
    # which produces a wrong slice when the URI contains no '/' at all
    # (str.find returns -1) or when '/' does not directly follow the colon.
    scheme = uri.partition(':')[0]
    return get_store_from_scheme(scheme)
    
# get_store_from_scheme函数,从SCHEME_TO_CLS_MAP中获取对应的schema mapping
def get_store_from_scheme(scheme):
    """
    Look up the store object registered in ``location.SCHEME_TO_CLS_MAP``
    for a scheme.

    :param scheme: URI scheme, e.g. "file" or "rbd"
    :returns: the registered store if it is reusable, otherwise a freshly
              loaded and configured store instance
    :raises: exceptions.UnknownScheme if the scheme is not registered
    """
    if scheme not in location.SCHEME_TO_CLS_MAP:
        raise exceptions.UnknownScheme(scheme=scheme)

    scheme_info = location.SCHEME_TO_CLS_MAP[scheme]
    store = scheme_info['store']
    if store.is_capable(capabilities.BitMasks.DRIVER_REUSABLE):
        return store

    # Driver instance isn't stateless so it can't be reused safely:
    # build and configure a new one from the same store entry.
    store_entry = scheme_info['store_entry']
    store = _load_store(store.conf, store_entry, invoke_load=True)
    store.configure()
    try:
        loc_cls = store.get_store_location_class()
        scheme_map = {}
        for handled_scheme in store.get_schemes():
            scheme_map[handled_scheme] = dict(store=store,
                                              location_class=loc_cls,
                                              store_entry=store_entry)
            # Registered on every iteration with the map built so far.
            location.register_scheme_map(scheme_map)
    except NotImplementedError:
        # The store cannot describe itself; just remember the new instance
        # on the existing entry.
        scheme_info['store'] = store
    return store
    
# 上面配置的stores是rbd,获得的就是glance_store/_drivers/rbd.py
@capabilities.check
    def delete(self, location, context=None):
        """
        Takes a `glance_store.location.Location` object that indicates
        where to find the image file to delete.

        :param location: `glance_store.location.Location` object, supplied
                  from glance_store.location.get_location_from_uri()
        :param context: accepted for interface compatibility; not forwarded
                  to the helper below

        :raises: NotFound if image does not exist;
                InUseByStore if image is in use or snapshot unprotect failed
        """
        loc = location.store_location
        # Use the pool named in the location, falling back to this store's
        # configured pool.
        target_pool = loc.pool or self.pool
        self._delete_p_w_picpath(target_pool, loc.p_w_picpath, loc.snapshot)

# _delete_p_w_picpath函数        
    def _delete_p_w_picpath(self, target_pool, p_w_picpath_name,
                      snapshot_name=None, context=None):
        """
        Delete RBD image and snapshot.

        :param target_pool: name of the pool to open an I/O context on
        :param p_w_picpath_name: Image's name
        :param snapshot_name: Image snapshot's name; when None, no snapshot
                              is removed before deleting the image
        :param context: not referenced in this method's body

        :raises: NotFound if image does not exist;
                InUseByStore if image is in use or snapshot unprotect failed;
                HasSnapshot if the image still has snapshots
        """
        with self.get_connection(conffile=self.conf_file,
                                 rados_id=self.user) as conn:
            with conn.open_ioctx(target_pool) as ioctx:
                try:
                    # First remove snapshot.
                    if snapshot_name is not None:
                        with rbd.Image(ioctx, p_w_picpath_name) as p_w_picpath:
                            try:
                                # Unprotect, then remove, the named snapshot.
                                p_w_picpath.unprotect_snap(snapshot_name)
                                p_w_picpath.remove_snap(snapshot_name)
                            except rbd.ImageNotFound as exc:
                                # Snapshot already gone: log and continue
                                # with deleting the image itself.
                                msg = (_("Snap Operating Exception "
                                         "%(snap_exc)s "
                                         "Snapshot does not exist.") %
                                       {'snap_exc': exc})
                                LOG.debug(msg)
                            except rbd.ImageBusy as exc:
                                # Snapshot still in use: abort the deletion.
                                log_msg = (_LE("Snap Operating Exception "
                                               "%(snap_exc)s "
                                               "Snapshot is in use.") %
                                           {'snap_exc': exc})
                                LOG.error(log_msg)
                                raise exceptions.InUseByStore()

                    # Then delete image.
                    rbd.RBD().remove(ioctx, p_w_picpath_name)
                except rbd.ImageHasSnapshots:
                    # Translate the rbd error into the store's HasSnapshot.
                    log_msg = (_LE("Remove p_w_picpath %(img_name)s failed. "
                                   "It has snapshot(s) left.") %
                               {'img_name': p_w_picpath_name})
                    LOG.error(log_msg)
                    raise exceptions.HasSnapshot()
                except rbd.ImageBusy:
                    # Translate the rbd error into the store's InUseByStore.
                    log_msg = (_LE("Remove p_w_picpath %(img_name)s failed. "
                                   "It is in use.") %
                               {'img_name': p_w_picpath_name})
                    LOG.error(log_msg)
                    raise exceptions.InUseByStore()
                except rbd.ImageNotFound:
                    # Translate the rbd error into the store's NotFound.
                    msg = _("RBD p_w_picpath %s does not exist") % p_w_picpath_name
                    raise exceptions.NotFound(message=msg)


参考链接

eventlet常用函数介绍 http://www.cnblogs.com/Security-Darren/p/4168233.html


以上过程,理解不对的地方,还请指正,见谅!