openstack冷迁移/Resize源码分析(二)

接上一篇 openstack冷迁移/Resize源码分析(一)

执行冷迁移/Resize

/nova_queens/nova/compute/manager.py

    @wrap_exception()
    @reverts_task_state
    @wrap_instance_event(prefix='compute')
    @wrap_instance_fault
    def resize_instance(self, context, instance, image,
                        migration, instance_type, clean_shutdown,
                        request_spec=None):
        """Start migrating a running instance to another host.

        This is initiated from the destination host's ``prep_resize`` routine
        and runs on the source host. The actual work is delegated to
        ``_resize_instance``.
        """
        try:
            self._resize_instance(context, instance, image, migration,
                                  instance_type, clean_shutdown, request_spec)
        except Exception:
            # Call _revert_allocation for this instance/migration, then let
            # the original exception propagate to the caller.
            with excutils.save_and_reraise_exception():
                self._revert_allocation(context, instance, migration)

    def _resize_instance(self, context, instance, image,
                         migration, instance_type, clean_shutdown,
                         request_spec):
        """Power off the instance, move its disks and hand over to the dest.

        Updates the migration and instance task state, calls the virt
        driver's ``migrate_disk_and_power_off``, terminates the volume
        connections, starts the network migration, points the instance
        record at the destination host/node, and finally invokes
        ``finish_resize`` through ``compute_rpcapi`` with
        ``migration.dest_compute`` as the target.
        """
        # Pass instance_state=instance.vm_state because we can resize a
        # STOPPED server and we don't want to set it back to ACTIVE in case
        # migrate_disk_and_power_off raises InstanceFaultRollback.
        instance_state = instance.vm_state
        with self._error_out_instance_on_exception(
                context, instance, instance_state=instance_state), \
             errors_out_migration_ctxt(migration):

            # Get the instance's network info.
            network_info = self.network_api.get_instance_nw_info(context,
                                                                 instance)

            migration.status = 'migrating'
            migration.save()

            # The save only succeeds if the task state is still RESIZE_PREP.
            instance.task_state = task_states.RESIZE_MIGRATING
            instance.save(expected_task_state=task_states.RESIZE_PREP)

            # Load the block device mappings (disk information).
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                    context, instance.uuid)
            self._send_resize_instance_notifications(
                context, instance, bdms, network_info,
                fields.NotificationPhase.START)

            block_device_info = self._get_instance_block_device_info(
                                context, instance, bdms=bdms)

            # Get the power-off timeout and retry interval for the instance.
            timeout, retry_interval = self._get_power_off_values(
                instance, clean_shutdown)
            # Power off the instance and migrate its disk files.
            # See migrate_disk_and_power_off in the libvirt driver.py.
            disk_info = self.driver.migrate_disk_and_power_off(
                    context, instance, migration.dest_host,
                    instance_type, network_info,
                    block_device_info,
                    timeout, retry_interval)

            # Terminate the instance's volume connections.
            self._terminate_volume_connections(context, instance, bdms)

            # Start migrating the instance's networking.
            self.network_api.migrate_instance_start(context,
                                                    instance,
                                                    migration)

            migration.status = 'post-migrating'
            migration.save()

            # Point the instance's host/node record at the destination.
            instance.host = migration.dest_compute
            instance.node = migration.dest_node
            instance.task_state = task_states.RESIZE_MIGRATED
            instance.save(expected_task_state=task_states.RESIZE_MIGRATING)

            # RPC to the destination host to finish the resize/migration.
            # See finish_resize in nova/compute/manager.py.
            self.compute_rpcapi.finish_resize(context, instance,
                migration, image, disk_info, migration.dest_compute,
                request_spec)

        # Reached only if the block above completed without raising; bdms and
        # network_info were bound inside the with block.
        self._send_resize_instance_notifications(
            context, instance, bdms, network_info,
            fields.NotificationPhase.END)
        self.instance_events.clear_events_for_instance(instance)

关闭电源+磁盘迁移

/nova_queens/nova/virt/libvirt/driver.py

    def migrate_disk_and_power_off(self, context, instance, dest,
                                   flavor, network_info,
                                   block_device_info=None,
                                   timeout=0, retry_interval=0):
        """Power off the instance and copy its local disks to ``dest``.

        Rejects disk downsizing and LVM-backed (non volume-booted) instances,
        powers the instance off, disconnects its volumes, renames the
        instance directory to ``<path>_resize`` and copies every disk except
        ``disk.swap`` to the new location.

        :returns: the instance's disk info serialized with jsonutils.dumps
        :raises exception.InstanceFaultRollback: wrapping ResizeError or
            MigrationPreCheckError when a pre-check fails or the remote
            directory cannot be created
        """
        LOG.debug("Starting migrate_disk_and_power_off",
                   instance=instance)

        # Get the ephemeral disk information.
        ephemerals = driver.block_device_info_get_ephemerals(block_device_info)

        # get_bdm_ephemeral_disk_size() will return 0 if the new instance's
        # requested block device mapping contains no ephemeral devices.
        # However, we still want to check whether the original instance's
        # ephemeral_gb was set and ensure the newly requested flavor's
        # ephemeral size is not smaller.
        eph_size = (block_device.get_bdm_ephemeral_disk_size(ephemerals) or
                    instance.flavor.ephemeral_gb)

        # Check whether the migration would require shrinking a disk.
        root_down = flavor.root_gb < instance.flavor.root_gb
        ephemeral_down = flavor.ephemeral_gb < eph_size

        # Check whether the instance is booted from a volume.
        booted_from_volume = self._is_booted_from_volume(block_device_info)

        # Local disks cannot be resized down; a smaller root disk is only
        # tolerated when the instance is volume-booted.
        if (root_down and not booted_from_volume) or ephemeral_down:
            reason = _("Unable to resize disk down.")
            raise exception.InstanceFaultRollback(
                exception.ResizeError(reason=reason))

        # NOTE(dgenin): Migration is not provided for LVM backed instances.
        # That is, an instance on the LVM image backend that is not booted
        # from a volume cannot be migrated.
        if CONF.libvirt.images_type == 'lvm' and not booted_from_volume:
            reason = _("Migration is not supported for LVM backed instances")
            raise exception.InstanceFaultRollback(
                exception.MigrationPreCheckError(reason=reason))

        # Copy disks to the destination.
        # Rename the instance dir to +_resize at first, so that shared
        # storage for the instance dir can be used.
        inst_base = libvirt_utils.get_instance_path(instance)
        inst_base_resize = inst_base + "_resize"

        # Determine whether the instance path is shared with the destination.
        shared_instance_path = self._is_path_shared_with(dest, inst_base)

        # Try to create the directory on the remote compute node up front;
        # a failure is raised here so it is caught early, before the
        # instance is powered off.
        if not shared_instance_path:
            try:
                # Non-shared storage: create the instance directory on the
                # destination host (the error message below indicates this
                # is done over ssh).
                self._remotefs.create_dir(dest, inst_base)
            except processutils.ProcessExecutionError as e:
                reason = _("not able to execute ssh command: %s") % e
                raise exception.InstanceFaultRollback(
                    exception.ResizeError(reason=reason))
        # Power off the instance.
        self.power_off(instance, timeout, retry_interval)

        # Disconnect every mapped volume from this host.
        block_device_mapping = driver.block_device_info_get_mapping(
            block_device_info)
        for vol in block_device_mapping:
            connection_info = vol['connection_info']
            self._disconnect_volume(context, connection_info, instance)

        # Collect the instance's disk info (original author's note: it
        # records the file paths of the root, ephemeral and swap disks).
        disk_info = self._get_instance_disk_info(instance, block_device_info)

        try:
            os.rename(inst_base, inst_base_resize)
            # If we are migrating the instance with a shared instance path,
            # create the directory here. For a remote node the directory has
            # already been created above.
            if shared_instance_path:
                # Shared storage: treat the destination as the local host.
                dest = None
                fileutils.ensure_tree(inst_base)

            # Track the copy processes in job_tracker while they run.
            on_execute = lambda process: \
                self.job_tracker.add_job(instance, process.pid)
            on_completion = lambda process: \
                self.job_tracker.remove_job(instance, process.pid)

            # Copy the instance's local disk files.
            for info in disk_info:
                # Assume inst_base == dirname(info['path']).
                img_path = info['path']
                fname = os.path.basename(img_path)
                from_path = os.path.join(inst_base_resize, fname)

                # We do not copy the swap disk here; we rely on
                # finish_migration to re-create it for us. This is fine
                # because the OS is shut down, and since re-creating a swap
                # disk is very cheap it is more efficient than copying it
                # locally or over the network. It also means we don't have
                # to resize it.
                if fname == 'disk.swap':
                    continue
                # Compress unless the disk type is in NO_COMPRESSION_TYPES.
                compression = info['type'] not in NO_COMPRESSION_TYPES
                # Original author's note: non-shared storage copies remotely
                # (scp), shared storage copies locally (cp); dest is None in
                # the shared case.
                libvirt_utils.copy_image(from_path, img_path, host=dest,
                                         on_execute=on_execute,
                                         on_completion=on_completion,
                                         compression=compression)

            # Ensure disk.info is written to the new path to avoid disks
            # being reinspected and potentially changing format.
            # Copy the disk.info file if it exists.
            src_disk_info_path = os.path.join(inst_base_resize, 'disk.info')
            if os.path.exists(src_disk_info_path):
                dst_disk_info_path = os.path.join(inst_base, 'disk.info')
                libvirt_utils.copy_image(src_disk_info_path,
                                         dst_disk_info_path,
                                         host=dest, on_execute=on_execute,
                                         on_completion=on_completion)

            # Handle migration of vTPM data if needed.
            libvirt_utils.save_and_migrate_vtpm_dir(
                instance.uuid, inst_base_resize, inst_base, dest,
                on_execute, on_completion)

        except Exception:
            # Clean up via _cleanup_remote_migration, then re-raise the
            # original exception.
            with excutils.save_and_reraise_exception():
                self._cleanup_remote_migration(dest, inst_base,
                                               inst_base_resize,
                                               shared_instance_path)

        return jsonutils.dumps(disk_info)

/nova_queens/nova/compute/manager.py

    @wrap_exception()
    @reverts_task_state
    @wrap_instance_event(prefix='compute')
    @errors_out_migration
    @wrap_instance_fault
    def finish_resize(self, context, disk_info, image, instance,
                      migration, request_spec=None):
        """Complete the migration process.

        Sets up the newly transferred disk and powers on the instance on
        its new host.
        """
        try:
            # Delegate to _finish_resize_helper in this same file.
            self._finish_resize_helper(context, disk_info, image, instance,
                                       migration, request_spec)
        except Exception:
            with excutils.save_and_reraise_exception():
                # At this point, resize_instance (which runs on the source)
                # has already updated the instance host/node values to point
                # to this (the dest) compute, so we need to leave the
                # allocations against the dest node resource provider intact
                # and drop the allocations against the source node resource
                # provider. If the user tries to recover the server by hard
                # rebooting it, that will happen on this host, so that is
                # where the allocations should go. Note that this is the same
                # method called from confirm_resize to clean up the source
                # node allocations held by the migration record.

                # The bare string below is an expression statement kept from
                # the original; it has no runtime effect beyond evaluation.
                """
                在 finish_resize 失败后删除源节点实例上旧flavor的分配。 您可以通过硬重启来恢复实例。
                """
                LOG.info('Deleting allocations for old flavor on source node '
                         '%s after finish_resize failure. You may be able to '
                         'recover the instance by hard rebooting it.',
                         migration.source_compute, instance=instance)
                self._delete_allocation_after_move(
                    context, instance, migration)

同一个py文件

    def _finish_resize_helper(self, context, disk_info, image, instance,
                              migration):
        """完成迁移过程。如果迁移过程失败,调用者必须恢复实例的分配。"""
        ...
            # 调用本py文件的的_finish_resize
            network_info = self._finish_resize(context, instance, migration,
                                               disk_info, image_meta, bdms)
        ...

同一个py文件

    def _finish_resize(self, context, instance, migration, disk_info,
                       image_meta, bdms):
        """Set up networking and volumes on the dest, then call the driver.

        Excerpt with parts elided ("..."). The visible code decides whether
        this is a real resize, sets up networks on the destination host,
        moves the task state to RESIZE_FINISH, updates volume attachments and
        calls ``driver.finish_migration``.
        """
        resize_instance = False
        ...
        # Compare the old and new instance_type_id to tell whether this is a
        # cold migration or a resize.
        if old_instance_type_id != new_instance_type_id:
            ...
                if old_instance_type[key] != instance_type[key]:
                    resize_instance = True
                    ...

        # Set up networking on the destination host.
        self.network_api.setup_networks_on_host(context, instance,
                                                migration['dest_compute'])
        ...
        # Get the instance's current network info.
        network_info = self.network_api.get_instance_nw_info(context, instance)
        # Persist the task state as RESIZE_FINISH; the save expects the
        # current task state to be RESIZE_MIGRATED.
        instance.task_state = task_states.RESIZE_FINISH
        instance.save(expected_task_state=task_states.RESIZE_MIGRATED)
        ...
        # Update all volume attachments using the destination host connector
        # so that BDM.connection_info is updated before calling
        # driver.finish_migration; otherwise the driver would not know how to
        # connect the volumes to this host.
        self._update_volume_attachments(context, instance, bdms)

        block_device_info = self._get_instance_block_device_info(
            context, instance, refresh_conn_info=True, bdms=bdms)

        # If the original vm_state was STOPPED, we don't automatically power
        # on the instance after it has been migrated.
        power_on = old_vm_state != vm_states.STOPPED

        try:
            # Call into the libvirt driver; see finish_migration in
            # nova/virt/libvirt/driver.py.
            self.driver.finish_migration(context, migration, instance,
                                         disk_info,
                                         network_info,
                                         image_meta, resize_instance,
                                         block_device_info, power_on)
        ...

完成迁移,libvirt根据xml启动虚拟机

nova_queens/nova/virt/libvirt/driver.py

    def finish_migration(self, context, migration, instance, disk_info,
                         network_info, image_meta, resize_instance,
                         block_device_info=None, power_on=True):
        """Create the images and the guest domain on the destination host.

        Excerpt with parts elided ("..."). ``disk_info`` arrives as a JSON
        string and is deserialized here. When ``power_on`` is true the method
        waits for the guest to be running and syncs the guest time.
        """
        LOG.debug("Starting finish_migration", instance=instance)
        # Build the block disk info for the instance (original author's
        # note: only relevant for root_gb and ephemeral_gb).
        block_disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
                                                  instance,
                                                  image_meta,
                                                  block_device_info)
        # Create the image(s), falling back to the source compute host.
        self._create_image(context, instance, block_disk_info['mapping'],
                           block_device_info=block_device_info,
                           ignore_bdi_for_swap=True,
                           fallback_from_host=migration.source_compute)
        # Deserialize the disk info passed in from the source host.
        disk_info = jsonutils.loads(disk_info)
        ...
            # Convert raw disks (other than disk.config) to qcow2 when
            # CONF.use_cow_images is set.
            if (disk_name != 'disk.config' and
                        info['type'] == 'raw' and CONF.use_cow_images):
                self._disk_raw_to_qcow2(info['path'])
        # Generate the guest's libvirt XML.
        xml = self._get_guest_xml(context, instance, network_info,
                                  block_disk_info, image_meta,
                                  block_device_info=block_device_info)
        # Create the domain and its networking from the XML.
        guest = self._create_domain_and_network(context, xml, instance,
                                        network_info,
                                        block_device_info=block_device_info,
                                        power_on=power_on,
                                        vifs_already_plugged=True,
                                        post_xml_callback=gen_confdrive)
        # If the instance should be powered on, wait until it is running
        # (polling every 0.5 seconds).
        if power_on:
            timer = loopingcall.FixedIntervalLoopingCall(
                                                    self._wait_for_running,
                                                    instance)
            timer.start(interval=0.5).wait()
            # Migration finished; sync the guest time.
            guest.sync_guest_time()
        LOG.debug("finish_migration finished successfully.", instance=instance)

迁移完成后实例处于 RESIZED 状态,等待用户确认:执行 confirm_resize 则清理源主机上的旧实例,执行 revert_resize 则回滚到源主机

nova_queens/nova/api/openstack/compute/servers.py

    @wsgi.response(204)
    @wsgi.expected_errors((400, 404, 409))
    @wsgi.action('confirmResize')
    def _action_confirm_resize(self, req, id, body):
        """Handle the ``confirmResize`` server action.

        Checks policy, loads the server and calls ``compute_api.confirm_resize``,
        translating the compute-layer exceptions into HTTP errors:
        InstanceUnknownCell -> 404, MigrationNotFound -> 400,
        InstanceIsLocked -> 409; InstanceInvalidState is handed to
        ``common.raise_http_conflict_for_instance_invalid_state``.
        """
        context = req.environ['nova.context']
        context.can(server_policies.SERVERS % 'confirm_resize')
        # Load the instance.
        instance = self._get_server(context, req, id)
        try:
            # Call confirm_resize in compute/api.py.
            self.compute_api.confirm_resize(context, instance)
        except exception.InstanceUnknownCell as e:
            raise exc.HTTPNotFound(explanation=e.format_message())
        except exception.MigrationNotFound:
            msg = _("Instance has not been resized.")
            raise exc.HTTPBadRequest(explanation=msg)
        except exception.InstanceIsLocked as e:
            raise exc.HTTPConflict(explanation=e.format_message())
        except exception.InstanceInvalidState as state_error:
            common.raise_http_conflict_for_instance_invalid_state(state_error,
                    'confirmResize', id)

nova_queens/nova/compute/api.py

    @check_instance_lock
    @check_instance_cell
    @check_instance_state(vm_state=[vm_states.RESIZED])
    def confirm_resize(self, context, instance, migration=None):
        """Confirm a migration/resize and delete the "old" instance.

        :param migration: the migration to confirm; when None, the
            instance's migration with status 'finished' is looked up.
        """
        elevated = context.elevated()
        # If the caller did not supply a migration, look up this instance's
        # migration record whose status is 'finished'.
        if migration is None:
            migration = objects.Migration.get_by_instance_and_status(
                elevated, instance.uuid, 'finished')

        # Mark the migration as 'confirming' and persist it.
        migration.status = 'confirming'
        migration.save()
        # Record the start of the confirm-resize instance action.
        self._record_action_start(context, instance,
                                  instance_actions.CONFIRM_RESIZE)
        # RPC to confirm_resize in compute/manager.py, addressed to the
        # migration's source compute host.
        self.compute_rpcapi.confirm_resize(context,
                                           instance,
                                           migration,
                                           migration.source_compute)

nova_queens/nova/compute/manager.py

    @wrap_exception()
    @wrap_instance_event(prefix='compute')
    @errors_out_migration
    @wrap_instance_fault
    def confirm_resize(self, context, instance, migration):
        """Confirm a migration/resize and delete the "old" instance.

        This is called from the API and runs on the source host.

        Nothing needs to happen on the destination host at this point since
        the instance is already running there. This routine just cleans up
        the source host.
        """
        # The inner function is decorated with utils.synchronized keyed on
        # the instance uuid.
        @utils.synchronized(instance.uuid)
        def do_confirm_resize(context, instance, migration_id):
            LOG.debug("Going to confirm migration %s", migration_id,
                      instance=instance)
            try:
                # Re-read the migration record for this resize by id.
                migration = objects.Migration.get_by_id(
                                    context.elevated(), migration_id)
            except exception.MigrationNotFound:
                LOG.error("Migration %s is not found during confirmation",
                          migration_id, instance=instance)
                return
            # An already confirmed migration is not confirmed again.
            if migration.status == 'confirmed':
                LOG.info("Migration %s is already confirmed",
                         migration_id, instance=instance)
                return
            # Any status other than 'finished' or 'confirming' is
            # unexpected; log a warning and return.
            elif migration.status not in ('finished', 'confirming'):
                LOG.warning("Unexpected confirmation status '%(status)s' "
                            "of migration %(id)s, exit confirmation process",
                            {"status": migration.status, "id": migration_id},
                            instance=instance)
                return

            # Re-read the instance; if it can no longer be found, do nothing
            # and return here.
            expected_attrs = ['metadata', 'system_metadata', 'flavor']
            try:
                # Load the instance by uuid.
                instance = objects.Instance.get_by_uuid(
                        context, instance.uuid,
                        expected_attrs=expected_attrs)
            except exception.InstanceNotFound:
                LOG.info("Instance is not found during confirmation",
                         instance=instance)
                return

            ...
                    # Delegate to _confirm_resize in this same file.
                    self._confirm_resize(
                    ...
        # Invoke the synchronized inner function once with the migration id.
        do_confirm_resize(context, instance, migration.id)

同一个py文件

    def _confirm_resize(self, context, instance, migration=None):
        """Destroys the source instance.

        NOTE(review): ``migration`` defaults to None but is dereferenced
        unconditionally below (``migration.source_compute`` etc.), so callers
        are expected to always pass it -- confirm against the callers.
        """

        # Emit the resize.confirm start notifications.
        self._notify_about_instance_usage(context, instance,
                                          "resize.confirm.start")
        compute_utils.notify_about_instance_action(context, instance,
            self.host, action=fields.NotificationAction.RESIZE_CONFIRM,
            phase=fields.NotificationPhase.START)

        # Clear the saved old/new flavor and the stashed old_vm_state from
        # the instance, remembering the old flavor for drop_move_claim below.
        old_instance_type = instance.old_flavor
        instance.old_flavor = None
        instance.new_flavor = None
        instance.system_metadata.pop('old_vm_state', None)
        instance.save()

        # Tear down the networking on the source host.
        self.network_api.setup_networks_on_host(context, instance,
                           migration.source_compute, teardown=True)
        # Get the instance's current network info.
        network_info = self.network_api.get_instance_nw_info(context,
                                                             instance)

        # NOTE(adrianc): Populate old PCI device in VIF profile
        # to allow virt driver to properly unplug it from Hypervisor.
        pci_mapping = (instance.migration_context.
                       get_pci_mapping_for_migration(True))
        network_info = self._get_updated_nw_info_with_pci_mapping(
            network_info, pci_mapping)

        # TODO (from the original note): get the BDMs here and pass them to
        # the driver; the call below passes only network_info.
        # See confirm_migration in the libvirt driver.py.
        self.driver.confirm_migration(context, migration, instance,
                                      network_info)
        # Mark the migration as confirmed.
        migration.status = 'confirmed'
        with migration.obj_as_admin():
            migration.save()

        rt = self._get_resource_tracker()
        rt.drop_move_claim(context, instance, migration.source_node,
                           old_instance_type, prefix='old_')
        instance.drop_migration_context()

        # NOTE(mriedem): The old_vm_state could be STOPPED but the user
        # might have manually powered up the instance to confirm the
        # resize/migrate, so we need to check the current power state
        # on the instance and set the vm_state appropriately. We default
        # to ACTIVE because if the power state is not SHUTDOWN, we
        # assume _sync_instance_power_state will clean it up.
        p_state = instance.power_state
        vm_state = None
        if p_state == power_state.SHUTDOWN:
            vm_state = vm_states.STOPPED
            LOG.debug("Resized/migrated instance is powered off. "
                      "Setting vm_state to '%s'.", vm_state,
                      instance=instance)
        else:
            vm_state = vm_states.ACTIVE

        instance.vm_state = vm_state
        instance.task_state = None
        instance.save(expected_task_state=[None, task_states.DELETING,
                                           task_states.SOFT_DELETING])
        # Emit the resize.confirm end notifications.
        self._notify_about_instance_usage(
            context, instance, "resize.confirm.end",
            network_info=network_info)
        compute_utils.notify_about_instance_action(context, instance,
               self.host, action=fields.NotificationAction.RESIZE_CONFIRM,
               phase=fields.NotificationPhase.END)

confirm 时,调用 libvirt 驱动清理源主机上的旧实例

nova_queens/nova/virt/libvirt/driver.py

    def confirm_migration(self, context, migration, instance, network_info):
        """Confirm a resize; destroy the source VM."""
        # Delegate to _cleanup_resize in this same file; migration is not
        # used here.
        self._cleanup_resize(context, instance, network_info)

同一个py文件

    def _cleanup_resize(self, context, instance, network_info):
        """Remove the ``_resize`` directory and the old guest definition.

        Excerpt with parts elided ("...").
        """
        inst_base = libvirt_utils.get_instance_path(instance)
        target = inst_base + '_resize'

        if os.path.exists(target):
            # Delete the _resize directory; to tolerate transient failures
            # the command is attempted up to 5 times.
            utils.execute('rm', '-rf', target, delay_on_retry=True,
                          attempts=5)
        ...
        # Only when the instance's host is no longer this host.
        if instance.host != CONF.host:
            # Undefine the guest domain through libvirt.
            self._undefine_domain(instance)
            # Clean up the network and firewall configuration.
            self.unplug_vifs(instance, network_info)
            self.unfilter_instance(instance, network_info)

用户不确认迁移结果时,通过 revert_resize 回滚

nova_queens/nova/api/openstack/compute/servers.py

    @wsgi.response(202)
    @wsgi.expected_errors((400, 404, 409))
    @wsgi.action('revertResize')
    def _action_revert_resize(self, req, id, body):
        """Handle the ``revertResize`` server action.

        Excerpt: the exception handling after the try block is elided.
        """
        context = req.environ['nova.context']
        context.can(server_policies.SERVERS % 'revert_resize')
        # Load the instance.
        instance = self._get_server(context, req, id)
        try:
            # Call revert_resize in compute/api.py.
            self.compute_api.revert_resize(context, instance)
        ...

nova_queens/nova/compute/api.py

    @check_instance_lock
    @check_instance_cell
    @check_instance_state(vm_state=[vm_states.RESIZED])
    def revert_resize(self, context, instance):
        """Reverts a resize, deleting the 'new' instance in the process.

        Excerpt with parts elided ("...").
        """

        # Look up this instance's migration record with status 'finished'.
        elevated = context.elevated()
        migration = objects.Migration.get_by_instance_and_status(
            elevated, instance.uuid, 'finished')

        # If this was a resize down, a revert might go over quota.
        self._check_quota_for_upsize(context, instance, instance.flavor,
                                     instance.old_flavor)
        ...
        # Set the task state to RESIZE_REVERTING; the save expects that no
        # task is currently in progress.
        instance.task_state = task_states.RESIZE_REVERTING
        instance.save(expected_task_state=[None])

        # Mark the migration as 'reverting'.
        migration.status = 'reverting'
        migration.save()
        # Record the start of the revert-resize instance action.
        self._record_action_start(context, instance,
                                  instance_actions.REVERT_RESIZE)

        # Conductor updated the RequestSpec.flavor during the initial resize
        # operation to point at the new flavor, so we need to update the
        # RequestSpec to point back at the old flavor; otherwise subsequent
        # move operations through the scheduler would use the wrong flavor.
        try:
            reqspec = objects.RequestSpec.get_by_instance_uuid(
                context, instance.uuid)
            reqspec.flavor = instance.old_flavor
            reqspec.save()
        ...
        # RPC to revert_resize in compute/manager.py, addressed to the
        # migration's destination compute host.
        self.compute_rpcapi.revert_resize(context, instance,
                                          migration,
                                          migration.dest_compute)

nova_queens/nova/compute/manager.py

    @wrap_exception()
    @reverts_task_state
    @wrap_instance_event(prefix='compute')
    @errors_out_migration
    @wrap_instance_fault
    def revert_resize(self, context, instance, migration):
        """Destroys the new instance on the destination machine.

        Reverts the model changes, and powers on the old instance on the
        source machine.

        Excerpt with parts elided ("...").
        """
        # revert_resize is essentially a resize back to the old size, so we
        # need to send a usage event here.
        compute_utils.notify_usage_exists(self.notifier, context, instance,
                                          current_period=True)

        with self._error_out_instance_on_exception(context, instance):
            # Tear down the instance's networking (no host is passed here).
            self.network_api.setup_networks_on_host(context, instance,
                                                    teardown=True)
            # Convert the migration object to its primitive form.
            migration_p = obj_base.obj_to_primitive(migration)
            # Start the network migration.
            self.network_api.migrate_instance_start(context,
                                                    instance,
                                                    migration_p)
            # Get the instance's network and block device information.
            network_info = self.network_api.get_instance_nw_info(context,
                                                                 instance)
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                    context, instance.uuid)
            block_device_info = self._get_instance_block_device_info(
                                context, instance, bdms=bdms)

            # Only destroy the disks when instance storage is not shared
            # with the source compute host.
            destroy_disks = not self._is_instance_storage_shared(
                context, instance, host=migration.source_compute)
            # Destroy the new instance on the destination host.
            self.driver.destroy(context, instance, network_info,
                                block_device_info, destroy_disks)
            # Terminate the volume connections.
            self._terminate_volume_connections(context, instance, bdms)
            # Mark the migration as reverted.
            migration.status = 'reverted'
            ...
            # RPC back to the source host to finish the revert there.
            # See finish_revert_resize in compute/manager.py.
            self.compute_rpcapi.finish_revert_resize(context, instance,
                    migration, migration.source_compute)

同一个py文件

    @wrap_exception()
    @reverts_task_state
    @wrap_instance_event(prefix='compute')
    @errors_out_migration
    @wrap_instance_fault
    def finish_revert_resize(self, context, instance, migration):
        """Finishes the second half of reverting a resize on the source host.

        Brings the original source instance state back (active/shutoff) and
        reverts the resized attributes in the database.

        Excerpt with parts elided ("...").
        """
        ...
            # Emit the resize.revert start notifications.
            self._notify_about_instance_usage(
                    context, instance, "resize.revert.start")
            compute_utils.notify_about_instance_action(context, instance,
                self.host, action=fields.NotificationAction.RESIZE_REVERT,
                    phase=fields.NotificationPhase.START, bdms=bdms)

            # Revert the instance record: take the stashed old_vm_state
            # (defaulting to ACTIVE), restore the old flavor and point the
            # host/node back at the source.
            old_vm_state = instance.system_metadata.pop('old_vm_state',
                                                        vm_states.ACTIVE)

            self._set_instance_info(instance, instance.old_flavor)
            instance.old_flavor = None
            instance.new_flavor = None
            instance.host = migration.source_compute
            instance.node = migration.source_node
            instance.save()

            self._revert_allocation(context, instance, migration)
            # Set up networking on the source host.
            self.network_api.setup_networks_on_host(context, instance,
                                                    migration.source_compute)
            migration_p = obj_base.obj_to_primitive(migration)
            ...
            # In the primitive copy, make the source compute the destination
            # for the network migration below.
            migration_p['dest_compute'] = migration.source_compute
            # Finish migrating the instance's networking.
            self.network_api.migrate_instance_finish(context,
                                                     instance,
                                                     migration_p)
            network_info = self.network_api.get_instance_nw_info(context,
                                                                 instance)
            ...
            self._update_volume_attachments(context, instance, bdms)
            # Get the block device info with refreshed connection info.
            block_device_info = self._get_instance_block_device_info(
                    context, instance, refresh_conn_info=True, bdms=bdms)

            # Do not power the guest on if it was STOPPED before the resize.
            power_on = old_vm_state != vm_states.STOPPED
            # Finish the revert in the virt driver; see
            # finish_revert_migration in the libvirt driver.py.
            self.driver.finish_revert_migration(context, instance,
                                       network_info,
                                       block_device_info, power_on)

            instance.drop_migration_context()
            instance.launched_at = timeutils.utcnow()
            # Persist; the save expects the task state RESIZE_REVERTING.
            instance.save(expected_task_state=task_states.RESIZE_REVERTING)
            self._complete_volume_attachments(context, bdms)
            LOG.info("Updating instance to original state: '%s'",
                     old_vm_state, instance=instance)
            # Set the instance state according to the original vm_state.
            if power_on:
                instance.vm_state = vm_states.ACTIVE
                instance.task_state = None
                instance.save()
            else:
                # The instance was stopped before the resize: go through
                # stop_instance with a clean shutdown.
                instance.task_state = task_states.POWERING_OFF
                instance.save()
                self.stop_instance(context, instance=instance,
                                   clean_shutdown=True)
            # Emit the resize.revert end notifications.
            self._notify_about_instance_usage(
                    context, instance, "resize.revert.end")
            compute_utils.notify_about_instance_action(context, instance,
                self.host, action=fields.NotificationAction.RESIZE_REVERT,
                    phase=fields.NotificationPhase.END, bdms=bdms)

向libvirt发出命令,根据xml启动虚拟机

nova_queens/nova/virt/libvirt/driver.py

    def finish_revert_migration(self, context, instance, network_info,
                                block_device_info=None, power_on=True):
        """Restore the instance directory and re-create the guest domain.

        Moves ``<path>_resize`` back to the instance path if it exists, rolls
        the root disk back to the resize snapshot when the root disk exists,
        then regenerates the guest XML and creates the domain. When
        ``power_on`` is true, waits until the guest is running.
        """
        # NOTE(review): this debug message is not in English, unlike the
        # other log messages in this method -- presumably translated by
        # accident; confirm before changing the runtime string.
        LOG.debug("开始finish_revert_migration",
                  instance=instance)
        # Compute the instance path and its _resize counterpart.
        inst_base = libvirt_utils.get_instance_path(instance)
        inst_base_resize = inst_base + "_resize"

        # If we are recovering from a failed migration, make sure no
        # leftover same-host base directory would conflict: when the _resize
        # path exists, clean up inst_base and move the _resize directory
        # back into its place.
        if os.path.exists(inst_base_resize):
            self._cleanup_failed_migration(inst_base)
            utils.execute('mv', inst_base_resize, inst_base)
        # Snapshot handling: roll the root disk back to the resize snapshot
        # and always remove that snapshot afterwards.
        root_disk = self.image_backend.by_name(instance, 'disk')
        if root_disk.exists():
            try:
                root_disk.rollback_to_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
            except exception.SnapshotNotFound:
                LOG.warning("Failed to rollback snapshot (%s)",
                            libvirt_utils.RESIZE_SNAPSHOT_NAME)
            finally:
                root_disk.remove_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME,
                                      ignore_errors=True)
        # Get the disk info.
        disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
                                            instance,
                                            instance.image_meta,
                                            block_device_info)
        # Generate the guest XML from the above and create the domain.
        xml = self._get_guest_xml(context, instance, network_info, disk_info,
                                  instance.image_meta,
                                  block_device_info=block_device_info)
        self._create_domain_and_network(context, xml, instance, network_info,
                                        block_device_info=block_device_info,
                                        power_on=power_on,
                                        vifs_already_plugged=True)
        # Wait for the guest to be running (polling every 0.5 seconds).
        if power_on:
            timer = loopingcall.FixedIntervalLoopingCall(
                                                    self._wait_for_running,
                                                    instance)
            timer.start(interval=0.5).wait()

        LOG.debug("finish_revert_migration finished successfully.",
                  instance=instance)

冷迁移/Resize相对详细的流程基本就是这样,但是在准备时期具体的quota处理、回滚时期资源、quota的细节上还没有深入。
如果要做增强开发,或者增加热规格调整这样的新特性的话,这几点基本是避不开的。
但是楼主这个懒货还要睡懒觉、做好吃的、喂猫猫、打游戏,看NASA新出的片,估计要到猴年马月了

参考文献

https://www.codetd.com/article/9445368
https://blog.csdn.net/tantexian/article/details/41444461
https://blog.csdn.net/tantexian/article/details/41519135
https://blog.csdn.net/tantexian/article/details/41519179

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值