上一节我们分析了冷迁移和resize上层各自的逻辑,本节主要讲解两者底层共用的代码逻辑。其中compute_api.resize()会进一步调用nova/compute/api.py中的resize()方法。部分代码和注释如下所示:
@wrap_check_policy
@check_instance_lock
@check_instance_cell
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
           **extra_instance_updates):
    """Resize (ie, migrate) a running instance.

    If flavor_id is None, the process is considered a migration, keeping
    the original flavor_id. If flavor_id is not None, the instance should
    be migrated to a new host and resized to the new flavor_id.

    :param context: request context; its project_id is logged when the
        quota is exceeded
    :param instance: the instance to resize or cold-migrate
    :param flavor_id: id of the target flavor, or a falsy value for a
        plain migration
    :param clean_shutdown: passed through unchanged to
        compute_task_api.resize_instance()
    :param extra_instance_updates: extra fields applied with
        instance.update() before the instance is saved
    :raises exception.CannotResizeDisk: the new flavor has root_gb == 0,
        the current one does not, and the instance is not volume backed
    :raises exception.FlavorNotFound: the new flavor is falsy, or it
        differs from the current flavor and is disabled
    :raises exception.CannotResizeToSameFlavor: a flavor_id was given
        that matches the current flavor and cell_type is not 'compute'
    :raises exception.TooManyInstances: reserving the quota delta raised
        OverQuota
    """
    self._check_auto_disk_config(instance, **extra_instance_updates)

    current_instance_type = instance.get_flavor()

    # With no flavor_id the instance keeps its current flavor; otherwise
    # look up the flavor for the flavor_id supplied by the caller.
    if not flavor_id:
        LOG.debug("flavor_id is None. Assuming migration.",
                  instance=instance)
        new_instance_type = current_instance_type
    else:
        new_instance_type = flavors.get_flavor_by_flavor_id(
            flavor_id, read_deleted="no")
        if (new_instance_type.get('root_gb') == 0 and
                current_instance_type.get('root_gb') != 0 and
                not self.is_volume_backed_instance(context, instance)):
            reason = _('Resize to zero disk flavor is not allowed.')
            raise exception.CannotResizeDisk(reason=reason)

    # NOTE(review): this check runs after new_instance_type.get() has
    # already been called in the branch above, so it cannot guard that call.
    if not new_instance_type:
        raise exception.FlavorNotFound(flavor_id=flavor_id)

    current_instance_type_name = current_instance_type['name']
    new_instance_type_name = new_instance_type['name']
    LOG.debug("Old instance type %(current_instance_type_name)s, "
              "new instance type %(new_instance_type_name)s",
              {'current_instance_type_name': current_instance_type_name,
               'new_instance_type_name': new_instance_type_name},
              instance=instance)

    same_instance_type = (current_instance_type['id'] ==
                          new_instance_type['id'])

    # NOTE(sirp): We don't want to force a customer to change their flavor
    # when Ops is migrating off of a failed host.
    if not same_instance_type and new_instance_type.get('disabled'):
        raise exception.FlavorNotFound(flavor_id=flavor_id)

    if same_instance_type and flavor_id and self.cell_type != 'compute':
        raise exception.CannotResizeToSameFlavor()

    # ensure there is sufficient headroom for upsizes
    if flavor_id:
        # Work out the quota delta the resize needs (per the original
        # author's note, mainly vCPUs and RAM -- not visible from here).
        deltas = compute_utils.upsize_quota_delta(context,
                                                  new_instance_type,
                                                  current_instance_type)
        try:
            # Reserve that delta against the quota.
            quotas = compute_utils.reserve_quota_delta(context, deltas,
                                                       instance)
        except exception.OverQuota as exc:
            quotas = exc.kwargs['quotas']
            overs = exc.kwargs['overs']
            usages = exc.kwargs['usages']
            headroom = self._get_headroom(quotas, usages, deltas)
            (overs, reqs, total_alloweds,
             useds) = self._get_over_quota_detail(headroom, overs, quotas,
                                                  deltas)
            LOG.warning(_LW("%(overs)s quota exceeded for %(pid)s,"
                            " tried to resize instance."),
                        {'overs': overs, 'pid': context.project_id})
            raise exception.TooManyInstances(overs=overs,
                                             req=reqs,
                                             used=useds,
                                             allowed=total_alloweds)
    else:
        # Plain migration: nothing to reserve, use an empty Quotas object.
        quotas = objects.Quotas(context=context)

    # Move the instance into the RESIZE_PREP task state.
    instance.task_state = task_states.RESIZE_PREP
    instance.progress = 0
    # Apply the extra updates and save the instance; save() is told to
    # expect a current task_state of None.
    instance.update(extra_instance_updates)
    instance.save(expected_task_state=[None])

    filter_properties = {'ignore_hosts': []}

    # Decide whether the instance may be resized onto its current host.
    # When allow_resize_to_same_host is false, the instance's own host is
    # added to ignore_hosts so that later scheduling will not pick it.
    if not CONF.allow_resize_to_same_host:
        filter_properties['ignore_hosts'].append(instance.host)

    if self.cell_type == 'api':
        # Commit reservations early and create migration record.
        self._resize_cells_support(context, quotas, instance,
                                   current_instance_type,
                                   new_instance_type)

    # Record the start of the action: MIGRATE (cold migration) when no
    # flavor_id was given, otherwise RESIZE.
    if not flavor_id:
        self._record_action_start(context, instance,
                                  instance_actions.MIGRATE)
    else:
        self._record_action_start(context, instance,
                                  instance_actions.RESIZE)

    scheduler_hint = {'filter_properties': filter_properties}
    # Hand off to the compute task (conductor) API, which forwards the
    # request over RPC to the conductor manager.
    self.compute_task_api.resize_instance(
        context, instance,
        extra_instance_updates, scheduler_hint=scheduler_hint,
        flavor=new_instance_type,
        reservations=quotas.reservations or [],
        clean_shutdown=clean_shutdown)
接着调用nova/conductor/api.py.ComputeTaskAPI.resize_instance()方法,其代码如下:
def resize_instance(self, context, instance, extra_instance_updates,
                    scheduler_hint, flavor, reservations,
                    clean_shutdown=True):
    """Forward a resize/cold-migrate request to the conductor RPC API.

    Calls conductor_compute_rpcapi.migrate_server() with live=False and
    rebuild=False, and with block_migration and disk_over_commit set to
    None. Nothing is returned.

    :param context: request context, passed through
    :param instance: the instance to move, passed through
    :param extra_instance_updates: accepted but unused (see NOTE below)
    :param scheduler_hint: scheduler hint dict, passed through
    :param flavor: target flavor, passed through
    :param reservations: quota reservations, passed through
    :param clean_shutdown: passed through
    """
    # NOTE(comstud): 'extra_instance_updates' is not used here but is
    # needed for compatibility with the cells_rpcapi version of this
    # method.
    self.conductor_compute_rpcapi.migrate_server(
        context, instance, scheduler_hint, live=False, rebuild=False,
        flavor=flavor, block_migration=None, disk_over_commit=None,
        reservations=reservations, clean_shutdown=clean_shutdown)
接着继续调用nova/conductor/rpcapi.py.ComputeTaskAPI.migrate_server()方法,其代码如下:
def migrate_server(self, context, instance, scheduler_hint, live, rebuild,
                   flavor, block_migration, disk_over_commit,
                   reservations=None, clean_shutdown=True):
    """Send a 'migrate_server' call using the newest version the client allows.

    The message starts at version 1.11 and is downgraded step by step
    while the client reports it cannot send the current version:

    * 1.10 -- 'clean_shutdown' is removed from the arguments
    * 1.6  -- 'flavor' is converted to a primitive
    * 1.4  -- 'instance' is converted to a primitive

    :returns: whatever the call on the prepared client context returns
    """
    # On the cold-migration/resize path the caller passes live=False.
    kw = {'instance': instance, 'scheduler_hint': scheduler_hint,
          'live': live, 'rebuild': rebuild, 'flavor': flavor,
          'block_migration': block_migration,
          'disk_over_commit': disk_over_commit,
          'reservations': reservations,
          'clean_shutdown': clean_shutdown}
    version = '1.11'
    # The checks are sequential, not nested: each one re-tests whichever
    # version the previous step settled on.
    if not self.client.can_send_version(version):
        del kw['clean_shutdown']
        version = '1.10'
    if not self.client.can_send_version(version):
        kw['flavor'] = objects_base.obj_to_primitive(flavor)
        version = '1.6'
    if not self.client.can_send_version(version):
        kw['instance'] = jsonutils.to_primitive(
            objects_base.obj_to_primitive(instance))
        version = '1.4'
    cctxt = self.client.prepare(version=version)
    return cctxt.call(context, 'migrate_server', **kw)
nova-conductor会收到该请求,根据路由映射,该请求会递交给nova/conductor/manager.py.ComputeTaskManager.migrate_server()去处理。其代码和注释如下:
@messaging.expected_exceptions(exception.NoValidHost,
                               exception.ComputeServiceUnavailable,
                               exception.InvalidHypervisorType,
                               exception.InvalidCPUInfo,
                               exception.UnableToMigrateToSelf,
                               exception.DestinationHypervisorTooOld,
                               exception.InvalidLocalStorage,
                               exception.InvalidSharedStorage,
                               exception.HypervisorUnavailable,
                               exception.InstanceInvalidState,
                               exception.MigrationPreCheckError,
                               exception.LiveMigrationWithOldNovaNotSafe,
                               exception.UnsupportedPolicyException)
def migrate_server(self, context, instance, scheduler_hint, live, rebuild,
                   flavor, block_migration, disk_over_commit,
                   reservations=None, clean_shutdown=True):
    """Dispatch a migration request to the live or cold migration path.

    * live, not rebuild and no flavor -> _live_migrate()
    * not live, not rebuild and a flavor -> _cold_migrate(), wrapped in
      a 'cold_migrate' EventReporter
    * any other combination -> NotImplementedError

    Before dispatching, a non-object instance is converted to an
    objects.Instance and a non-object flavor is re-read by its id.

    :raises NotImplementedError: for an unsupported combination of
        live, rebuild and flavor
    """
    if instance and not isinstance(instance, nova_object.NovaObject):
        # NOTE(danms): Until v2 of the RPC API, we need to tolerate
        # old-world instance objects here
        attrs = ['metadata', 'system_metadata', 'info_cache',
                 'security_groups']
        instance = objects.Instance._from_db_object(
            context, objects.Instance(), instance,
            expected_attrs=attrs)
    # NOTE: Remove this when we drop support for v1 of the RPC API
    if flavor and not isinstance(flavor, objects.Flavor):
        # Code downstream may expect extra_specs to be populated since it
        # is receiving an object, so lookup the flavor to ensure this.
        flavor = objects.Flavor.get_by_id(context, flavor['id'])
    if live and not rebuild and not flavor:
        self._live_migrate(context, instance, scheduler_hint,
                           block_migration, disk_over_commit)
    # Not live, not a rebuild, and a flavor is given: this is a resize or
    # a cold migration, which share the code path below.
    elif not live and not rebuild and flavor:
        instance_uuid = instance.uuid
        with compute_utils.EventReporter(context, 'cold_migrate',
                                         instance_uuid):
            self._cold_migrate(context, instance, flavor,
                               scheduler_hint['filter_properties'],
                               reservations, clean_shutdown)
    else:
        raise NotImplementedError()
接着调用_cold_migrate()方法,其代码和注释如下:
def _cold_migrate(self, context, instance, flavor, filter_properties,
                  reservations, clean_shutdown):
    """Build and run the cold-migrate task, resetting state on failure.

    A request spec is built from the image recorded in the instance's
    system metadata, then the task from _build_cold_migrate_task() is
    executed. On any exception, task_state is reset to None and a
    'migrate_server' notification is sent via _set_vm_state_and_notify().

    :raises exception.NoValidHost: re-raised with a message that says
        whether the request was a cold migrate or a resize
    :raises Exception: any other error from the task is re-raised
    """
    image = utils.get_image_from_system_metadata(
        instance.system_metadata)

    request_spec = scheduler_utils.build_request_spec(
        context, image, [instance], instance_type=flavor)
    task = self._build_cold_migrate_task(context, instance, flavor,
                                         filter_properties, request_spec,
                                         reservations, clean_shutdown)
    try:
        task.execute()
    except exception.NoValidHost as ex:
        # Keep the current vm_state, defaulting to ACTIVE when it is unset.
        vm_state = instance.vm_state
        if not vm_state:
            vm_state = vm_states.ACTIVE
        updates = {'vm_state': vm_state, 'task_state': None}
        self._set_vm_state_and_notify(context, instance.uuid,
                                      'migrate_server',
                                      updates, ex, request_spec)

        # if the flavor IDs match, it's migrate; otherwise resize
        if flavor.id == instance.instance_type_id:
            msg = _("No valid host found for cold migrate")
        else:
            msg = _("No valid host found for resize")
        raise exception.NoValidHost(reason=msg)
    except exception.UnsupportedPolicyException as ex:
        with excutils.save_and_reraise_exception():
            vm_state = instance.vm_state
            if not vm_state:
                vm_state = vm_states.ACTIVE
            updates = {'vm_state': vm_state, 'task_state': None}
            self._set_vm_state_and_notify(context, instance.uuid,
                                          'migrate_server',
                                          updates, ex, request_spec)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            # Unlike the handlers above, vm_state is used as-is here,
            # with no fallback to ACTIVE.
            updates = {'vm_state': instance.vm_state,
                       'task_state': None}
            self._set_vm_state_and_notify(context, instance.uuid,
                                          'migrate_server',
                                          updates, ex, request_spec)
接着调用_build_cold_migrate_task()方法,其代码和注释如下:
def _build_cold_migrate_task(self, context, instance, flavor,
                             filter_properties, request_spec, reservations,
                             clean_shutdown):
    """Create the migrate.MigrationTask for a cold migration or resize.

    All arguments are passed through to the task, together with this
    object's compute_rpcapi and scheduler_client.

    :returns: the newly constructed migrate.MigrationTask
    """
    return migrate.MigrationTask(context, instance, flavor,
                                 filter_properties, request_spec,
                                 reservations, clean_shutdown,
                                 self.compute_rpcapi,
                                 self.scheduler_client)
后面会进一步调用nova/conductor/tasks/migrate.py.MigrationTask._execute()方法。其代码和注释如下:
def _execute(self):
    """Pick a destination host and ask its compute service to prep the resize.

    Rebuilds the Quotas object from the stored reservations, prepares
    the filter properties, asks the scheduler for destinations, and calls
    compute_rpcapi.prep_resize() for the first host returned.
    """
    image = self.request_spec.get('image')
    self.quotas = objects.Quotas.from_reservations(self.context,
                                                   self.reservations,
                                                   instance=self.instance)
    scheduler_utils.setup_instance_group(self.context, self.request_spec,
                                         self.filter_properties)
    scheduler_utils.populate_retry(self.filter_properties,
                                   self.instance.uuid)
    # Ask nova-scheduler (per the original author's note, a synchronous
    # message) to select the host(s) the instance can be moved to.
    # Call chain per that note:
    # SchedulerClient -> SchedulerQueryClient -> SchedulerAPI
    hosts = self.scheduler_client.select_destinations(
        self.context, self.request_spec, self.filter_properties)
    # Use the first host returned (per the original author's note, the
    # one with the highest weight).
    host_state = hosts[0]

    scheduler_utils.populate_filter_properties(self.filter_properties,
                                               host_state)
    # context is not serializable
    self.filter_properties.pop('context', None)

    (host, node) = (host_state['host'], host_state['nodename'])
    self.compute_rpcapi.prep_resize(
        self.context, image, self.instance, self.flavor, host,
        self.reservations, request_spec=self.request_spec,
        filter_properties=self.filter_properties, node=node,
        clean_shutdown=self.clean_shutdown)
接下来继续调用nova/scheduler/rpcapi.py.select_destinations()方法,其代码和注释如下:
def select_destinations(self, ctxt, request_spec, filter_properties):
    """Call 'select_destinations' on the scheduler at RPC version 4.0.

    :param ctxt: request context
    :param request_spec: request spec forwarded to the scheduler
    :param filter_properties: filter properties forwarded to the scheduler
    :returns: the result of the call on the prepared client context
    """
    cctxt = self.client.prepare(version='4.0')
    return cctxt.call(ctxt, 'select_destinations',
                      request_spec=request_spec,
                      filter_properties=filter_properties)
接下来进一步调用nova/scheduler/manager.py.SchedulerManager.select_destinations()方法。其代码和注释如下:
def select_destinations(self, context, request_spec, filter_properties):
    """Returns destinations(s) best suited for this request_spec and
    filter_properties.

    The result should be a list of dicts with 'host', 'nodename' and
    'limits' as keys.

    The selection itself is delegated to self.driver; its result is
    converted with jsonutils.to_primitive() before being returned.
    """
    dests = self.driver.select_destinations(context, request_spec,
                                            filter_properties)
    return jsonutils.to_primitive(dests)
此时要注意,scheduler_driver的类型,该参数是在nova.conf配置的,默认采用nova.scheduler.filter_scheduler.FilterScheduler。故应该调用nova/scheduler/filter_scheduler.py.select_destinations()方法,其代码和注释如下:
def select_destinations(self, context, request_spec, filter_properties):
    """Selects a filtered set of hosts and nodes.

    Emits 'scheduler.select_destinations.start' and '...end'
    notifications around the scheduling run.

    :returns: a list of dicts with 'host', 'nodename' and 'limits' keys,
        one per selected host
    :raises exception.NoValidHost: fewer hosts were selected than
        instances were requested
    """
    # TODO(sbauza): Change the select_destinations method to accept a
    # RequestSpec object directly (and add a new RPC API method for passing
    # a RequestSpec object over the wire)
    spec_obj = objects.RequestSpec.from_primitives(context,
                                                   request_spec,
                                                   filter_properties)
    self.notifier.info(
        context, 'scheduler.select_destinations.start',
        dict(request_spec=spec_obj.to_legacy_request_spec_dict()))

    num_instances = spec_obj.num_instances
    selected_hosts = self._schedule(context, spec_obj)

    # Couldn't fulfill the request_spec
    if len(selected_hosts) < num_instances:
        # NOTE(Rui Chen): If multiple creates failed, set the updated time
        # of selected HostState to None so that these HostStates are
        # refreshed according to database in next schedule, and release
        # the resource consumed by instance in the process of selecting
        # host.
        for host in selected_hosts:
            host.obj.updated = None

        # Log the details but don't put those into the reason since
        # we don't want to give away too much information about our
        # actual environment.
        LOG.debug('There are %(hosts)d hosts available but '
                  '%(num_instances)d instances requested to build.',
                  {'hosts': len(selected_hosts),
                   'num_instances': num_instances})

        reason = _('There are not enough hosts available.')
        raise exception.NoValidHost(reason=reason)

    dests = [dict(host=host.obj.host, nodename=host.obj.nodename,
                  limits=host.obj.limits) for host in selected_hosts]

    self.notifier.info(
        context, 'scheduler.select_destinations.end',
        dict(request_spec=spec_obj.to_legacy_request_spec_dict()))
    return dests
其中_schedule()方法如下:
def _schedule(self, context, spec_obj):
    """Returns a list of hosts that meet the required specs,
    ordered by their fitness.

    For each requested instance the candidate hosts are filtered and
    weighed, one host is chosen at random from the top
    scheduler_host_subset_size weighed hosts, and the request is
    consumed from that host. The loop stops early when filtering leaves
    no hosts, so the result may be shorter than spec_obj.num_instances.
    """
    elevated = context.elevated()

    # Load the scheduler configuration options. Per the original
    # author's note, nova.conf's scheduler_json_config_location can point
    # to a JSON file holding filter parameters -- not visible from here.
    config_options = self._get_configuration_options()

    # Find our local list of acceptable hosts by repeatedly
    # filtering and weighing our options. Each time we choose a
    # host, we virtually consume resources on it so subsequent
    # selections can adjust accordingly.

    # Note: remember, we are using an iterator here. So only
    # traverse this list once. This can bite you if the hosts
    # are being scanned in a filter or weighing function.
    # Fetch the host states to choose from (per the original author's
    # note, all active hosts).
    hosts = self._get_all_host_states(elevated)

    selected_hosts = []
    num_instances = spec_obj.num_instances
    # TODO(sbauza): Modify the interfaces for HostManager and filters to
    # accept the RequestSpec object directly (in a later patch hopefully)
    filter_properties = spec_obj.to_legacy_filter_properties_dict()
    # NOTE(sbauza): Adding temporarly some keys since filters are
    # directly using it - until we provide directly RequestSpec
    filter_properties.update(
        {'request_spec': spec_obj.to_legacy_request_spec_dict(),
         'instance_type': spec_obj.flavor})
    # TODO(sbauza): Adding two keys not used in-tree but which will be
    # provided as non-fields for the RequestSpec once we provide it to the
    # filters
    # Add the context and configuration options to the filter properties.
    filter_properties.update({'context': context,
                              'config_options': config_options})

    for num in range(num_instances):
        # Filter local hosts based on requirements ...
        # Keep only the hosts that pass the filters. Per the original
        # author's note, the filters are chosen with nova.conf's
        # scheduler_default_filters option and live in
        # nova/scheduler/filters.
        hosts = self.host_manager.get_filtered_hosts(
            hosts, filter_properties, index=num)
        if not hosts:
            # Can't get any more locally.
            break

        LOG.debug("Filtered %(hosts)s", {'hosts': hosts})

        # Weigh the remaining hosts. Per the original author's note, the
        # result is sorted by descending weight, the weighers are chosen
        # with nova.conf's scheduler_weight_classes option and live in
        # nova/scheduler/weights.
        weighed_hosts = self.host_manager.get_weighed_hosts(
            hosts, filter_properties)

        LOG.debug("Weighed %(hosts)s", {'hosts': weighed_hosts})

        # Clamp the subset size to the range [1, len(weighed_hosts)].
        scheduler_host_subset_size = CONF.scheduler_host_subset_size
        if scheduler_host_subset_size > len(weighed_hosts):
            scheduler_host_subset_size = len(weighed_hosts)
        if scheduler_host_subset_size < 1:
            scheduler_host_subset_size = 1

        # Choose at random within that leading subset; with a subset size
        # of 1 this is always the first weighed host.
        chosen_host = random.choice(
            weighed_hosts[0:scheduler_host_subset_size])
        LOG.debug("Selected host: %(host)s", {'host': chosen_host})
        selected_hosts.append(chosen_host)

        # Now consume the resources so the filter/weights
        # will change for the next instance.
        chosen_host.obj.consume_from_request(spec_obj)
        if filter_properties.get('group_updated') is True:
            filter_properties['group_hosts'].add(chosen_host.obj.host)
    return selected_hosts
接着回到_execute()方法,查看compute_rpcapi.prep_resize()方法,调用nova/compute/rpcapi.py.ComputeAPI.prep_resize()方法,其代码如下:
def prep_resize(self, ctxt, image, instance, instance_type, host,
                reservations=None, request_spec=None,
                filter_properties=None, node=None,
                clean_shutdown=True):
    """Cast 'prep_resize' to the compute service on ``host``.

    The image is converted to a primitive before sending. Version 4.1 is
    used when the client can send it; otherwise the message falls back
    to 4.0 and instance_type is converted to a primitive as well. The
    message is sent with cast() and nothing is returned.
    """
    image_p = jsonutils.to_primitive(image)
    msg_args = {'instance': instance,
                'instance_type': instance_type,
                'image': image_p,
                'reservations': reservations,
                'request_spec': request_spec,
                'filter_properties': filter_properties,
                'node': node,
                'clean_shutdown': clean_shutdown}
    version = '4.1'
    if not self.client.can_send_version(version):
        version = '4.0'
        msg_args['instance_type'] = objects_base.obj_to_primitive(
            instance_type)
    cctxt = self.client.prepare(server=host, version=version)
    cctxt.cast(ctxt, 'prep_resize', **msg_args)
接着,将消息传给nova/compute/manager.py.ComputeManager.prep_resize()方法,该节点为目的计算节点,其代码和注释如下:
@wrap_exception()
@reverts_task_state
@wrap_instance_event
@wrap_instance_fault
def prep_resize(self, context, image, instance, instance_type,
                reservations, request_spec, filter_properties, node,
                clean_shutdown):
    """Initiates the process of moving a running instance to another host.

    Possibly changes the RAM and disk size in the process.

    Sends "resize.prep.start" and "resize.prep.end" usage notifications
    around the call to _prep_resize(). The "end" notification is sent
    from a finally block, so it is emitted whether or not the prep
    succeeded.

    :raises exception.MigrationPreCheckError: re-raised unchanged; any
        other exception is handed to _reschedule_resize_or_reraise()
    """
    if node is None:
        # If scheduling did not pass a node, fall back to the first
        # node reported by the driver.
        node = self.driver.get_available_nodes(refresh=True)[0]
        LOG.debug("No node specified, defaulting to %s", node,
                  instance=instance)

    # NOTE(melwitt): Remove this in version 5.0 of the RPC API
    # Code downstream may expect extra_specs to be populated since it
    # is receiving an object, so lookup the flavor to ensure this.
    if not isinstance(instance_type, objects.Flavor):
        instance_type = objects.Flavor.get_by_id(context,
                                                 instance_type['id'])

    quotas = objects.Quotas.from_reservations(context,
                                              reservations,
                                              instance=instance)
    with self._error_out_instance_on_exception(context, instance,
                                               quotas=quotas):
        compute_utils.notify_usage_exists(self.notifier, context, instance,
                                          current_period=True)
        self._notify_about_instance_usage(
            context, instance, "resize.prep.start")
        try:
            self._prep_resize(context, image, instance,
                              instance_type, quotas,
                              request_spec, filter_properties,
                              node, clean_shutdown)
        # NOTE(dgenin): This is thrown in LibvirtDriver when the
        #               instance to be migrated is backed by LVM.
        #               Remove when LVM migration is implemented.
        except exception.MigrationPreCheckError:
            raise
        except Exception:
            # try to re-schedule the resize elsewhere:
            exc_info = sys.exc_info()
            self._reschedule_resize_or_reraise(context, image, instance,
                                               exc_info, instance_type,
                                               quotas, request_spec,
                                               filter_properties)
        finally:
            extra_usage_info = dict(
                new_instance_type=instance_type.name,
                new_instance_type_id=instance_type.id)

            self._notify_about_instance_usage(
                context, instance, "resize.prep.end",
                extra_usage_info=extra_usage_info)
后面的内容下次继续讲解。