1. nova/api/openstack/compute/servers.py文件中
def _action_reboot(self, req, id, body):
    """Handle the ``reboot`` action for the server identified by ``id``.

    The reboot type is read from ``body['reboot']['type']`` and upper-cased,
    the ``reboot`` policy is checked against the instance's project, and the
    request is then handed to the compute API.

    :param req: incoming request; the nova context is read from its environ
    :param id: identifier of the server to reboot
    :param body: request body carrying ``{'reboot': {'type': ...}}``
    :raises exc.HTTPConflict: when the instance is locked
    """
    reboot_type = body['reboot']['type'].upper()
    context = req.environ['nova.context']
    instance = self._get_server(context, req, id)

    policy_rule = server_policies.SERVERS % 'reboot'
    policy_target = {'project_id': instance.project_id}
    context.can(policy_rule, target=policy_target)

    try:
        # Main entry point into the compute API.
        self.compute_api.reboot(context, instance, reboot_type)
    # Translate compute-layer failures into HTTP errors.
    except exception.InstanceIsLocked as locked_error:
        raise exc.HTTPConflict(explanation=locked_error.format_message())
    except exception.InstanceInvalidState as state_error:
        common.raise_http_conflict_for_instance_invalid_state(
            state_error, 'reboot', id)
2. nova/compute/api.py文件中
@check_instance_lock
def reboot(self, context, instance, reboot_type):
    """Reboot the given instance.

    ``reboot_type == 'SOFT'`` selects the soft reboot helper; every other
    value is routed to the hard reboot helper.
    """
    do_reboot = (self._soft_reboot if reboot_type == 'SOFT'
                 else self._hard_reboot)
    do_reboot(context, instance)
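说明:步骤1中捕获了InstanceInvalidState异常,但reboot函数本身只有@check_instance_lock(锁定检测),并没有状态检测的装饰器。
状态检测实际上发生在它调用的self._soft_reboot和self._hard_reboot函数上(上游代码中这两个函数带有@check_instance_state装饰器,下面的摘录省略了装饰器)。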
3. 软重启和硬重启在前面调用的函数流程是一样的,只是reboot_type参数分别为'SOFT'和'HARD';下面以软重启为例
def _soft_reboot(self, context, instance):
    """Start a soft reboot of ``instance`` from the API side.

    Sets the task state to REBOOTING (the save expects the current task
    state to be ``None``), records the start of the reboot action and then
    sends the reboot request to the compute service over RPC.
    """
    instance.task_state = task_states.REBOOTING
    instance.save(expected_task_state=[None])

    self._record_action_start(context, instance, instance_actions.REBOOT)

    self.compute_rpcapi.reboot_instance(
        context,
        instance=instance,
        block_device_info=None,
        reboot_type='SOFT')
首先更新并保存任务状态(task_state),然后记录操作,最后发送RPC消息。
这里有一个疑问:
compute_utils.notify_about_instance_action和self._record_action_start(context, instance, instance_actions.REBOOT)这两个函数之间有什么区别呢?
def _record_action_start(self, context, instance, action):
    """Record that ``action`` has started on ``instance``.

    Delegates to ``objects.InstanceAction.action_start`` using the
    instance's uuid; the created action object is not requested back.
    """
    start_action = objects.InstanceAction.action_start
    start_action(context, instance.uuid, action, want_result=False)
@base.remotable_classmethod
def action_start(cls, context, instance_uuid, action_name,
                 want_result=True):
    """Store the start of an instance action in the database.

    :param context: request context
    :param instance_uuid: uuid of the instance the action applies to
    :param action_name: name of the action being started
    :param want_result: when true, return an object built from the new
        database row; otherwise return ``None``
    """
    values = cls.pack_action_start(context, instance_uuid, action_name)
    db_action = db.action_start(context, values)
    if not want_result:
        return None
    return cls._from_db_object(context, cls(), db_action)
self._record_action_start函数的作用是向数据库中记录云主机的操作(instance action)!这个函数是在API侧发送RPC消息之前调用的。
compute_utils.notify_about_instance_action函数则是在计算节点收到RPC消息之后(见下面的_reboot_instance)调用的,用来发出操作开始/结束/出错的通知!
4.
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
@wrap_instance_fault
def reboot_instance(self, context, instance, block_device_info,
                    reboot_type):
    """Reboot ``instance`` while holding a lock keyed on its uuid.

    The actual work is done by ``_reboot_instance``; the nested function
    only exists so the lock can be named after ``instance.uuid``.
    """
    @utils.synchronized(instance.uuid)
    def do_reboot_instance():
        self._reboot_instance(
            context, instance, block_device_info, reboot_type)

    do_reboot_instance()
这里的代码写得有点绕:嵌套一层do_reboot_instance函数,是为了用@utils.synchronized(instance.uuid)以云主机的uuid加锁,使同一台云主机的重启操作串行执行。
真正执行重启的代码是_reboot_instance
def _reboot_instance(self, context, instance, block_device_info,
                     reboot_type):
    """Reboot an instance on this host.

    Marks the instance as reboot-pending, emits the ``reboot.start``
    notifications, asks the virt driver to reboot, then saves the resulting
    power/vm state and emits the ``reboot.end`` notifications.

    :param context: request context; an elevated copy is used from here on
    :param instance: instance to reboot; its task_state, power_state and
        vm_state are updated and saved
    :param block_device_info: ignored on input, it is rebuilt below from
        the instance's block device mappings
    :param reboot_type: ``"SOFT"`` selects the soft reboot states; any
        other value takes the hard reboot path
    """
    # acknowledge the request made it to the manager
    # Soft and hard reboot use different pending/expected task states.
    if reboot_type == "SOFT":
        instance.task_state = task_states.REBOOT_PENDING
        expected_states = task_states.soft_reboot_states
    else:
        instance.task_state = task_states.REBOOT_PENDING_HARD
        expected_states = task_states.hard_reboot_states

    context = context.elevated()
    LOG.info("Rebooting instance", instance=instance)

    # Block device mappings are looked up by the instance's uuid.
    bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
        context, instance.uuid)
    # Convert the mappings into the block_device_info handed to the driver.
    block_device_info = self._get_instance_block_device_info(
        context, instance, bdms=bdms)

    network_info = self.network_api.get_instance_nw_info(context, instance)

    accel_info = self._get_accel_info(context, instance)

    self._notify_about_instance_usage(context, instance, "reboot.start")
    compute_utils.notify_about_instance_action(
        context, instance, self.host,
        action=fields.NotificationAction.REBOOT,
        phase=fields.NotificationPhase.START,
        bdms=bdms
    )

    # Record the current power state together with the pending task state.
    instance.power_state = self._get_power_state(context, instance)
    instance.save(expected_task_state=expected_states)

    # A non-running instance is only warned about; the reboot proceeds.
    if instance.power_state != power_state.RUNNING:
        state = instance.power_state
        running = power_state.RUNNING
        LOG.warning('trying to reboot a non-running instance:'
                    ' (state: %(state)s expected: %(running)s)',
                    {'state': state, 'running': running},
                    instance=instance)

    # Passed to the driver below as its bad_volumes_callback.
    def bad_volumes_callback(bad_devices):
        self._handle_bad_volumes_detached(
            context, instance, bad_devices, block_device_info)

    try:
        # Don't change it out of rescue mode
        if instance.vm_state == vm_states.RESCUED:
            new_vm_state = vm_states.RESCUED
        else:
            new_vm_state = vm_states.ACTIVE
        new_power_state = None
        # Move from the *_PENDING to the matching *_STARTED task state.
        if reboot_type == "SOFT":
            instance.task_state = task_states.REBOOT_STARTED
            expected_state = task_states.REBOOT_PENDING
        else:
            instance.task_state = task_states.REBOOT_STARTED_HARD
            expected_state = task_states.REBOOT_PENDING_HARD
        instance.save(expected_task_state=expected_state)
        self.driver.reboot(context, instance,
                           network_info,
                           reboot_type,
                           block_device_info=block_device_info,
                           accel_info=accel_info,
                           bad_volumes_callback=bad_volumes_callback)

    except Exception as error:
        with excutils.save_and_reraise_exception() as ctxt:
            exc_info = sys.exc_info()
            # if the reboot failed but the VM is running don't
            # put it into an error state
            new_power_state = self._get_power_state(context, instance)
            if new_power_state == power_state.RUNNING:
                LOG.warning('Reboot failed but instance is running',
                            instance=instance)
                compute_utils.add_instance_fault_from_exc(context,
                        instance, error, exc_info)
                self._notify_about_instance_usage(context, instance,
                        'reboot.error', fault=error)
                compute_utils.notify_about_instance_action(
                    context, instance, self.host,
                    action=fields.NotificationAction.REBOOT,
                    phase=fields.NotificationPhase.ERROR,
                    exception=error, bdms=bdms
                )
                # Swallow the error and fall through to the state update.
                ctxt.reraise = False
            else:
                LOG.error('Cannot reboot instance: %s', error,
                          instance=instance)
                self._set_instance_obj_error_state(context, instance)

    # Reached after a successful reboot, or after a failed one that left
    # the instance running: persist the final instance state.
    if not new_power_state:
        new_power_state = self._get_power_state(context, instance)
    try:
        instance.power_state = new_power_state
        instance.vm_state = new_vm_state
        instance.task_state = None
        instance.save()
    except exception.InstanceNotFound:
        LOG.warning("Instance disappeared during reboot",
                    instance=instance)

    self._notify_about_instance_usage(context, instance, "reboot.end")
    compute_utils.notify_about_instance_action(
        context, instance, self.host,
        action=fields.NotificationAction.REBOOT,
        phase=fields.NotificationPhase.END,
        bdms=bdms
    )
4.1 _get_instance_block_device_info函数是如何调用的?
def _get_instance_block_device_info(self, context, instance,
                                    refresh_conn_info=False,
                                    bdms=None):
    """Transform block devices to the driver block_device format.

    :param context: request context
    :param instance: instance whose block devices are described
    :param refresh_conn_info: when true, refresh the connection info of
        the mapped devices; otherwise drop the mappings that have no
        ``connection_info``
    :param bdms: block device mappings to use; looked up by the
        instance's uuid when ``None``
    :returns: the block_device_info after conversion to the legacy format
    """
    if bdms is None:
        bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
            context, instance.uuid)

    block_device_info = driver.get_block_device_info(instance, bdms)
    mapping = driver.block_device_info_get_mapping(block_device_info)

    if refresh_conn_info:
        driver_block_device.refresh_conn_infos(
            mapping, context, instance, self.volume_api, self.driver)
    else:
        # Mappings whose connection_info is empty (returned as None) are
        # left out.
        block_device_info['block_device_mapping'] = [
            bdm for bdm in mapping if bdm.get('connection_info')]

    self._block_device_info_to_legacy(block_device_info)
    return block_device_info
5.soft_reboot
def _soft_reboot(self, instance):
    """Attempt to shutdown and restart the instance gracefully.

    :param instance: instance to reboot
    :returns: True when the domain id changed within
        ``CONF.libvirt.wait_soft_reboot_seconds`` polls (the domain was
        either re-created here or found already running again); False
        when the domain id never changed
    """
    guest = self._host.get_guest(instance)
    initial_state = guest.get_power_state(self._host)
    old_domid = guest.id

    # NOTE(vish): This check allows us to reboot an instance that
    #             is already shutdown.
    if initial_state == power_state.RUNNING:
        guest.shutdown()

    # NOTE(vish): This actually could take slightly longer than the
    #             FLAG defines depending on how long the get_info
    #             call takes to return.
    self._prepare_pci_devices_for_use(
        pci_manager.get_instance_pci_devs(instance, 'all'))

    stopped_states = (power_state.SHUTDOWN, power_state.CRASHED)
    for _ in range(CONF.libvirt.wait_soft_reboot_seconds):
        guest = self._host.get_guest(instance)
        current_state = guest.get_power_state(self._host)
        new_domid = guest.id

        # NOTE(ivoks): By checking domain IDs, we make sure we are
        #              not recreating domain that's already running.
        if old_domid == new_domid:
            greenthread.sleep(1)
            continue

        if current_state not in stopped_states:
            LOG.info("Instance may have been rebooted during soft "
                     "reboot, so return now.", instance=instance)
            return True

        LOG.info("Instance shutdown successfully.",
                 instance=instance)
        self._create_domain(domain=guest._domain)
        timer = loopingcall.FixedIntervalLoopingCall(
            self._wait_for_running, instance)
        timer.start(interval=0.5).wait()
        return True

    return False
软重启的过程就是调用domain的shutdown,等待关机完成后再重新创建(启动)domain。由于domain.id只有在虚拟机运行时才存在,并且每次启动都会重新分配,所以代码通过比较关机前后的domain.id来判断虚拟机是否已经关机或者已经自行重启;如果在wait_soft_reboot_seconds超时时间内domain.id一直没有变化,函数返回False。
6. hard_reboot
def _hard_reboot(self, context, instance, network_info,
                 block_device_info=None, accel_info=None):
    """Reboot a virtual machine, given an instance reference.

    Performs a Libvirt reset (if supported) on the domain.

    If Libvirt reset is unavailable this method actually destroys and
    re-creates the domain to ensure the reboot happens, as the guest
    OS cannot ignore this action.

    NOTE(review): the body below always destroys and re-creates the
    domain; no separate reset path is visible here.

    :param context: request context; ``context.auth_token`` decides
        whether missing backing files are re-populated
    :param instance: instance to reboot
    :param network_info: network information used for the teardown, the
        guest XML and the domain re-creation
    :param block_device_info: block device information used for the
        teardown, the disk info and the guest XML
    :param accel_info: passed through to the guest XML generation
    """
    # NOTE(sbauza): Since we undefine the guest XML when destroying, we
    # need to remember the existing mdevs for reusing them.
    mdevs = self._get_all_assigned_mediated_devices(instance)
    mdevs = list(mdevs.keys())
    # NOTE(mdbooth): In addition to performing a hard reboot of the domain,
    # the hard reboot operation is relied upon by operators to be an
    # automated attempt to fix as many things as possible about a
    # non-functioning instance before resorting to manual intervention.
    # With this goal in mind, we tear down all the aspects of an instance
    # we can here without losing data. This allows us to re-initialise from
    # scratch, and hopefully fix, most aspects of a non-functioning guest.
    self.destroy(context, instance, network_info, destroy_disks=False,
                 block_device_info=block_device_info)

    # Convert the system metadata to image metadata
    # NOTE(mdbooth): This is a workaround for stateless Nova compute
    #                https://bugs.launchpad.net/nova/+bug/1349978
    instance_dir = libvirt_utils.get_instance_path(instance)
    fileutils.ensure_tree(instance_dir)

    disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
                                        instance,
                                        instance.image_meta,
                                        block_device_info)
    # NOTE(vish): This could generate the wrong device_format if we are
    #             using the raw backend and the images don't exist yet.
    #             The create_images_and_backing below doesn't properly
    #             regenerate raw backend images, however, so when it
    #             does we need to (re)generate the xml after the images
    #             are in place.
    xml = self._get_guest_xml(context, instance, network_info, disk_info,
                              instance.image_meta,
                              block_device_info=block_device_info,
                              mdevs=mdevs, accel_info=accel_info)

    # NOTE(mdbooth): context.auth_token will not be set when we call
    #                _hard_reboot from resume_state_on_host_boot()
    if context.auth_token is not None:
        # NOTE (rmk): Re-populate any missing backing files.
        config = vconfig.LibvirtConfigGuest()
        config.parse_str(xml)
        backing_disk_info = self._get_instance_disk_info_from_config(
            config, block_device_info)
        self._create_images_and_backing(context, instance, instance_dir,
                                        backing_disk_info)

    # Initialize all the necessary networking, block devices and
    # start the instance.
    # NOTE(melwitt): Pass vifs_already_plugged=True here even though we've
    # unplugged vifs earlier. The behavior of neutron plug events depends
    # on which vif type we're using and we are working with a stale network
    # info cache here, so won't rely on waiting for neutron plug events.
    # vifs_already_plugged=True means "do not wait for neutron plug events"
    self._create_domain_and_network(context, xml, instance, network_info,
                                    block_device_info=block_device_info,
                                    vifs_already_plugged=True)
    self._prepare_pci_devices_for_use(
        pci_manager.get_instance_pci_devs(instance, 'all'))

    def _wait_for_reboot():
        """Called at an interval until the VM is running again."""
        state = self.get_info(instance).state

        if state == power_state.RUNNING:
            LOG.info("Instance rebooted successfully.",
                     instance=instance)
            # Stops the looping call started below.
            raise loopingcall.LoopingCallDone()

    # Poll every 0.5 seconds and block until _wait_for_reboot ends the loop.
    timer = loopingcall.FixedIntervalLoopingCall(_wait_for_reboot)
    timer.start(interval=0.5).wait()
硬重启直接调用destroy(destroy_disks=False,保留磁盘数据),相当于直接把虚拟机断电,并清除相应的外设信息;然后重新生成xml配置,重建并启动虚拟机,最后轮询等待虚拟机进入RUNNING状态