在通过/nova/compute/manager.py:ComputeManager中的_build_and_run_instance来创建虚拟机的时候,会通过claim机制来监视当前的资源是否够创建虚拟机
# Excerpt from nova/compute/manager.py (ComputeManager).  The quote is
# truncated in the article right after the `with` statement.
def _build_and_run_instance(self, context, instance, image, injected_files,
                            admin_password, requested_networks, security_groups,
                            block_device_mapping, node, limits, filter_properties):

    # Emit the 'create.start' notification tagged with the image name.
    image_name = image.get('name')
    self._notify_about_instance_usage(context, instance, 'create.start',
            extra_usage_info={'image_name': image_name})
    self._check_device_tagging(requested_networks, block_device_mapping)

    try:
        # Claim resources on the target node before building; the claim
        # context manager reverts the usage if the build fails.
        rt = self._get_resource_tracker(node)
        with rt.instance_claim(context, instance, limits):
可以看到这里是通过 _get_resource_tracker 得到 rt 对象的
def _get_resource_tracker(self, nodename):
    """Return the (cached) ResourceTracker for *nodename*.

    Raises NovaException when the driver does not manage that node.
    """
    rt = self._resource_tracker_dict.get(nodename)
    if not rt:
        if not self.driver.node_is_available(nodename):
            raise exception.NovaException(
                _("%s is not a valid node managed by this "
                  "compute host.") % nodename)

        # Lazily create a tracker for this node and cache it for reuse.
        rt = resource_tracker.ResourceTracker(self.host,
                                              self.driver,
                                              nodename)
        self._resource_tracker_dict[nodename] = rt
    return rt
_get_resource_tracker 首先检查_resource_tracker_dict是否已经包含这个rt了,没有的话,就通过resource_tracker.ResourceTracker 来创建这个class
其中ResourceTracker是在compute/resource_tracker.py 只是简单的赋值.最后将新创建的rt加到_resource_tracker_dict中
继续看instance_claim
def instance_claim(self, context, instance, limits=None):
    """Indicate that some resources are needed for an upcoming compute
    instance build operation.

    This should be called before the compute node is about to perform
    an instance build operation that will consume additional resources.

    :param context: security context
    :param instance: instance to reserve resources for.
    :type instance: nova.objects.instance.Instance object
    :param limits: Dict of oversubscription limits for memory, disk,
                   and CPUs.
    :returns: A Claim ticket representing the reserved resources.  It can
              be used to revert the resource usage if an error occurs
              during the instance build.
    """
    if self.disabled:
        # compute_driver doesn't support resource tracking, just
        # set the 'host' and node fields and continue the build:
        self._set_instance_host_and_node(instance)
        return claims.NopClaim()

    # sanity checks:
    if instance.host:
        LOG.warning(_LW("Host field should not be set on the instance "
                        "until resources have been claimed."),
                    instance=instance)

    if instance.node:
        LOG.warning(_LW("Node field should not be set on the instance "
                        "until resources have been claimed."),
                    instance=instance)

    # get the overhead required to build this instance:
    overhead = self.driver.estimate_instance_overhead(instance)
    LOG.debug("Memory overhead for %(flavor)d MB instance; %(overhead)d "
              "MB", {'flavor': instance.flavor.memory_mb,
                     'overhead': overhead['memory_mb']})
    LOG.debug("Disk overhead for %(flavor)d GB instance; %(overhead)d "
              "GB", {'flavor': instance.flavor.root_gb,
                     'overhead': overhead.get('disk_gb', 0)})

    pci_requests = objects.InstancePCIRequests.get_by_instance_uuid(
        context, instance.uuid)
    # Claim() runs the resource checks in its constructor and raises
    # ComputeResourcesUnavailable when they cannot be satisfied.
    claim = claims.Claim(context, instance, self, self.compute_node,
                         pci_requests, overhead=overhead, limits=limits)

    # self._set_instance_host_and_node() will save instance to the DB
    # so set instance.numa_topology first.  We need to make sure
    # that numa_topology is saved while under COMPUTE_RESOURCE_SEMAPHORE
    # so that the resource audit knows about any cpus we've pinned.
    instance_numa_topology = claim.claimed_numa_topology
    instance.numa_topology = instance_numa_topology
    self._set_instance_host_and_node(instance)

    if self.pci_tracker:
        # NOTE(jaypipes): ComputeNode.pci_device_pools is set below
        # in _update_usage_from_instance().
        self.pci_tracker.claim_instance(context, pci_requests,
                                        instance_numa_topology)

    # Mark resources in-use and update stats
    self._update_usage_from_instance(context, instance)

    elevated = context.elevated()
    # persist changes to the compute node:
    self._update(elevated)

    return claim
在instance_claim 中重点是申明了claim = claims.Claim(context, instance, self, self.compute_node,
pci_requests, overhead=overhead, limits=limits)
我们继续看看
class Claim(NopClaim):
    """A declaration that a compute host operation will require free resources.

    Claims serve as marker objects that resources are being held until the
    update_available_resource audit process runs to do a full reconciliation
    of resource usage.

    This information will be used to help keep the local compute hosts's
    ComputeNode model in sync to aid the scheduler in making efficient / more
    correct decisions with respect to host selection.
    """

    def __init__(self, context, instance, tracker, resources, pci_requests,
                 overhead=None, limits=None):
        super(Claim, self).__init__()
        # Stash a copy of the instance at the current point of time
        self.instance = instance.obj_clone()
        self._numa_topology_loaded = False
        self.tracker = tracker
        self._pci_requests = pci_requests

        if not overhead:
            # Default to zero overhead when the driver reports none.
            overhead = {'memory_mb': 0,
                        'disk_gb': 0}

        self.overhead = overhead
        self.context = context

        # Check claim at constructor to avoid mess code
        # Raise exception ComputeResourcesUnavailable if claim failed
        self._claim_test(resources, limits)
初始化一些变量,最重要的就是调用_claim_test
def _claim_test(self, resources, limits=None):
    """Test if this claim can be satisfied given available resources and
    optional oversubscription limits

    This should be called before the compute node actually consumes the
    resources required to execute the claim.

    :param resources: available local compute node resources
    :returns: Return true if resources are available to claim.
    """
    if not limits:
        limits = {}

    # If an individual limit is None, the resource will be considered
    # unlimited:
    memory_mb_limit = limits.get('memory_mb')
    disk_gb_limit = limits.get('disk_gb')
    vcpus_limit = limits.get('vcpu')
    numa_topology_limit = limits.get('numa_topology')

    LOG.info(_LI("Attempting claim: memory %(memory_mb)d MB, "
                 "disk %(disk_gb)d GB, vcpus %(vcpus)d CPU"),
             {'memory_mb': self.memory_mb, 'disk_gb': self.disk_gb,
              'vcpus': self.vcpus}, instance=self.instance)

    # Each _test_* helper returns None on success, or a string
    # describing why the resource cannot be claimed.
    reasons = [self._test_memory(resources, memory_mb_limit),
               self._test_disk(resources, disk_gb_limit),
               self._test_vcpus(resources, vcpus_limit),
               self._test_numa_topology(resources, numa_topology_limit),
               self._test_pci()]
    reasons = [r for r in reasons if r is not None]

    if len(reasons) > 0:
        raise exception.ComputeResourcesUnavailable(reason=
                "; ".join(reasons))

    LOG.info(_LI('Claim successful'), instance=self.instance)
在 _claim_test 中我们可以看到创建一个虚拟机主要 check 五个方面的资源,分别是 memory/disk/vcpu/numa/pci。
针对这五种资源分别调用 self._test_memory/self._test_disk/self._test_vcpus/self._test_numa_topology/self._test_pci 来 check
def _test_memory(self, resources, limit):
    # Check whether the requested RAM fits under the memory limit.
    type_ = _("memory")
    unit = "MB"
    total = resources.memory_mb
    used = resources.memory_mb_used
    requested = self.memory_mb

    return self._test(type_, unit, total, used, requested, limit)
def _test(self, type_, unit, total, used, requested, limit):
    """Test if the given type of resource needed for a claim can be safely
    allocated.
    """
    LOG.info(_LI('Total %(type)s: %(total)d %(unit)s, used: %(used).02f '
                 '%(unit)s'),
             {'type': type_, 'total': total, 'unit': unit, 'used': used},
             instance=self.instance)

    if limit is None:
        # treat resource as unlimited:
        LOG.info(_LI('%(type)s limit not specified, defaulting to '
                     'unlimited'), {'type': type_}, instance=self.instance)
        return

    # Free capacity is measured against the (possibly oversubscribed)
    # limit, not against the raw total.
    free = limit - used

    # Oversubscribed resource policy info:
    LOG.info(_LI('%(type)s limit: %(limit).02f %(unit)s, '
                 'free: %(free).02f %(unit)s'),
             {'type': type_, 'limit': limit, 'free': free, 'unit': unit},
             instance=self.instance)

    if requested > free:
        # A returned string (rather than None) signals failure to
        # _claim_test().
        return (_('Free %(type)s %(free).02f '
                  '%(unit)s < requested %(requested)d %(unit)s') %
                {'type': type_, 'free': free, 'unit': unit,
                 'requested': requested})
原来 memory 的 check 就是看 free = limit - used 是否大于等于 requested(limit 为 None 时视为不限制)。可见仅仅是比较大小而已
def _test_disk(self, resources, limit):
    # Check whether the requested local disk fits under the disk limit.
    type_ = _("disk")
    unit = "GB"
    total = resources.local_gb
    used = resources.local_gb_used
    requested = self.disk_gb

    return self._test(type_, unit, total, used, requested, limit)
def _test_vcpus(self, resources, limit):
    # Check whether the requested vCPU count fits under the vcpu limit.
    type_ = _("vcpu")
    unit = "VCPU"
    total = resources.vcpus
    used = resources.vcpus_used
    requested = self.vcpus

    return self._test(type_, unit, total, used, requested, limit)
disk和cpu也是一样的算法。即看剩下的是否满足要求的。
def _test_pci(self):
    # Delegate to the PCI tracker's stats to decide whether the PCI
    # device requests (if any) can be satisfied.
    pci_requests = self._pci_requests
    if pci_requests.requests:
        stats = self.tracker.pci_tracker.stats
        if not stats.support_requests(pci_requests.requests):
            return _('Claim pci failed.')
而 pci 的检测主要是看当前的 tracker.pci_tracker.stats.support_requests 能否满足 pci_requests.requests。
# Excerpt from nova/compute/manager.py (ComputeManager).  The quote is
# truncated in the article right after the `with` statement.
def _build_and_run_instance(self, context, instance, image, injected_files,
                            admin_password, requested_networks, security_groups,
                            block_device_mapping, node, limits, filter_properties):

    # Emit the 'create.start' notification tagged with the image name.
    image_name = image.get('name')
    self._notify_about_instance_usage(context, instance, 'create.start',
            extra_usage_info={'image_name': image_name})
    self._check_device_tagging(requested_networks, block_device_mapping)

    try:
        # Claim resources on the target node before building; the claim
        # context manager reverts the usage if the build fails.
        rt = self._get_resource_tracker(node)
        with rt.instance_claim(context, instance, limits):
可以看到这里是通过 _get_resource_tracker 得到 rt 对象的
def _get_resource_tracker(self, nodename):
    """Return the (cached) ResourceTracker for *nodename*.

    Raises NovaException when the driver does not manage that node.
    """
    rt = self._resource_tracker_dict.get(nodename)
    if not rt:
        if not self.driver.node_is_available(nodename):
            raise exception.NovaException(
                _("%s is not a valid node managed by this "
                  "compute host.") % nodename)

        # Lazily create a tracker for this node and cache it for reuse.
        rt = resource_tracker.ResourceTracker(self.host,
                                              self.driver,
                                              nodename)
        self._resource_tracker_dict[nodename] = rt
    return rt
_get_resource_tracker 首先检查_resource_tracker_dict是否已经包含这个rt了,没有的话,就通过resource_tracker.ResourceTracker 来创建这个class
其中ResourceTracker是在compute/resource_tracker.py 只是简单的赋值.最后将新创建的rt加到_resource_tracker_dict中
继续看instance_claim
def instance_claim(self, context, instance, limits=None):
    """Indicate that some resources are needed for an upcoming compute
    instance build operation.

    This should be called before the compute node is about to perform
    an instance build operation that will consume additional resources.

    :param context: security context
    :param instance: instance to reserve resources for.
    :type instance: nova.objects.instance.Instance object
    :param limits: Dict of oversubscription limits for memory, disk,
                   and CPUs.
    :returns: A Claim ticket representing the reserved resources.  It can
              be used to revert the resource usage if an error occurs
              during the instance build.
    """
    if self.disabled:
        # compute_driver doesn't support resource tracking, just
        # set the 'host' and node fields and continue the build:
        self._set_instance_host_and_node(instance)
        return claims.NopClaim()

    # sanity checks:
    if instance.host:
        LOG.warning(_LW("Host field should not be set on the instance "
                        "until resources have been claimed."),
                    instance=instance)

    if instance.node:
        LOG.warning(_LW("Node field should not be set on the instance "
                        "until resources have been claimed."),
                    instance=instance)

    # get the overhead required to build this instance:
    overhead = self.driver.estimate_instance_overhead(instance)
    LOG.debug("Memory overhead for %(flavor)d MB instance; %(overhead)d "
              "MB", {'flavor': instance.flavor.memory_mb,
                     'overhead': overhead['memory_mb']})
    LOG.debug("Disk overhead for %(flavor)d GB instance; %(overhead)d "
              "GB", {'flavor': instance.flavor.root_gb,
                     'overhead': overhead.get('disk_gb', 0)})

    pci_requests = objects.InstancePCIRequests.get_by_instance_uuid(
        context, instance.uuid)
    # Claim() runs the resource checks in its constructor and raises
    # ComputeResourcesUnavailable when they cannot be satisfied.
    claim = claims.Claim(context, instance, self, self.compute_node,
                         pci_requests, overhead=overhead, limits=limits)

    # self._set_instance_host_and_node() will save instance to the DB
    # so set instance.numa_topology first.  We need to make sure
    # that numa_topology is saved while under COMPUTE_RESOURCE_SEMAPHORE
    # so that the resource audit knows about any cpus we've pinned.
    instance_numa_topology = claim.claimed_numa_topology
    instance.numa_topology = instance_numa_topology
    self._set_instance_host_and_node(instance)

    if self.pci_tracker:
        # NOTE(jaypipes): ComputeNode.pci_device_pools is set below
        # in _update_usage_from_instance().
        self.pci_tracker.claim_instance(context, pci_requests,
                                        instance_numa_topology)

    # Mark resources in-use and update stats
    self._update_usage_from_instance(context, instance)

    elevated = context.elevated()
    # persist changes to the compute node:
    self._update(elevated)

    return claim
在instance_claim 中重点是申明了claim = claims.Claim(context, instance, self, self.compute_node,
pci_requests, overhead=overhead, limits=limits)
我们继续看看
class Claim(NopClaim):
    """A declaration that a compute host operation will require free resources.

    Claims serve as marker objects that resources are being held until the
    update_available_resource audit process runs to do a full reconciliation
    of resource usage.

    This information will be used to help keep the local compute hosts's
    ComputeNode model in sync to aid the scheduler in making efficient / more
    correct decisions with respect to host selection.
    """

    def __init__(self, context, instance, tracker, resources, pci_requests,
                 overhead=None, limits=None):
        super(Claim, self).__init__()
        # Stash a copy of the instance at the current point of time
        self.instance = instance.obj_clone()
        self._numa_topology_loaded = False
        self.tracker = tracker
        self._pci_requests = pci_requests

        if not overhead:
            # Default to zero overhead when the driver reports none.
            overhead = {'memory_mb': 0,
                        'disk_gb': 0}

        self.overhead = overhead
        self.context = context

        # Check claim at constructor to avoid mess code
        # Raise exception ComputeResourcesUnavailable if claim failed
        self._claim_test(resources, limits)
初始化一些变量,最重要的就是调用_claim_test
def _claim_test(self, resources, limits=None):
    """Test if this claim can be satisfied given available resources and
    optional oversubscription limits

    This should be called before the compute node actually consumes the
    resources required to execute the claim.

    :param resources: available local compute node resources
    :returns: Return true if resources are available to claim.
    """
    if not limits:
        limits = {}

    # If an individual limit is None, the resource will be considered
    # unlimited:
    memory_mb_limit = limits.get('memory_mb')
    disk_gb_limit = limits.get('disk_gb')
    vcpus_limit = limits.get('vcpu')
    numa_topology_limit = limits.get('numa_topology')

    LOG.info(_LI("Attempting claim: memory %(memory_mb)d MB, "
                 "disk %(disk_gb)d GB, vcpus %(vcpus)d CPU"),
             {'memory_mb': self.memory_mb, 'disk_gb': self.disk_gb,
              'vcpus': self.vcpus}, instance=self.instance)

    # Each _test_* helper returns None on success, or a string
    # describing why the resource cannot be claimed.
    reasons = [self._test_memory(resources, memory_mb_limit),
               self._test_disk(resources, disk_gb_limit),
               self._test_vcpus(resources, vcpus_limit),
               self._test_numa_topology(resources, numa_topology_limit),
               self._test_pci()]
    reasons = [r for r in reasons if r is not None]

    if len(reasons) > 0:
        raise exception.ComputeResourcesUnavailable(reason=
                "; ".join(reasons))

    LOG.info(_LI('Claim successful'), instance=self.instance)
在 _claim_test 中我们可以看到创建一个虚拟机主要 check 五个方面的资源,分别是 memory/disk/vcpu/numa/pci。
针对这五种资源分别调用 self._test_memory/self._test_disk/self._test_vcpus/self._test_numa_topology/self._test_pci 来 check
def _test_memory(self, resources, limit):
    # Check whether the requested RAM fits under the memory limit.
    type_ = _("memory")
    unit = "MB"
    total = resources.memory_mb
    used = resources.memory_mb_used
    requested = self.memory_mb

    return self._test(type_, unit, total, used, requested, limit)
def _test(self, type_, unit, total, used, requested, limit):
    """Test if the given type of resource needed for a claim can be safely
    allocated.
    """
    LOG.info(_LI('Total %(type)s: %(total)d %(unit)s, used: %(used).02f '
                 '%(unit)s'),
             {'type': type_, 'total': total, 'unit': unit, 'used': used},
             instance=self.instance)

    if limit is None:
        # treat resource as unlimited:
        LOG.info(_LI('%(type)s limit not specified, defaulting to '
                     'unlimited'), {'type': type_}, instance=self.instance)
        return

    # Free capacity is measured against the (possibly oversubscribed)
    # limit, not against the raw total.
    free = limit - used

    # Oversubscribed resource policy info:
    LOG.info(_LI('%(type)s limit: %(limit).02f %(unit)s, '
                 'free: %(free).02f %(unit)s'),
             {'type': type_, 'limit': limit, 'free': free, 'unit': unit},
             instance=self.instance)

    if requested > free:
        # A returned string (rather than None) signals failure to
        # _claim_test().
        return (_('Free %(type)s %(free).02f '
                  '%(unit)s < requested %(requested)d %(unit)s') %
                {'type': type_, 'free': free, 'unit': unit,
                 'requested': requested})
原来 memory 的 check 就是看 free = limit - used 是否大于等于 requested(limit 为 None 时视为不限制)。可见仅仅是比较大小而已
def _test_disk(self, resources, limit):
    # Check whether the requested local disk fits under the disk limit.
    type_ = _("disk")
    unit = "GB"
    total = resources.local_gb
    used = resources.local_gb_used
    requested = self.disk_gb

    return self._test(type_, unit, total, used, requested, limit)
def _test_vcpus(self, resources, limit):
    # Check whether the requested vCPU count fits under the vcpu limit.
    type_ = _("vcpu")
    unit = "VCPU"
    total = resources.vcpus
    used = resources.vcpus_used
    requested = self.vcpus

    return self._test(type_, unit, total, used, requested, limit)
disk和cpu也是一样的算法。即看剩下的是否满足要求的。
def _test_pci(self):
    # Delegate to the PCI tracker's stats to decide whether the PCI
    # device requests (if any) can be satisfied.
    pci_requests = self._pci_requests
    if pci_requests.requests:
        stats = self.tracker.pci_tracker.stats
        if not stats.support_requests(pci_requests.requests):
            return _('Claim pci failed.')
而 pci 的检测主要是看当前的 tracker.pci_tracker.stats.support_requests 能否满足 pci_requests.requests。