这篇先学习一下nova-compute每次启动后、在对外提供服务之前所做的准备工作。
nova-compute支持多种虚拟化驱动,包括LibvirtDriver、XenAPIDriver、FakeDriver、BareMetalDriver、VMwareESXDriver、VMwareVCDriver和HyperVDriver。从驱动名可以看出对应的虚拟化技术,一般在Linux服务器上,我们普遍会使用Qemu和Kvm这两种虚拟化技术(说法可能不太严谨, Kvm其实只是在Qemu的基础上实现了CPU硬件加速),那么选择的驱动应该为LibvirtDriver,下面也会以这个驱动进行讲解。Libvirt是一个虚拟化API库,通过对不同的虚拟化backend进行抽象来提供统一的编程接口,目前支持KVM/QEMU/Xen/Virtuozzo/VMWare ESX/LXC/BHyve等,在nova-compute中就会使用到Libvirt的python-binding。
通过对启动流程的分析,可以大体分为以下几步:
1.进行Libvirt驱动的初始化工作,包括初始化Libvirt连接、创建事件队列和事件调度loop、通过原生线程运行Libvirt自带的事件loop,每当实例有事件发生,那么Libvirt的事件loop就会回调我们注册的方法来将事件放入事件队列中,然后唤醒事件调度loop对事件进行处理。作为管理平台,跟踪实例的状态信息是十分重要的,当我们使用实例时,关闭guest后,可以看到OpenStack通过Libvirt的事件loop能很快更新实例的状态为shutoff。
2.清理主机的已撤离、删除或者残留的实例,并对主机上的现有实例进行初始化操作,譬如我们重启服务器后需要恢复实例原来的状态;在初始化实例时,nova-compute也会保证虚拟网络的网桥、vlan设备等的存在,这也证实了多节点不使用multi_host的可行性,因为nova-compute也有责任创建网桥;
3.根据“是否延迟iptables的应用”配置,我们可以将iptables规则的应用时机延迟到最后一步,那么以上步骤中涉及到iptables规则的操作并不会立即生效;默认配置是False,也就是不延迟。
# nova-compute使用eventlet的greenpool来实现网络并发,
# 但是在调用一些C库的接口时, eventlet的monkey_patch并不能修改它们, 导致
# 在协程中调用它们会被阻塞, 所以需要使用eventlet的另一个模块tpool来解决这个
# 问题, tpool使用线程池来实现并发
from eventlet import tpool
# Prefix prepended to the disable reason when the driver disables the compute
# service itself; _set_host_enabled() only re-enables a service whose stored
# reason starts with this prefix.
DISABLE_PREFIX = 'AUTO: '
# Placeholder reason: the default ``disable_reason`` argument, and the value
# written back to the service record when the service is re-enabled.
DISABLE_REASON_UNDEFINED = 'None'
# tpool.Proxy类的__str__和__repr__内置方法有问题, 这里进行patch操作
def patch_tpool_proxy():
    """Override ``tpool.Proxy.__str__`` and ``tpool.Proxy.__repr__``.

    The stock implementations on ``tpool.Proxy`` are treated as faulty here,
    so both are replaced with thin wrappers that delegate to the proxied
    object stored in ``_obj``.
    """
    def _delegating_str(proxy):
        return str(proxy._obj)

    def _delegating_repr(proxy):
        return repr(proxy._obj)

    tpool.Proxy.__str__ = _delegating_str
    tpool.Proxy.__repr__ = _delegating_repr


# Apply the patch once, as soon as this module is loaded.
patch_tpool_proxy()
def libvirt_error_handler(context, err):
    """Accept a libvirt error notification and deliberately do nothing.

    Both arguments are ignored and ``None`` is returned.

    NOTE(review): presumably installed as libvirt's error handler to silence
    its default error output -- confirm at the registration site.
    """
class ComputeDriver(object):
    """Driver base exposing a single-listener event hook.

    A compute service registers one callback through
    register_event_listener(); the driver then forwards events to it via
    emit_event().
    """

    # Class-level default so that emit_event() can be called safely before
    # any listener has been registered (it then just discards the event
    # instead of failing with AttributeError).
    _compute_event_callback = None

    def register_event_listener(self, callback):
        """Remember ``callback`` as the receiver of emitted events.

        :param callback: callable taking the event as its only argument.
        """
        self._compute_event_callback = callback

    def emit_event(self, event):
        """Forward ``event`` to the registered callback.

        If no callback is registered the event is logged and dropped.
        Exceptions raised by the callback are logged and not propagated.

        :param event: the event to dispatch.
        :raises ValueError: if a callback is registered and ``event`` is not
            an instance of ``virtevent.Event``.
        """
        if not self._compute_event_callback:
            LOG.debug(_("Discarding event %s") % str(event))
            return

        if not isinstance(event, virtevent.Event):
            raise ValueError(
                _("Event must be an instance of nova.virt.event.Event"))

        try:
            LOG.debug(_("Emitting event %s") % str(event))
            # Hand the event to the registered listener.
            self._compute_event_callback(event)
        except Exception as ex:
            # A misbehaving listener must not break the emitter.
            LOG.error(_("Exception dispatching event %(event)s: %(ex)s"),
                      {'event': event, 'ex': ex})
class LibvirtDriver(driver.ComputeDriver):
def __init__(self, virtapi, read_only=False):
    """Set up connection bookkeeping and load the VIF and firewall drivers.

    :param virtapi: virt API object.
        NOTE(review): not used in the visible code; ``self.virtapi`` is
        read below, so it is presumably stored by the elided part -- confirm.
    :param read_only: whether the libvirt connection should be opened
        read-only (passed on to ``_connect``).
    """
    ...
    self.read_only = read_only

    # No libvirt connection yet; the lock is created alongside the cached
    # connection slot.
    self._wrapped_conn = None
    self._wrapped_conn_lock = threading.Lock()

    # The event queue stays None until it is created elsewhere;
    # _queue_event() drops events while it is None.
    self._event_queue = None

    # Libvirt virtual NIC (VIF) driver; the default is
    # libvirt.vif.LibvirtGenericVIFDriver.
    vif_driver_cls = importutils.import_class(CONF.libvirt.vif_driver)
    self.vif_driver = vif_driver_cls(self._get_connection)

    # Firewall driver; the libvirt iptables firewall driver is used here.
    self.firewall_driver = firewall.load_driver(
        DEFAULT_FIREWALL_DRIVER,
        self.virtapi,
        get_connection=self._get_connection)
# 测试Libvirt连接是否可用
@staticmethod
def _test_connection(conn):
try:
conn.getLibVersion()
return True
except libvirt.libvirtError as e:
if (e.get_error_code() in (libvirt.VIR_ERR_SYSTEM_ERROR,
libvirt.VIR_ERR_INTERNAL_ERROR) and
e.get_error_domain() in (libvirt.VIR_FROM_REMOTE,
libvirt.VIR_FROM_RPC)):
LOG.debug(_('Connection to libvirt broke'))
return False
raise
# 将event放入事件队列, 并通过管道通知事件调度loop进行处理
def _queue_event(self, event):
if self._event_queue is None:
return
self._event_queue.put(event)
c = ' '.encode()
self._event_notify_send.write(c)
self._event_notify_send.flush()
# 使能/禁用本主机的计算服务
def _set_host_enabled(self, enabled,
                      disable_reason=DISABLE_REASON_UNDEFINED):
    """Enable or disable the compute service record of this host.

    The record is only touched when its current state differs from the
    requested one. An enabled service may always be disabled; a disabled
    service is only re-enabled if its stored reason starts with
    ``DISABLE_PREFIX`` (i.e. it was disabled automatically), so a manual
    disable is never overridden.

    Lookup/save failures are logged as warnings and not propagated.

    :param enabled: True to enable the service, False to disable it.
    :param disable_reason: reason recorded (after ``DISABLE_PREFIX``) when
        disabling.
    """
    status_name = {True: 'disabled',
                   False: 'enabled'}
    disable_service = not enabled

    ctx = nova_context.get_admin_context()
    try:
        service = service_obj.Service.get_by_compute_host(ctx, CONF.host)

        # Nothing to do unless the stored state differs from the target.
        if service.disabled != disable_service:
            # Proceed when the service is currently enabled (we are
            # disabling it), or when it is disabled with an automatic
            # reason (we are allowed to re-enable it). A manually disabled
            # service is left alone.
            if not service.disabled or (
                    service.disabled_reason and
                    service.disabled_reason.startswith(DISABLE_PREFIX)):
                service.disabled = disable_service
                service.disabled_reason = (
                    DISABLE_PREFIX + disable_reason
                    if disable_service else DISABLE_REASON_UNDEFINED)
                service.save()
                LOG.debug(_('Updating compute service status to %s'),
                          status_name[disable_service])
            else:
                LOG.debug(_('Not overriding manual compute service '
                            'status with: %s'),
                          status_name[disable_service])
    except exception.ComputeHostNotFound:
        # LOG.warning rather than the deprecated LOG.warn alias, matching
        # the LOG.warning call used by _connect.
        LOG.warning(_('Cannot update service status on host: %s,'
                      'since it is not registered.') % CONF.host)
    except Exception:
        # Best effort: a status update failure must not break the caller.
        LOG.warning(_('Cannot update service status on host: %s,'
                      'due to an unexpected exception.') % CONF.host,
                    exc_info=True)
# Libvirt连接关闭时就会调用此方法
def _close_callback(self, conn, reason, opaque):
# 将连接和关闭的原因放入事件队列中
close_info = {'conn': conn, 'reason': reason}
self._queue_event(close_info)
# 每当有domain或实例发生事件时就会调用此方法
@staticmethod
def _event_lifecycle_callback(conn, dom, event, detail, opaque):
    """Translate a libvirt domain lifecycle event into a virt event.

    Only stopped, started, suspended and resumed events are of interest;
    every other event is ignored.

    :param conn: unused.
    :param dom: domain object; its ``UUIDString()`` identifies the instance.
    :param event: libvirt lifecycle event constant.
    :param detail: unused.
    :param opaque: the driver instance that registered this callback; it is
        used as ``self`` because the method is static.
    """
    driver_self = opaque

    # The domain UUID doubles as the OpenStack instance UUID.
    instance_uuid = dom.UUIDString()

    # libvirt lifecycle event -> nova virtevent transition.
    transitions = {
        libvirt.VIR_DOMAIN_EVENT_STOPPED:
            virtevent.EVENT_LIFECYCLE_STOPPED,
        libvirt.VIR_DOMAIN_EVENT_STARTED:
            virtevent.EVENT_LIFECYCLE_STARTED,
        libvirt.VIR_DOMAIN_EVENT_SUSPENDED:
            virtevent.EVENT_LIFECYCLE_PAUSED,
        libvirt.VIR_DOMAIN_EVENT_RESUMED:
            virtevent.EVENT_LIFECYCLE_RESUMED,
    }
    transition = transitions.get(event)
    if transition is None:
        # Not an event we track.
        return

    driver_self._queue_event(
        virtevent.LifecycleEvent(instance_uuid, transition))
# 获取虚拟化技术对应的Libvirt uri
@staticmethod
def uri():
    """Return the libvirt URI for the configured virtualization type.

    An explicitly configured ``CONF.libvirt.connection_uri`` always wins;
    otherwise a default is chosen from ``CONF.libvirt.virt_type``, falling
    back to ``qemu:///system`` for any type not listed.
    """
    default_uris = {
        'uml': 'uml:///system',
        'xen': 'xen:///',
        'lxc': 'lxc:///',
    }
    fallback = default_uris.get(CONF.libvirt.virt_type, 'qemu:///system')
    return CONF.libvirt.connection_uri or fallback
# 进行Libvirt连接, 并返回连接
@staticmethod
def _connect(uri, read_only):
    """Open a libvirt connection without blocking the whole process.

    :param uri: libvirt connection URI.
    :param read_only: open the connection with ``VIR_CONNECT_RO`` if true.
    :returns: the result of ``tpool.proxy_call`` wrapping
        ``libvirt.openAuth`` (a proxy around the connection).
    :raises exception.HypervisorUnavailable: if libvirt reports an error
        while connecting; an error notification is emitted first.
    """
    def _connect_auth_cb(creds, opaque):
        # No credentials requested: nothing to do, report success.
        cred_count = len(creds)
        if cred_count == 0:
            return 0
        # Interactive credentials cannot be supplied from here.
        LOG.warning(
            _("Can not handle authentication request for %d credentials")
            % cred_count)
        raise exception.NovaException(
            _("Can not handle authentication request for %d credentials")
            % cred_count)

    credential_types = [libvirt.VIR_CRED_AUTHNAME,
                        libvirt.VIR_CRED_ECHOPROMPT,
                        libvirt.VIR_CRED_REALM,
                        libvirt.VIR_CRED_PASSPHRASE,
                        libvirt.VIR_CRED_NOECHOPROMPT,
                        libvirt.VIR_CRED_EXTERNAL]
    auth = [credential_types, _connect_auth_cb, None]

    # Read-only connections need the dedicated flag; otherwise no flags.
    flags = libvirt.VIR_CONNECT_RO if read_only else 0

    try:
        # Equivalent to ``libvirt.openAuth(uri, auth, flags)``, but the call
        # is handed to eventlet's thread pool so that only the current
        # green thread waits instead of the whole process. The value
        # returned is a proxy around the connection; calling the
        # connection's methods through that proxy keeps them non-blocking
        # in the same way.
        return tpool.proxy_call(
            (libvirt.virDomain, libvirt.virConnect),
            libvirt.openAuth, uri, auth, flags)
    except libvirt.libvirtError as ex:
        LOG.exception(_("Connection to libvirt failed: %s"), ex)
        # Tell listeners about the failure before giving up on the host.
        payload = {'ip': LibvirtDriver.get_host_ip_addr(),
                   'method': '_connect',
                   'reason': ex}
        notifier = rpc.get_notifier('compute')
        notifier.error(nova_context.get_admin_context(),
                       'compute.libvirt.error',
                       payload)
        raise exception.HypervisorUnavailable(host=CONF.host)
# 获取新的Libvirt连接, 并进行callback注册
def _get_new_connection(self):
    """Open a fresh libvirt connection and register event callbacks on it.

    The host's compute service is enabled or disabled according to whether
    the connection attempt produced a connection. On success the connection
    is cached in ``self._wrapped_conn`` and callbacks for domain lifecycle
    events and for connection-close events are registered; failures to
    register are logged and otherwise ignored.

    :returns: the wrapped connection returned by ``_connect``.
    :raises: whatever ``_connect`` raises; it is not caught here.
    """
    uri = self.uri()
    LOG.debug(_('Connecting to libvirt: %s'), uri)

    wrapped_conn = None
    try:
        # Returns a wrapped (proxied) connection.
        wrapped_conn = self._connect(uri, self.read_only)
    finally:
        # Runs on both outcomes: no connection -> disable this host's
        # service with a reason; connection obtained -> enable it.
        disable_reason = DISABLE_REASON_UNDEFINED
        if not wrapped_conn:
            disable_reason = 'Failed to connect to libvirt'
        self._set_host_enabled(bool(wrapped_conn), disable_reason)

    # No explicit None check is needed below: the try block above has no
    # except clause, so if _connect() raised, the exception propagates
    # right after the finally block and this code is never reached.
    self._wrapped_conn = wrapped_conn

    try:
        LOG.debug(_("Registering for lifecycle events %s"), self)
        # Callback for events over the whole lifecycle of any domain; the
        # driver itself is passed as the opaque argument.
        wrapped_conn.domainEventRegisterAny(
            None,
            libvirt.VIR_DOMAIN_EVENT_ID_LIFECYCLE,
            self._event_lifecycle_callback,
            self)
    except Exception as e:
        # Best effort: the connection stays usable without events.
        LOG.warning(_("URI %(uri)s does not support events: %(error)s"),
                    {'uri': uri, 'error': e})

    try:
        LOG.debug(_("Registering for connection events: %s") %
                  str(self))
        # Callback fired when the libvirt connection is closed.
        wrapped_conn.registerCloseCallback(self._close_callback, None)
    except (TypeError, AttributeError) as e:
        LOG.debug(_("The version of python-libvirt does not support "
                    "registerCloseCallback or is too old: %s"), e)
    except libvirt.libvirtError as e:
        LOG.warning(_("URI %(uri)s does not support connection"
                      " events: %(error)s"),
                    {'uri': uri, 'error': e})

    return wrapped_conn
# 返回已有的Libvirt连接, 在必要时才进行初始化
def _get_