nova service的heartbeat(心跳)机制

nova conductor这个服务启动的入口函数在nova/cmd/conductor.py 中,这里我们重点关注其中的heartbeat机制
def main():
    """Create the nova-conductor service, serve it, and wait on it."""
    # Call the Service constructor (via Service.create), which lives in
    # nova/service.py.
    server = service.Service.create(binary='nova-conductor',
                                    topic=CONF.conductor.topic,
                                    manager=CONF.conductor.manager)
    # Use the configured worker count when it is set; otherwise fall back
    # to processutils.get_worker_count().
    workers = CONF.conductor.workers or processutils.get_worker_count()
    service.serve(server, workers=workers)
    service.wait()
service的构造函数

class Service(service.Service):
    """Service object for binaries running on hosts.

    A service takes a manager and enables rpc by listening to queues based
    on topic. It also periodically runs tasks on the manager and reports
    its state to the database services table.
    """

    def __init__(self, host, binary, topic, manager, report_interval=None,
                 periodic_enable=None, periodic_fuzzy_delay=None,
                 periodic_interval_max=None, db_allowed=True,
                 *args, **kwargs):
        """Record the service identity and create the servicegroup API.

        NOTE(review): this listing is an excerpt of the constructor; the
        remaining parameters (report_interval, periodic_*, db_allowed,
        *args, **kwargs) are not used in the lines shown here.

        :param host: stored as ``self.host``
        :param binary: stored as ``self.binary``
        :param topic: stored as ``self.topic``
        :param manager: stored as ``self.manager_class_name``
        """
        super(Service, self).__init__()
        self.host = host
        self.binary = binary
        self.topic = topic
        self.manager_class_name = manager
        # Instantiate the servicegroup API; the heartbeat is registered
        # through its join() call.
        self.servicegroup_api = servicegroup.API()

servicegroup的API类位于nova/servicegroup/api.py 中,service最终调用的是其中的join函数
class API(object):

    def join(self, member, group, service=None):
        """Register ``member`` in ``group`` through the configured driver.

        :param member: ID/name of the member that is joining
        :param group: ID/name of the group the member joins
        :param service: a `nova.service.Service` object
        :returns: whatever the driver's ``join`` returns
        """
        driver = self._driver
        return driver.join(member, group, service)
在servicegroup中可以看到有两种driver(db和mc,即memcached),这里我们采用的是mysql,因此用的是db
# Maps each servicegroup driver name to the dotted path of the class that
# implements it.
_driver_name_class_mapping = dict(
    db='nova.servicegroup.drivers.db.DbDriver',
    mc='nova.servicegroup.drivers.mc.MemcachedDriver',
)
所以driver的join函数最终是在nova/servicegroup/drivers/db.py 中实现的
class DbDriver(base.Driver):

    def __init__(self, *args, **kwargs):
        """Read the ``service_down_time`` threshold from configuration."""
        self.service_down_time = CONF.service_down_time

    def join(self, member, group, service=None):
        """Add a new member to a service group.

        :param member: the joined member ID/name
        :param group: the group ID/name, of the joined member
        :param service: a `nova.service.Service` object
        :raises RuntimeError: if ``service`` is None
        """
        LOG.debug('DB_Driver: join new ServiceGroup member %(member)s to '
                  'the %(group)s group, service = %(service)s',
                  {'member': member, 'group': group,
                   'service': service})
        if service is None:
            raise RuntimeError(_('service is a mandatory argument for DB based'
                                 ' ServiceGroup driver'))
        report_interval = service.report_interval
        if report_interval:
            # Core of the heartbeat: register a timer on service.tg that
            # periodically calls self._report_state(service), passing
            # api.INITIAL_REPORTING_DELAY as the initial delay.
            # No timer is registered when report_interval is falsy.
            service.tg.add_timer(report_interval, self._report_state,
                                 api.INITIAL_REPORTING_DELAY, service)
其中_report_state 同样在nova/servicegroup/drivers/db.py 中实现
  def _report_state(self, service):
        """Update the state of this service in the datastore.

        :param service: object whose ``service_ref`` record is updated; its
            ``model_disconnected`` flag tracks whether reporting is
            currently failing
        """

        try:
            # The heartbeat itself: increment report_count on the service
            # record and save the record back.
            service.service_ref.report_count += 1
            service.service_ref.save()

            # TODO(termie): make this pattern be more elegant.
            if getattr(service, 'model_disconnected', False):
                # The save succeeded after an earlier failure, so clear the
                # flag and log the recovery.
                service.model_disconnected = False
                LOG.info(
                    _LI('Recovered from being unable to report status.'))
        except messaging.MessagingTimeout:
            # NOTE(johngarbutt) during upgrade we will see messaging timeouts
            # as nova-conductor is restarted, so only log this error once.
            if not getattr(service, 'model_disconnected', False):
                service.model_disconnected = True
                LOG.warning(_LW('Lost connection to nova-conductor '
                             'for reporting service status.'))

nova/servicegroup/drivers/db.py 中还实现了检查服务是否up的函数is_up
    def is_up(self, service_ref):
        """Check whether a service is up based on last heartbeat.

        Moved from nova.utils.

        :param service_ref: service record; 'last_seen_up', 'updated_at',
            'created_at', 'binary' and 'host' are read from it
        :returns: True if the absolute number of seconds elapsed since the
            last heartbeat is at most ``self.service_down_time``
        """
        # Keep checking 'updated_at' if 'last_seen_up' isn't set.
        # Should be able to use only 'last_seen_up' in the M release
        last_heartbeat = (service_ref.get('last_seen_up') or
            service_ref['updated_at'] or service_ref['created_at'])
        if isinstance(last_heartbeat, six.string_types):
            # NOTE(russellb) If this service_ref came in over rpc via
            # conductor, then the timestamp will be a string and needs to be
            # converted back to a datetime.
            last_heartbeat = timeutils.parse_strtime(last_heartbeat)
        else:
            # Objects have proper UTC timezones, but the timeutils comparison
            # below does not (and will fail)
            last_heartbeat = last_heartbeat.replace(tzinfo=None)
        # Timestamps in DB are UTC.
        elapsed = timeutils.delta_seconds(last_heartbeat, timeutils.utcnow())
        # The service counts as up when the time since its last heartbeat
        # does not exceed service_down_time. That threshold is read from
        # the configuration in __init__
        # (self.service_down_time = CONF.service_down_time).
        is_up = abs(elapsed) <= self.service_down_time
        if not is_up:
            LOG.debug('Seems service %(binary)s on host %(host)s is down. '
                      'Last heartbeat was %(lhb)s. Elapsed time is %(el)s',
                      {'binary': service_ref.get('binary'),
                       'host': service_ref.get('host'),
                       'lhb': str(last_heartbeat), 'el': str(elapsed)})
        return is_up
   

这样当通过nova service-list 来查询服务是否up时,会调用到nova/api/openstack/compute/services.py

class ServiceController(wsgi.Controller):

    def _get_service_detail(self, svc, additional_fields):
        """Build the detail dict reported for one service.

        :param svc: service record, read by key ('binary', 'host', 'id',
            'availability_zone', 'disabled', 'updated_at',
            'disabled_reason', plus every name in ``additional_fields``)
        :param additional_fields: iterable of extra keys copied from ``svc``
        :returns: dict of the service's fields, including 'status'
            ('enabled' or 'disabled') and 'state' ('up' or 'down')
        """
        # service_is_up() is the one in nova/servicegroup/api.py, which
        # ends up in the servicegroup driver's is_up() check.
        alive = self.servicegroup_api.service_is_up(svc)
        state = "up" if alive else "down"
        active = 'disabled' if svc['disabled'] else 'enabled'
        service_detail = {'binary': svc['binary'],
                          'host': svc['host'],
                          'id': svc['id'],
                          'zone': svc['availability_zone'],
                          'status': active,
                          'state': state,
                          'updated_at': svc['updated_at'],
                          'disabled_reason': svc['disabled_reason']}

        for field in additional_fields:
            service_detail[field] = svc[field]

        return service_detail

service_is_up 源码如下,可见最终会调用到前面讲的db driver中的is_up来判断service是否up
    def service_is_up(self, member):
        """Tell whether ``member`` is currently considered up.

        A member whose ``forced_down`` entry is truthy is always reported
        as down; otherwise the answer comes from the driver's ``is_up``.
        """
        # NOTE(johngarbutt) no logging in this method,
        # so this doesn't slow down the scheduler
        forced_down = member.get('forced_down')
        return False if forced_down else self._driver.is_up(member)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值