在 E:\nova\nova\conf\service.py 中查看 service 的配置选项，可以看到其中有两个相关的选项：report_interval 和 service_down_time。
这两个选项的 help 文本已经把各自的作用写得很清楚了。
# Configuration options for service heartbeat reporting and liveness
# detection. Only the two options discussed here are shown; the list is not
# closed within this excerpt.
service_opts = [
# TODO(johngarbutt) we need a better default and minimum, in a backwards
# compatible way for report_interval
# Heartbeat period: how often (in seconds) a service reports its state.
cfg.IntOpt('report_interval',
default=10,
help="""
Number of seconds indicating how frequently the state of services on a
given hypervisor is reported. Nova needs to know this to determine the
overall health of the deployment.
Related Options:
* service_down_time
report_interval should be less than service_down_time. If service_down_time
is less than report_interval, services will routinely be considered down,
because they report in too rarely.
"""),
# TODO(johngarbutt) the code enforces the min value here, but we could
# do to add some min value here, once we sort out report_interval
# Liveness threshold: the maximum age (in seconds) of the last check-in for
# a service to still count as up.
cfg.IntOpt('service_down_time',
default=60,
help="""
Maximum time in seconds since last check-in for up service
Each compute node periodically updates their database status based on the
specified report interval. If the compute node hasn't updated the status
for more than service_down_time, then the compute node is considered down.
Related Options:
* report_interval (service_down_time should not be less than report_interval)
"""),
这两个参数的实现在E:\nova\nova\servicegroup\api.py 中
# Maps each supported ``servicegroup_driver`` setting to the dotted import
# path of the driver class that implements it.
_driver_name_class_mapping = {
    'db': 'nova.servicegroup.drivers.db.DbDriver',
    'mc': 'nova.servicegroup.drivers.mc.MemcachedDriver',
}
class API(object):
    """Thin facade that forwards servicegroup calls to the configured driver.

    The concrete driver is chosen from ``CONF.servicegroup_driver`` when the
    API object is created; ``join`` and ``service_is_up`` delegate to it.
    """

    def __init__(self, *args, **kwargs):
        '''Create an instance of the servicegroup API.

        args and kwargs are passed down to the servicegroup driver when it gets
        created.
        '''
        # Make sure report interval is less than service down time.
        # The value is taken from the configuration.
        # NOTE(review): nothing in this excerpt uses report_interval or
        # enforces that constraint; presumably the check was dropped when
        # the code was quoted -- confirm against the upstream file.
        report_interval = CONF.report_interval
        # Two driver kinds are supported, 'db' and 'mc'; 'db' is the usual
        # choice.
        driver_class = _driver_name_class_mapping[CONF.servicegroup_driver]
        # Import the selected class by its dotted path and instantiate it
        # (for 'db' that is nova.servicegroup.drivers.db.DbDriver).
        self._driver = importutils.import_object(driver_class,
                                                 *args, **kwargs)

    def join(self, member, group, service=None):
        """Add a new member to a service group.

        A service calls this so that its state gets written to the backing
        store periodically; the actual work is done by the driver's
        ``join``.

        :param member: the joined member ID/name
        :param group: the group ID/name, of the joined member
        :param service: a `nova.service.Service` object
        :returns: whatever the driver's ``join`` returns
        """
        return self._driver.join(member, group, service)

    def service_is_up(self, member):
        """Check if the given member is up.

        :param member: mapping-like service record; a truthy ``forced_down``
            entry short-circuits the check
        :returns: False when the member is forced down, otherwise the
            driver's ``is_up`` verdict
        """
        # NOTE(johngarbutt) no logging in this method,
        # so this doesn't slow down the scheduler
        if member.get('forced_down'):
            return False
        return self._driver.is_up(member)
db的实现在E:\nova\nova\servicegroup\drivers\db.py
class DbDriver(base.Driver):
    """Servicegroup driver that judges liveness from heartbeat timestamps.

    A periodic timer bumps the service record's ``report_count`` and saves
    it; ``is_up`` compares the age of the last recorded heartbeat with
    ``service_down_time``.
    """

    def __init__(self, *args, **kwargs):
        """Cache the configured ``service_down_time`` threshold."""
        self.service_down_time = CONF.service_down_time

    def join(self, member, group, service=None):
        """Start periodic state reporting for ``service``.

        :param member: the joined member ID/name (not used by this driver)
        :param group: the group ID/name (not used by this driver)
        :param service: service object providing ``report_interval`` and a
            ``tg`` timer group; required despite the ``None`` default
        :raises RuntimeError: if ``service`` is None
        """
        if service is None:
            # Fail with a clear message instead of an AttributeError below.
            raise RuntimeError('service is a mandatory argument for DB based'
                               ' ServiceGroup driver')
        # The quoted excerpt used report_interval without defining it
        # (NameError); the interval is taken from the service object, as in
        # upstream Nova.
        report_interval = service.report_interval
        # A falsy interval disables reporting. Otherwise register a timer on
        # the service's own timer group that fires every report_interval and
        # calls _report_state.
        if report_interval:
            service.tg.add_timer(report_interval, self._report_state,
                                 api.INITIAL_REPORTING_DELAY, service)

    def is_up(self, service_ref):
        """Moved from nova.utils

        Check whether a service is up based on last heartbeat.

        :param service_ref: mapping-like service record; ``last_seen_up`` is
            used when set, otherwise ``created_at``
        :returns: True if the service is considered alive, False if it is
            considered down
        """
        last_heartbeat = (service_ref.get('last_seen_up') or
                          service_ref['created_at'])
        if isinstance(last_heartbeat, six.string_types):
            # NOTE(russellb) If this service_ref came in over rpc via
            # conductor, then the timestamp will be a string and needs to be
            # converted back to a datetime.
            last_heartbeat = timeutils.parse_strtime(last_heartbeat)
        else:
            # Objects have proper UTC timezones, but the timeutils comparison
            # below does not (and will fail)
            last_heartbeat = last_heartbeat.replace(tzinfo=None)
        # Timestamps in DB are UTC.
        # The service counts as up while the time elapsed between its last
        # recorded heartbeat and now does not exceed service_down_time; a
        # larger gap means heartbeats have been missed.
        elapsed = timeutils.delta_seconds(last_heartbeat, timeutils.utcnow())
        is_up = abs(elapsed) <= self.service_down_time
        if not is_up:
            LOG.debug('Seems service %(binary)s on host %(host)s is down. '
                      'Last heartbeat was %(lhb)s. Elapsed time is %(el)s',
                      {'binary': service_ref.get('binary'),
                       'host': service_ref.get('host'),
                       'lhb': str(last_heartbeat), 'el': str(elapsed)})
        # True means the service is alive, False means it is down.
        return is_up

    def _report_state(self, service):
        """Update the state of this service in the datastore."""
        try:
            # The timer callback just increments report_count and saves the
            # record.
            # NOTE(review): presumably the save also refreshes last_seen_up
            # in the database layer, which is the timestamp is_up() reads --
            # confirm.
            service.service_ref.report_count += 1
            service.service_ref.save()
            # TODO(termie): make this pattern be more elegant.
            if getattr(service, 'model_disconnected', False):
                service.model_disconnected = False
                LOG.info(
                    _LI('Recovered from being unable to report status.'))
        except messaging.MessagingTimeout:
            # NOTE(johngarbutt) during upgrade we will see messaging timeouts
            # as nova-conductor is restarted, so only log this error once.
            if not getattr(service, 'model_disconnected', False):
                service.model_disconnected = True
                LOG.warning(_LW('Lost connection to nova-conductor '
                                'for reporting service status.'))