报错:
{
  "error": "One or more requested checks failed to execute.",
  "checks": [
    "mesos_agent_registered_with_masters: signal: killed"
  ]
}
在企业版 DC/OS 1.11.3、开源版 DC/OS 1.11.6 及之后的版本中,已经对各项检查的超时时间进行了优化,具体可以对比下面这个文件的新旧版本:
/opt/mesosphere/lib/python3.6/site-packages/gen/calc.py
老版:
def calculate_check_config(check_time):
    """Build the DC/OS node check configuration and return it as a JSON string.

    This is the older variant of the function: the component checks use a
    3s timeout and every other check uses a 1s timeout.

    Args:
        check_time: When equal to the string 'true', the 'clock_sync' check
            is added and appended to the 'poststart' list; any other value
            leaves it out.

    Returns:
        The check configuration serialized with json.dumps.
    """
    check_config = {
        'node_checks': {
            'checks': {
                'components_master': {
                    'description': 'All DC/OS components are healthy.',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', '--role', 'master', 'components',
                            '--exclude=dcos-checks-poststart.timer,dcos-checks-poststart.service'],
                    'timeout': '3s',
                    'roles': ['master']
                },
                'components_agent': {
                    'description': 'All DC/OS components are healthy',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', '--role', 'agent', 'components', '--port', '61001',
                            '--exclude=dcos-checks-poststart.service,dcos-checks-poststart.timer'],
                    'timeout': '3s',
                    'roles': ['agent']
                },
                'xz': {
                    'description': 'The xz utility is available',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', 'executable', 'xz'],
                    'timeout': '1s'
                },
                'tar': {
                    'description': 'The tar utility is available',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', 'executable', 'tar'],
                    'timeout': '1s'
                },
                'curl': {
                    'description': 'The curl utility is available',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', 'executable', 'curl'],
                    'timeout': '1s'
                },
                'unzip': {
                    'description': 'The unzip utility is available',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', 'executable', 'unzip'],
                    'timeout': '1s'
                },
                'ip_detect_script': {
                    'description': 'The IP detect script produces valid output',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', 'ip'],
                    'timeout': '1s'
                },
                'mesos_master_replog_synchronized': {
                    'description': 'The Mesos master has synchronized its replicated log',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', '--role', 'master', 'mesos-metrics'],
                    'timeout': '1s',
                    'roles': ['master']
                },
                'mesos_agent_registered_with_masters': {
                    'description': 'The Mesos agent has registered with the masters',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', '--role', 'agent', 'mesos-metrics'],
                    'timeout': '1s',
                    'roles': ['agent']
                },
                'journald_dir_permissions': {
                    'description': 'Journald directory has the right owners and permissions',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', 'journald'],
                    'timeout': '1s',
                },
            },
            'prestart': [],
            'poststart': [
                'components_master',
                'components_agent',
                'xz',
                'tar',
                'curl',
                'unzip',
                'ip_detect_script',
                'mesos_master_replog_synchronized',
                'mesos_agent_registered_with_masters',
                'journald_dir_permissions',
            ],
        },
    }

    if check_time == 'true':
        # Add the clock sync check.
        clock_sync_check_name = 'clock_sync'
        check_config['node_checks']['checks'][clock_sync_check_name] = {
            'description': 'System clock is in sync.',
            'cmd': ['/opt/mesosphere/bin/dcos-checks', 'time'],
            'timeout': '1s'
        }
        check_config['node_checks']['poststart'].append(clock_sync_check_name)

    return json.dumps(check_config)
新版:
def calculate_check_config(check_time):
    """Build the DC/OS node check configuration and return it as a JSON string.

    This is the newer variant of the function: the component checks and the
    Mesos metrics checks use a 30s timeout and every other check uses 3s.

    Args:
        check_time: When equal to the string 'true', the 'clock_sync' check
            is added and appended to the 'poststart' list; any other value
            leaves it out.

    Returns:
        The check configuration serialized with json.dumps.
    """
    check_config = {
        'node_checks': {
            'checks': {
                'components_master': {
                    'description': 'All DC/OS components are healthy.',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', '--role', 'master', 'components',
                            '--exclude=dcos-checks-poststart.timer,dcos-checks-poststart.service'],
                    'timeout': '30s',
                    'roles': ['master']
                },
                'components_agent': {
                    'description': 'All DC/OS components are healthy',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', '--role', 'agent', 'components', '--port', '61001',
                            '--exclude=dcos-checks-poststart.service,dcos-checks-poststart.timer'],
                    'timeout': '30s',
                    'roles': ['agent']
                },
                'xz': {
                    'description': 'The xz utility is available',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', 'executable', 'xz'],
                    'timeout': '3s'
                },
                'tar': {
                    'description': 'The tar utility is available',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', 'executable', 'tar'],
                    'timeout': '3s'
                },
                'curl': {
                    'description': 'The curl utility is available',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', 'executable', 'curl'],
                    'timeout': '3s'
                },
                'unzip': {
                    'description': 'The unzip utility is available',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', 'executable', 'unzip'],
                    'timeout': '3s'
                },
                'ip_detect_script': {
                    'description': 'The IP detect script produces valid output',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', 'ip'],
                    'timeout': '3s'
                },
                'mesos_master_replog_synchronized': {
                    'description': 'The Mesos master has synchronized its replicated log',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', '--role', 'master', 'mesos-metrics'],
                    'timeout': '30s',
                    'roles': ['master']
                },
                'mesos_agent_registered_with_masters': {
                    'description': 'The Mesos agent has registered with the masters',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', '--role', 'agent', 'mesos-metrics'],
                    'timeout': '30s',
                    'roles': ['agent']
                },
                'journald_dir_permissions': {
                    'description': 'Journald directory has the right owners and permissions',
                    'cmd': ['/opt/mesosphere/bin/dcos-checks', 'journald'],
                    'timeout': '3s',
                },
            },
            'prestart': [],
            'poststart': [
                'components_master',
                'components_agent',
                'xz',
                'tar',
                'curl',
                'unzip',
                'ip_detect_script',
                'mesos_master_replog_synchronized',
                'mesos_agent_registered_with_masters',
                'journald_dir_permissions',
            ],
        },
    }

    if check_time == 'true':
        # Add the clock sync check.
        clock_sync_check_name = 'clock_sync'
        check_config['node_checks']['checks'][clock_sync_check_name] = {
            'description': 'System clock is in sync.',
            'cmd': ['/opt/mesosphere/bin/dcos-checks', 'time'],
            'timeout': '3s'
        }
        check_config['node_checks']['poststart'].append(clock_sync_check_name)

    return json.dumps(check_config)
对于已经在运行的节点,可以直接修改下面这个配置文件中相应检查的超时时间(timeout),然后重启 dcos-diagnostics 服务即可:
/opt/mesosphere/etc/dcos-diagnostics-runner-config.json