#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Filename: jvm_monitor
# Description: collect jvm info
# Author: quke
# Date: 2018/8/22
import base64
import datetime
import json
import logging.handlers
import os
import random
import re
import socket
import time
from subprocess import Popen, PIPE

import MySQLdb
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
# One record layout shared by the root (console) handler and the rotating file.
_LOG_FORMAT = '%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s'
_LOG_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    datefmt=_LOG_DATE_FORMAT,
)

# Created but never attached to any logger in this file; kept so the
# module-level name stays available.
console_handler = logging.StreamHandler()

# Rotate at 10 MiB (10485760 bytes), keeping five old files.
file_handler = logging.handlers.RotatingFileHandler('jvm_monitor.log', maxBytes=10485760, backupCount=5)
# Without an explicit formatter the file would contain bare messages only
# (no timestamp, level or location), unlike the basicConfig output.
file_handler.setFormatter(logging.Formatter(_LOG_FORMAT, _LOG_DATE_FORMAT))

logger = logging.getLogger(__name__)
logger.addHandler(file_handler)

# Name of the machine this script runs on; used to resolve module aliases.
hostname = socket.gethostname()
def run_command(cmd):
    """Run *cmd* through the shell and return its standard output as lines.

    :param cmd: shell command line (executed with ``shell=True``).
    :return: list of output lines; leading/trailing newlines are stripped
        first, so an empty output yields ``['']``.
    :raises SystemExit: when the command writes anything to stderr. The
        stderr text is logged first. The exit status is not inspected.
    """
    # universal_newlines=True makes communicate() return text on Python 3 as
    # well, so the str-based strip/split below works on both major versions.
    process = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    stdout, stderr = process.communicate()
    if stderr:
        logger.error('Excepiton with run %s:%s' % (cmd, stderr))
        raise SystemExit
    return stdout.strip('\n').split('\n')
def requests_retry(
        retries=3,
        backoff_factor=0.3,
        status_forcelist=(500, 502, 504),
        session=None,
):
    """Return a requests session whose HTTP(S) adapters retry failed calls.

    :param retries: used for the total, read and connect retry budgets.
    :param backoff_factor: passed through to ``Retry``.
    :param status_forcelist: HTTP status codes that trigger a retry.
    :param session: existing session to configure; a new one is created
        when omitted.
    :return: the configured session.
    """
    if session is None:
        session = requests.Session()

    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    # The same adapter serves both schemes.
    for scheme in ('http://', 'https://'):
        session.mount(scheme, adapter)
    return session
def execute_sql(sql, host='192.168.1.1', user='user', password='password', db='db', params=None):
    """Run one SQL statement on a fresh MySQL connection and return all rows.

    :param sql: statement text.
    :param host: MySQL server host.
    :param user: MySQL user name.
    :param password: MySQL password.
    :param db: database (schema) name.
    :param params: optional parameters handed to ``cursor.execute`` so the
        driver does the quoting; when ``None`` the statement is sent as-is.
    :return: result of ``cursor.fetchall()``.

    The transaction is committed when the statement text contains
    ``insert`` or ``update`` (case-insensitive). The connection and cursor
    are always closed, even when the statement fails.
    """
    # NOTE(review): connection defaults are hard-coded in the signature.
    connection = MySQLdb.connect(host=host, user=user, passwd=password, db=db)
    try:
        cursor = connection.cursor()
        try:
            if params is None:
                cursor.execute(sql)
            else:
                cursor.execute(sql, params)
            lowered = sql.lower()
            if 'insert' in lowered or 'update' in lowered:
                connection.commit()
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()
def get_all_mixed_info():
    """Load the ``cmdb_mixed_relation`` table as a nested mapping.

    :return: ``{mixhost: {module: alias}}`` built from every row of the
        table.
    """
    sql = 'select mixhost,module,alias from cmdb_mixed_relation'
    rows = execute_sql(sql, host='192.168.1.1', user='user', password='password', db='db')

    mixed = {}
    # "mixhost" rather than "hostname" so the module-level hostname is not shadowed.
    for mixhost, modulename, alias in rows:
        mixed.setdefault(mixhost, {})[modulename] = alias
    return mixed
def get_java_module(args, base_dir='/apps'):
    """Find the module directory under *base_dir* that is named in *args*.

    A directory qualifies when its name contains ``java``, ``boot``,
    ``tomcat`` or ``mycat`` and the name occurs as a substring of *args*.

    :param args: text to search (callers pass one line of ``jps`` output).
    :param base_dir: directory whose sub-directories are the candidates.
    :return: the matching directory name, or ``None`` when nothing matches.
        If several names match, the longest wins (ties broken
        alphabetically) so that e.g. ``foo-java-admin`` is preferred over
        its prefix ``foo-java``; previously the winner depended on the
        arbitrary ``os.listdir`` order.
    :raises OSError: when *base_dir* cannot be listed.
    """
    markers = ('java', 'boot', 'tomcat', 'mycat')
    candidates = [
        name for name in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, name))
        and any(marker in name for marker in markers)
        and name in args
    ]
    if not candidates:
        return None
    return min(candidates, key=lambda name: (-len(name), name))
def get_alias(module_name):
    """Return the alias registered for *module_name* on this host.

    The lookup goes through ``get_all_mixed_info()`` keyed by the
    module-level ``hostname``.

    :param module_name: module to look up.
    :return: the alias, or the string ``'null'`` when the host or module is
        unknown or the stored alias is empty.
    """
    alias = get_all_mixed_info().get(hostname, {}).get(module_name)
    return alias or 'null'
def get_gc_collector_name(line):
    """Pick the young- and old-generation collector flags from JVM arguments.

    :param line: JVM argument text (callers pass one line of ``jps -mlv``).
    :return: ``(ygc, ogc)``. Each is the first enabled flag name from its
        candidate list, or ``'ParNew'`` / ``'CMS'`` respectively when none
        of the candidates is enabled.

    Only the enabling form ``-XX:+<flag>`` counts. A plain substring test
    would also report a collector that is explicitly switched off with
    ``-XX:-<flag>``.
    """
    def first_enabled(flags, default):
        # Candidate order is the priority order.
        for flag in flags:
            if '-XX:+' + flag in line:
                return flag
        return default

    ygc = first_enabled(('UseParNewGC', 'UseG1GC', 'UseSerialGC', 'UseParallelGC'), 'ParNew')
    ogc = first_enabled(('UseConcMarkSweepGC', 'UseG1GC', 'UseParallelOldGC', 'UseSerialGC'), 'CMS')
    return ygc, ogc
def get_start_time(pid):
    """Return the start time of process *pid* as ``YYYY-mm-dd HH:MM:SS``.

    :param pid: process id, interpolated into a ``ps`` command line.
    :return: formatted start time string.
    :raises ValueError: when ``ps`` prints no data row for the pid or the
        row cannot be parsed.
    """
    # LC_ALL=C pins English day/month names, which is what the %a/%b
    # directives below expect regardless of the host's locale.
    ret = run_command('LC_ALL=C ps -o lstart -p %s' % pid)
    # ret[0] is the column header; the value is on the second line.
    if len(ret) < 2 or not ret[1].strip():
        raise ValueError('ps reported no start time for pid %s' % pid)
    started = time.strptime(ret[1].strip(), '%a %b %d %H:%M:%S %Y')
    return time.strftime('%Y-%m-%d %H:%M:%S', started)
def get_jstat_info(pid):
    """Collect the ``jstat -gc`` statistics of JVM *pid* as a dict of strings.

    The header line of the jstat output is paired with the value line, so
    the result does not depend on how many columns the JDK prints.
    Column names map to keys as follows:

    * ``YGC``/``YGCT``/``FGC``/``FGCT`` -> ``jvmYgc``/``jvmYgct``/
      ``jvmFgc``/``jvmFgct`` (the keys read elsewhere in this file);
    * ``PC``/``PU`` -> ``pc``/``pu``; ``MC``/``MU`` are also stored as
      ``pc``/``pu`` so consumers of those keys keep working when the JDK
      reports metaspace columns instead - NOTE(review): confirm this
      equivalence is wanted;
    * ``GCT`` -> ``jvmGct`` - NOTE(review): the original key name for this
      column was lost; nothing in this file reads it;
    * any other column -> its lower-cased name (``s0c``, ``ec``, ...).

    :param pid: JVM process id.
    :return: ``{key: value-as-string}``.
    :raises ValueError: when the output has no value line or header and
        values do not line up.
    """
    ret = run_command('jstat -gc %s' % pid)
    if len(ret) < 2:
        raise ValueError('unexpected jstat -gc output for pid %s: %r' % (pid, ret))

    renamed = {
        'PC': 'pc', 'MC': 'pc',
        'PU': 'pu', 'MU': 'pu',
        'YGC': 'jvmYgc', 'YGCT': 'jvmYgct',
        'FGC': 'jvmFgc', 'FGCT': 'jvmFgct',
        'GCT': 'jvmGct',
    }
    # split() with no argument also copes with the leading padding jstat
    # uses to right-align its columns.
    columns = ret[0].split()
    values = ret[1].split()
    if len(columns) != len(values):
        raise ValueError('jstat -gc header/value mismatch for pid %s: %r' % (pid, ret[:2]))

    gc_statistics = {}
    for column, value in zip(columns, values):
        gc_statistics[renamed.get(column, column.lower())] = value
    return gc_statistics
def get_thread_count(pid):
    """Return ``(active_thread_count, total_thread_count)`` for JVM *pid*.

    Both values are strings taken from ``name=value`` lines printed by
    ``jstat -snap``.

    :param pid: JVM process id.
    :return: tuple of two strings.
    :raises IndexError: when neither the named counters nor the positional
        fallback lines are available.
    """
    ret = run_command('jstat -snap %s' % pid)
    counters = dict(entry.split('=', 1) for entry in ret if '=' in entry)

    # Prefer lookup by counter name. NOTE(review): assumes the third-from-last
    # and last lines are java.threads.live and java.threads.started - confirm
    # against real jstat output. The positions remain as the fallback.
    active_thread_count = counters.get('java.threads.live')
    if active_thread_count is None:
        active_thread_count = ret[-3].split('=')[1]

    total_thread_count = counters.get('java.threads.started')
    if total_thread_count is None:
        total_thread_count = ret[-1].split('=')[1]

    return active_thread_count, total_thread_count
def get_jvm_info():
    """Describe every monitored JVM listed by ``jps -mlv``.

    Lines for ``jps`` itself and for ``com.lagou.jmonitor.AgentWatcher``
    are ignored. A JVM is skipped (with an error log) when no module
    directory matches its command line or when its alias resolves to
    ``'null'``.

    :return: list of dicts with the keys ``pid``, ``module``, ``alias``,
        ``start_time``, ``gc_statistics``, ``active_thread_count``,
        ``total_thread_count``, ``ygc``, ``ogc``, ``main_function`` and
        ``main_args``.
    """
    ignored_markers = ('sun.tools.jps.Jps', 'com.lagou.jmonitor.AgentWatcher')
    instances = []

    for line in run_command('jps -mlv'):
        if not line or any(marker in line for marker in ignored_markers):
            continue

        module = get_java_module(line)
        if module is None:
            # "None in hostname" below would raise TypeError and abort the
            # whole run; an unrecognised JVM is skipped instead.
            logger.error('can not determine module for jvm [%s] , continue' % line)
            continue

        # When the module name is part of the host name, the host name is
        # used as the alias; otherwise the alias is looked up.
        alias = hostname if module in hostname else get_alias(module)
        if 'null' == alias:
            logger.error('[%s] can not get mixed module alias name , continue' % module)
            continue

        ygc, ogc = get_gc_collector_name(line)
        # Space-separated fields: pid, main class/jar, then all arguments.
        instances_list = line.split(' ')
        pid = instances_list[0]
        active_thread_count, total_thread_count = get_thread_count_for(pid) if False else get_thread_count(pid)

        instances.append(
            dict(
                pid=pid,
                module=module,
                alias=alias,
                start_time=get_start_time(pid),
                gc_statistics=get_jstat_info(pid),
                active_thread_count=active_thread_count,
                total_thread_count=total_thread_count,
                ygc=ygc,
                ogc=ogc,
                main_function=instances_list[1],
                main_args=' '.join(instances_list[2:]),
            )
        )
    return instances
def _sql_quote(value):
    """Render *value* as a single-quoted MySQL string literal."""
    # Backslash first, then the quote, so added escapes are not re-escaped.
    # NOTE(review): assumes the server's default backslash-escape mode.
    text = '%s' % (value,)
    return "'%s'" % text.replace('\\', '\\\\').replace("'", "\\'")


def push_to_oss(jvm):
    """Insert or update the ``jvmmonitordata`` row for one JVM.

    A row is considered to exist when the alias appears in any row selected
    by module name, or when a row with that host name exists. Existing rows
    get ``pid``, ``gclist`` and ``fgcygc`` refreshed; otherwise a full row
    is inserted.

    Every string value is escaped before being placed in the statement:
    JVM arguments can contain quotes or backslashes, which would otherwise
    break (or alter) the SQL.

    :param jvm: one entry as produced by ``get_jvm_info``.
    """
    modulename = jvm.get('module')
    # "alias" rather than "hostname" so the module-level hostname is not shadowed.
    alias = jvm.get('alias')
    pid = jvm.get('pid')
    mainclassname = jvm.get('main_function')
    vmparam = jvm.get('main_args')
    updated = jvm.get('start_time')
    stats = jvm['gc_statistics']

    gclist = json.dumps([
        dict(useTime=stats['jvmYgct'], name=jvm['ygc'], times=stats['jvmYgc']),
        dict(useTime=stats['jvmFgct'], name=jvm['ogc'], times=stats['jvmFgc']),
    ])
    fgcygc = json.dumps(dict(
        jvmFgc=stats['jvmFgc'],
        jvmYgc=stats['jvmYgc'],
        jvmFgct=stats['jvmFgct'],
        jvmYgct=stats['jvmYgct'],
    ))

    get_hostnames_sql = 'select hostname,modulename from jvmmonitordata where modulename=%s' % _sql_quote(
        modulename)
    ignore_hostname_ne_modulename = 'select hostname from jvmmonitordata where hostname=%s' % _sql_quote(alias)
    logger.info('execute sql :%s' % get_hostnames_sql)

    is_existing = (
        any(alias in row for row in execute_sql(get_hostnames_sql))
        or any(alias in row for row in execute_sql(ignore_hostname_ne_modulename))
    )

    if is_existing:
        update_jvmmonitordata_sql = "update jvmmonitordata set pid=%d,gclist=%s,fgcygc=%s where hostname=%s" % (
            int(pid), _sql_quote(gclist), _sql_quote(fgcygc), _sql_quote(alias))
        logger.info('execute sql :%s' % update_jvmmonitordata_sql)
        execute_sql(update_jvmmonitordata_sql)
    else:
        insert_jvmmonitordata_sql = (
            "insert into jvmmonitordata"
            "(hostname,modulename,mainclassname,pid,vmparam,gclist,updated,fgcygc) "
            "values (%s,%s,%s,%d,%s,%s,%s,%s)" % (
                _sql_quote(alias), _sql_quote(modulename), _sql_quote(mainclassname), int(pid),
                _sql_quote(vmparam), _sql_quote(gclist), _sql_quote(updated), _sql_quote(fgcygc)))
        logger.info('execute sql :%s' % insert_jvmmonitordata_sql)
        execute_sql(insert_jvmmonitordata_sql)
def get_hbase_svr():
    """Pick an HBase REST endpoint that answers a HEAD request with 200.

    Up to ten randomly chosen endpoints are probed (2 second timeout each).

    :return: the first endpoint that answered 200. When all ten probes
        fail, an error is logged and the last endpoint tried is still
        returned, so the caller may attempt to use it anyway.
    """
    hbase_list = ["http://192.168.100.1:8080", "http://192.168.100.2:8080", "http://192.168.100.3:8080"]
    hbase_url = None

    for _ in range(10):
        hbase_url = random.choice(hbase_list)
        try:
            r = requests.head(hbase_url, timeout=2)
        except requests.RequestException:
            logger.info("connect " + hbase_url + " error, try another")
            continue
        if r.status_code == 200:
            break
    else:
        # Reached only when no probe hit the break above.
        logger.error("connect hbase failed with 10 times")
    return hbase_url
def build_hbase_data(jvm):
    """Convert one JVM sample into the flat integer record stored in HBase.

    Each memory figure is read from ``jvm['gc_statistics']`` as a float,
    multiplied by 1000 and truncated to ``int``. The survivor figures are
    the sum of both survivor spaces (``s0*`` + ``s1*``).

    :param jvm: dict with ``alias``, ``gc_statistics``,
        ``total_thread_count`` and ``active_thread_count``.
    :return: dict with ``hostName``, ``jvmEc``, ``jvmEu``, ``jvmOc``,
        ``jvmOu``, ``jvmPc``, ``jvmPu``, ``jvmSc``, ``jvmSu``,
        ``totalThreadCount`` and ``activeThreadCount``.
    :raises KeyError: when a required key is missing.
    :raises ValueError: when a value is not numeric.
    """
    stats = jvm['gc_statistics']

    def scaled(*keys):
        # Sum the named statistics, then apply the x1000 scaling once.
        return int(sum(float(stats[key]) for key in keys) * 1000)

    return dict(
        hostName=jvm['alias'],
        jvmEc=scaled('ec'),
        jvmEu=scaled('eu'),
        jvmOc=scaled('oc'),
        jvmOu=scaled('ou'),
        jvmPc=scaled('pc'),
        jvmPu=scaled('pu'),
        jvmSc=scaled('s0c', 's1c'),
        jvmSu=scaled('s0u', 's1u'),
        totalThreadCount=int(jvm['total_thread_count']),
        activeThreadCount=int(jvm['active_thread_count']),
    )
def jvm_hbase_constructor(jvm):
    """Build the HBase row key and JSON payload for one JVM sample.

    The row key is the base64 form of ``<hostName>:<YYYYmmddHHMM>`` (local
    time, minute resolution). Each metric becomes one cell whose column is
    base64 of ``jvm:<metric>`` and whose value is base64 of the metric
    rendered as text.

    :param jvm: one entry as produced by ``get_jvm_info``.
    :return: ``(row_key, json_rows)`` where ``json_rows`` is
        ``{"Row": [{"key": row_key, "Cell": [...]}]}``.
    """
    def b64(text):
        # b64encode needs bytes and returns bytes on Python 3; converting
        # both ways keeps the payload JSON-serialisable on Python 2 and 3.
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        return base64.b64encode(text).decode('ascii')

    data = build_hbase_data(jvm)
    row_key = b64(data['hostName'] + ":" + datetime.datetime.now().strftime('%Y%m%d%H%M'))

    columns = ('jvmEc', 'jvmEu', 'jvmOc', 'jvmOu', 'jvmPc', 'jvmPu', 'jvmSc', 'jvmSu',
               'totalThreadCount', 'activeThreadCount')
    cell = [
        {"column": b64('jvm' + ":" + column), "$": b64(str(data[column]))}
        for column in columns
    ]
    json_rows = {"Row": [{'key': row_key, 'Cell': cell}]}
    return row_key, json_rows
def push_to_hbase(jvm):
    """POST one JVM sample to the ``jvm`` table through an HBase REST server.

    Up to ten attempts are made, each against the endpoint returned by
    ``get_hbase_svr()``; the first response with status 200 ends the loop.
    Failures are logged and never raised to the caller.

    :param jvm: one entry as produced by ``get_jvm_info``.
    """
    table_name = 'jvm'
    try:
        row_key, json_rows = jvm_hbase_constructor(jvm)
        # Serialise once; the payload is the same for every attempt.
        payload = json.dumps(json_rows)
    except Exception as e:
        logger.error("construct hbase data error %s" % str(e))
        return

    headers = {"Content-Type": "application/json", "Accept": "application/json"}
    for _ in range(10):
        hbase_url = get_hbase_svr()
        if not hbase_url:
            continue
        try:
            response = requests.post(hbase_url + '/' + table_name + '/' + row_key, data=payload,
                                     headers=headers, timeout=60)
        except requests.RequestException as e:
            # Record why the attempt failed instead of swallowing it silently.
            logger.warning("post to hbase %s failed: %s" % (hbase_url, e))
            continue
        if response.status_code == 200:
            break
    else:
        # Reached only when none of the ten attempts returned 200.
        logger.error("try to save hbase failed with 10 times,exit")
def push_data(jvm_infos):
    """Publish every collected JVM sample to MySQL and then to HBase.

    :param jvm_infos: iterable of entries as produced by ``get_jvm_info``.
    """
    for jvm in jvm_infos:
        push_to_oss(jvm)
        push_to_hbase(jvm)
if __name__ == '__main__':
    # Script entry point: collect the JVM samples, then publish them.
    jvm_infos = get_jvm_info()
    push_data(jvm_infos)