使用python监控jvm版本二

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/jiangmingfei/article/details/86524570

接上篇博文

题外话:工作了许多年,基本在java圈里做运维,稍微了解一点jvm的东西,但是又不会java语言,的确是一个遗憾。

下面的代码:

  • 对上个版本并发代码做了性能测试,发现启动虚拟机占了一部分时间,其余时间主要花费在获取各个jvm数据上,多进程、多线程、多协程等都没有带来明显时间的缩减,因此改用串行;当然多进程、多线程、多协程都没有利用jpype.startJVM启动多个虚拟机,因为这样一来消耗内存比较大,对性能有影响,如果内存宽裕,不妨一试。
  • 至于jvm中相关参数,第一次从网上别人用java写的代码中抠过来的,发现老是拉取不到数据,最后使用jconsole远程到测试环境,找到正确的参数名
  • 垃圾回收的时间依然没有处理,以后有时间再处理吧
  • 下面的脚本收集10个左右的jvm数据大概要12s左右时间
  • 另外下面的数据收集最后使用python的“+”进行字符串拼接,感觉性能不是很好,看官们是否有好的建议呢

最后是一点心得:

  • 建议生产上java加上-Dapplication.name=******的参数,这样可以实现完全动态监控,找到应用名,然后找到它的jmx端口,zabbix也可以进行灵活的配置。
  • 建议使用Pinpoint、perfino等专业apm监控工具,尤其是perfino,是非常推荐的,遗憾的是它是收费软件;同时不能忽视的是这些专业的apm监控软件或多或少都对程序有一些性能上的影响。perfino对性能影响尤其大,但是对排错有非常大的帮助,甚至能定位到某条sql的执行时长。
#!/usr/bin/python
#coding: utf-8
import os
import sys
import json
import jpype
from jpype import java
from jpype import javax
#from multiprocessing.dummy import Pool as ThreadPool


# 参考:
# https://blog.nobugware.com/post/2010/11/08/jmx-query-python-cpython/
# 官网:http://jpype.sourceforge.net/

# Directory that is scanned for services; each symlinked entry in it is treated as one service.
service_prefix_path = "/data/apps/soa/"


def discovery(service_prefix_path):
    """Print the services found under *service_prefix_path* as JSON on stdout.

    Every non-hidden directory entry that is a symbolic link counts as a
    service.  The printed document has the form
    ``{"data": [{"{#SERVICE}": "<name>"}, ...]}`` (the shape of a Zabbix
    low-level discovery payload), with names in sorted order.

    If the directory is missing or unreadable an empty ``data`` list is
    printed instead of raising.

    :param service_prefix_path: directory whose symlinks name the services.
    """
    try:
        # os.listdir replaces the former `ls` subprocess: no shell is involved,
        # so paths containing spaces or shell metacharacters are handled, and
        # there is no pipe left unclosed.
        names = sorted(os.listdir(service_prefix_path))
    except OSError:
        names = []

    r = {'data': [
        {'{#SERVICE}': name}
        for name in names
        # `ls` never listed dot-files, so keep ignoring them.
        if not name.startswith(".")
        and os.path.islink(os.path.join(service_prefix_path, name))
    ]}
    print(json.dumps(r))


def _jmx_round4(value):
    """Return *value* as a float rounded to four decimal places."""
    return round(float(value), 4)


# java.lang MBeans whose attributes are plain scalars, in emission order:
# (MBean type, ((attribute name, converter applied before formatting), ...)).
_JMX_SCALAR_MBEANS = (
    ("Threading", (
        ("ThreadCount", int),
        ("TotalStartedThreadCount", int),
        ("PeakThreadCount", int),
        ("DaemonThreadCount", int),
    )),
    ("OperatingSystem", (
        ("MaxFileDescriptorCount", int),
        ("OpenFileDescriptorCount", int),
        ("ProcessCpuLoad", _jmx_round4),
    )),
    ("ClassLoading", (
        ("LoadedClassCount", int),
        ("TotalLoadedClassCount", int),
        ("UnloadedClassCount", int),
    )),
    ("Runtime", (
        ("VmName", str),
        ("Uptime", str),
        ("VmVersion", str),
    )),
)

# Composite attributes of the Memory MBean and the fields read from each one.
_JMX_MEMORY_USAGE_ATTRIBUTES = ("HeapMemoryUsage", "NonHeapMemoryUsage")
_JMX_MEMORY_USAGE_FIELDS = ("committed", "max", "used")

# Collector names that are probed; a JVM only exposes the ones it actually uses.
_JMX_GC_NAMES = ("Copy", "MarkSweepCompact", "PS Scavenge",
                 "ConcurrentMarkSweep", "ParNew", "PS MarkSweep")
_JMX_GC_ATTRIBUTES = ("CollectionTime", "CollectionCount")

# Memory pool names that are probed; absent pools simply produce no data.
_JMX_POOL_NAMES = ("Code Cache", "Metaspace", "Compressed Class Space",
                   "Par Eden Space", "Par Survivor Space", "PS Eden Space",
                   "PS Old Gen", "PS Perm Gen", "PS Survivor Space",
                   "CMS Old Gen", "CMS Perm Gen", "Perm Gen")
_JMX_POOL_USAGE_FIELDS = ("committed", "used", "max")


def _jmx_read(connection, object_name, attribute):
    """Read one MBean attribute; return ``(True, value)`` or ``(False, None)`` on error."""
    try:
        value = connection.getAttribute(javax.management.ObjectName(object_name), attribute)
    except Exception:
        # Deliberate best effort: an MBean or attribute this JVM does not
        # expose just yields no data line instead of aborting the collection.
        return False, None
    return True, value


def _jmx_line(key, service, value):
    """Format one zabbix_sender input line: ``- jmx.<key>.[<service>] <value>``."""
    return '- jmx.{0}.[{1}]'.format(key, service) + " " + str(value) + "\n"


def _jmx_close(connector):
    """Close a JMX connector if there is one, ignoring errors raised by close()."""
    if connector is None:
        return
    try:
        connector.close()
    except Exception:
        # Nothing useful can be done if closing fails; the data is already read.
        pass


def _jmx_collect(connection, service):
    """Return the list of sender lines for every readable metric on *connection*."""
    lines = []

    # Scalar attributes: Threading, OperatingSystem, ClassLoading, Runtime.
    for mbean_type, attributes in _JMX_SCALAR_MBEANS:
        object_name = "java.lang:type={0}".format(mbean_type)
        for attribute, convert in attributes:
            ok, value = _jmx_read(connection, object_name, attribute)
            if ok:
                key = "{0}.{1}".format(mbean_type, attribute)
                lines.append(_jmx_line(key, service, convert(value)))

    # Memory: two composite usage attributes followed by one scalar counter.
    memory_object = "java.lang:type=Memory"
    for attribute in _JMX_MEMORY_USAGE_ATTRIBUTES:
        ok, usage = _jmx_read(connection, memory_object, attribute)
        if ok:
            for field in _JMX_MEMORY_USAGE_FIELDS:
                key = "Memory.{0}.{1}".format(attribute, field)
                # NOTE(review): `.contents` is kept from the original code;
                # confirm the jpype version in use exposes it.
                lines.append(_jmx_line(key, service, int(usage.contents.get(field))))
    ok, pending = _jmx_read(connection, memory_object, "ObjectPendingFinalizationCount")
    if ok:
        lines.append(_jmx_line("Memory.ObjectPendingFinalizationCount", service, int(pending)))

    # GarbageCollector: values are forwarded unconverted (the Java API
    # documents CollectionTime in milliseconds).
    for name in _JMX_GC_NAMES:
        object_name = "java.lang:type=GarbageCollector,name={0}".format(name)
        key_name = name.replace(" ", "_")
        for attribute in _JMX_GC_ATTRIBUTES:
            ok, value = _jmx_read(connection, object_name, attribute)
            if ok:
                key = "GarbageCollector.{0}.{1}".format(key_name, attribute)
                lines.append(_jmx_line(key, service, int(value)))

    # MemoryPool: the composite "Usage" attribute of each pool.
    for name in _JMX_POOL_NAMES:
        object_name = "java.lang:type=MemoryPool,name={0}".format(name)
        ok, usage = _jmx_read(connection, object_name, "Usage")
        if ok:
            key_name = name.replace(" ", "_")
            for field in _JMX_POOL_USAGE_FIELDS:
                key = "MemoryPool.{0}.Usage.{1}".format(key_name, field)
                lines.append(_jmx_line(key, service, int(usage.contents.get(field))))

    return lines


def _Get_Jmx(service, port):
    """Collect JVM metrics from the local JMX endpoint on *port*.

    Connects to ``service:jmx:rmi:///jndi/rmi://127.0.0.1:<port>/jmxrmi`` with
    empty credentials and reads Threading, OperatingSystem, ClassLoading,
    Runtime, Memory, GarbageCollector and MemoryPool attributes.  Attributes
    that cannot be read are skipped silently.  The JMX connector is closed
    before returning.

    The JVM must already have been started with ``jpype.startJVM``.

    :param service: service name, embedded in every item key as ``[<service>]``.
    :param port: integer JMX remote port of the target JVM.
    :returns: newline-terminated zabbix_sender input lines of the form
        ``- jmx.<key>.[<service>] <value>`` joined into one string, or ``""``
        if the connection could not be established (the error is printed).
    """
    user = ""
    password = ""
    url = "service:jmx:rmi:///jndi/rmi://127.0.0.1:%d/jmxrmi" % (port)

    # A failed connection to one service must not stop the caller from
    # processing the remaining services, so it is reported and swallowed here.
    connector = None
    try:
        environment = java.util.HashMap()
        credentials = jpype.JArray(java.lang.String)([user, password])
        environment.put(javax.management.remote.JMXConnector.CREDENTIALS, credentials)
        jmx_url = javax.management.remote.JMXServiceURL(url)
        connector = javax.management.remote.JMXConnectorFactory.connect(jmx_url, environment)
        connection = connector.getMBeanServerConnection()
    except Exception as e:
        print(e)
        _jmx_close(connector)
        return ""

    try:
        return "".join(_jmx_collect(connection, service))
    finally:
        # The original never closed the connector, leaking one per service.
        _jmx_close(connector)


def _Get_Port(service_path):
    """Find the JMX remote port of the process whose command line matches *service_path*.

    The process table is searched with ``ps -ef | grep <service_path>`` and
    the value following ``jmxremote.port=`` on the first matching line is
    parsed as the port.

    :param service_path: path fragment identifying the service's process.
    :returns: a ``(port, status)`` tuple where status is

        * ``0`` -- running with JMX enabled; *port* is an ``int``
        * ``1`` -- a process matched but no numeric ``jmxremote.port`` was
          found; *port* is ``None``
        * ``2`` -- no matching process (or the lookup command could not be
          run); *port* is ``None``
    """
    cmd = "ps -ef |grep %s |grep -v grep | awk -F'jmxremote.port=' '{print $2}' | awk '{print $1}'"  % (service_path)

    # Default to "no output" so a failed lookup is reported as status 2
    # instead of crashing on an unbound variable.
    res = ""
    try:
        pipe = os.popen(cmd)
        try:
            res = pipe.read()
        finally:
            pipe.close()
    except Exception:
        pass

    if not res:
        return None, 2

    try:
        # A matching process without the JMX flag produces an empty first
        # line, which fails the int() conversion and maps to status 1.
        port = int(res.splitlines()[0])
    except (ValueError, IndexError):
        return None, 1
    return port, 0


def SendData(service_prefix_path):
    """Collect JMX metrics for every service and push them with zabbix_sender.

    Starts the JVM through jpype, then handles the services serially: every
    non-hidden symlink under *service_prefix_path* is a service.  For each
    one the JMX port is looked up with ``_Get_Port``; if the status is 0 its
    metrics are gathered with ``_Get_Jmx``.  A ``jmx.jvm_status`` line is
    always added for the service.  The lines are written to the sender input
    file and ``zabbix_sender`` is invoked on it.

    Prints ``1`` if the sender output contains ``failed: 0`` and ``0``
    otherwise, so the script can be triggered from an ordinary item that
    reports whether sending worked.

    :param service_prefix_path: directory whose symlinks name the services.
    """
    zbx_sender_cmd = "{0} -c {1} -i {2}"
    zbx_conf = "/usr/local/services/zabbix-3.0.0/etc/zabbix_agentd.conf"
    zbx_sender_file = "/tmp/.zbx_jmx_sender.txt"
    zbx_sender = "/usr/local/services/zabbix-3.0.0/bin/zabbix_sender"

    # Start the JVM that hosts the JMX client classes.
    jpype.startJVM("/usr/local/services/jdk1.8.0_91/jre/lib/amd64/server/libjvm.so")

    # Collection is intentionally serial: on the single-CPU test machine
    # threads raised errors (apparently inside jpype) and concurrency took
    # longer than the serial version anyway.

    try:
        # os.listdir replaces the former `ls` subprocess: no shell, no leaked
        # pipe, and paths containing spaces work.
        names = sorted(os.listdir(service_prefix_path))
    except OSError:
        names = []

    chunks = []
    for service in names:
        # `ls` never listed dot-files, so keep ignoring them.
        if service.startswith("."):
            continue
        service_path = os.path.join(service_prefix_path, service)
        if not os.path.islink(service_path):
            continue
        # The trailing slash makes the process match more specific.
        port, status = _Get_Port(service_path + "/")
        if status == 0:
            chunks.append(_Get_Jmx(service, port))
        chunks.append("- jmx.jvm_status.[{0}] {1}\n".format(service, status))

    with open(zbx_sender_file, "w") as f:
        f.write("".join(chunks))

    send_ret = os.popen(zbx_sender_cmd.format(zbx_sender, zbx_conf, zbx_sender_file))
    try:
        sender_output = send_ret.read()
    finally:
        send_ret.close()

    # 1 means the data was sent without failures, 0 means sending went wrong.
    if "failed: 0" in sender_output:
        print(1)
    else:
        print(0)

# Entry point: no arguments collects and sends data, the single argument
# "discovery" prints the service list, anything else is rejected.
if __name__ == "__main__":
    argc = len(sys.argv)
    if argc == 1:
        SendData(service_prefix_path)
    elif argc == 2 and sys.argv[1] == "discovery":
        discovery(service_prefix_path)
    else:
        sys.stderr.write("Args is wrong!")
展开阅读全文

没有更多推荐了,返回首页