HotSpot Serviceability Agent 实现浅析#1

今天来看看HotSpotVM强大的SA,底层到底是怎么实现的。官方文档对其实现机制有以下描述:

SA consists mostly of Java classes but it contains a small amount of native code to read raw bits from processes and core files.

  • On Solaris SA uses libproc to read bits from a process or a core file.
  • On Linux, SA uses a mix of /proc and ptrace (mostly the latter) to read bits from a process. For core files, SA parses ELF files directly.
  • On Windows, SA uses the Windows dbgeng.dll library to read the raw bits from processes and core files. An alternate implementation uses Windows process debugging primitives, but this only works for live processes.

也就是说,在Linux平台上,SA是混合使用了/proc和ptrace。下面就钻进代码里面看看到底是怎么用的。

PTRACE_ATTACH

SA工具的基类是Tool,通过调用start方法启动,该方法里面会new一个BugSpotAgent,并且attach到目标VM上面。下面来看看通过PID进行attach的方法,BugSpotAgent#attach(int)

  /**
   * Attaches this agent to a process running on the local machine.
   *
   * @param processID id of the process to attach to
   * @throws DebuggerException if {@code debugger} is already non-null,
   *         i.e. an attach has already been performed
   */
    public synchronized void attach(int processID) throws DebuggerException {
        final boolean alreadyAttached = (debugger != null);
        if (alreadyAttached) {
            throw new DebuggerException("Already attached");
        }

        // Record the startup parameters, then run the attach sequence.
        isServer = false;
        startupMode = PROCESS_MODE;
        pid = processID;
        go();
    }


 /**
  * Runs the two attach phases in order: setupDebugger() first, then
  * setupVM(), whose boolean result is stored in javaMode.
  */
 private void go() {
        setupDebugger();
        javaMode = setupVM();
    }

接下来看看setupDebugger和setupVM都干了啥。

setupDebugger

setupDebugger会根据不同平台来设置debugger,Linux平台下调用的是setupDebuggerLinux,

  /**
   * Linux flavour of the debugger setup: configures the JVM library names,
   * selects the MachineDescription matching {@code cpu}, creates the
   * LinuxDebuggerLocal and finally attaches it.
   *
   * @throws DebuggerException with message "unsupported machine type" when
   *         no MachineDescription class can be instantiated for {@code cpu}
   */
  private void setupDebuggerLinux() {
        // Step 1: set up the JVM library file names.
        setupJVMLibNamesLinux();

        // Step 2: pick the machine description, then create the
        // LinuxDebuggerLocal.
        if (cpu.equals("x86")) {
            machDesc = new MachineDescriptionIntelX86();
        } else if (cpu.equals("ia64")) {
            machDesc = new MachineDescriptionIA64();
        } else if (cpu.equals("amd64")) {
            machDesc = new MachineDescriptionAMD64();
        } else if (cpu.equals("sparc")) {
            final boolean is64Bit = (LinuxDebuggerLocal.getAddressSize() == 8);
            if (is64Bit) {
                machDesc = new MachineDescriptionSPARC64Bit();
            } else {
                machDesc = new MachineDescriptionSPARC32Bit();
            }
        } else {
            // Unknown CPU name: try to load a description class named after it.
            try {
                String descClassName =
                    "sun.jvm.hotspot.debugger.MachineDescription" + cpu.toUpperCase();
                machDesc = (MachineDescription) Class.forName(descClassName).newInstance();
            } catch (Exception e) {
                throw new DebuggerException("unsupported machine type");
            }
        }

        // Note we do not use a cache for the local debugger in server
        // mode; it will be taken care of on the client side (once remote
        // debugging is implemented).
        final boolean useCache = !isServer;
        debugger = new LinuxDebuggerLocal(machDesc, useCache);

        // Step 3: call the attach method of LinuxDebuggerLocal.
        attachDebugger();
    }

来看下 LinuxDebuggerLocal#attach(int) 方法,
  /**
   * From the Debugger interface via JVMDebugger.
   *
   * Resets the thread and load-object lists, then runs the native attach on
   * the worker thread via {@code workerThread.execute}.
   *
   * @param processID id of the process to attach to
   */
    public synchronized void attach(int processID) throws DebuggerException {
        checkAttached();
        threadList = new ArrayList();
        loadObjectList = new ArrayList();

        final int targetPid = processID;
        workerThread.execute(new WorkerThreadTask() {
            public void doit(LinuxDebuggerLocal debugger) {
                debugger.attach0(targetPid);
                debugger.attached = true;
                debugger.isCore = false;
                findABIVersion();
            }
        });
    }

最终是调用了一个本地方法, attach0
  /**
   * Native half of attach(int); called from the attach task with the id of
   * the process to attach to.
   *
   * @param pid id of the target process
   */
  private native void attach0(int pid)
                                throws DebuggerException;

它的实现在LinuxDebuggerLocal.c,
/*
 * Class:     sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal
 * Method:    attach0
 * Signature: (I)V
 *
 * Grabs the target process with Pgrab(), stores the resulting handle in the
 * Java object's p_ps_prochandle_ID long field, and then fills in the thread
 * and load-object lists.
 */
JNIEXPORT void JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_attach0__I
  (JNIEnv *env, jobject this_obj, jint jpid) {

  struct ps_prochandle* handle = Pgrab(jpid);

  if (handle == NULL) {
    THROW_NEW_DEBUGGER_EXCEPTION("Can't attach to the process");
  }

  /* The pointer is round-tripped through intptr_t so it fits a Java long. */
  (*env)->SetLongField(env, this_obj, p_ps_prochandle_ID, (jlong)(intptr_t)handle);
  fillThreadsAndLoadObjects(env, this_obj, handle);
}

Pgrab方法
// Attach to the process "pid" and to every thread found in it.
// Returns a freshly allocated ps_prochandle on success, or NULL when the
// allocation or any of the ptrace attaches fails.
struct ps_prochandle* Pgrab(pid_t pid) {
  thread_info* cur = NULL;
  struct ps_prochandle* handle =
      (struct ps_prochandle*) calloc(1, sizeof(struct ps_prochandle));

  if (handle == NULL) {
     print_debug("can't allocate memory for ps_prochandle\n");
     return NULL;
  }

  // 1. Attach to the target process itself.
  if (ptrace_attach(pid) != true) {
     free(handle);
     return NULL;
  }

  // 2. Fill in the ps_prochandle: the pid and the operations vtable.
  handle->pid = pid;
  handle->ops = &process_ops;

  // Read the target's library info and symbol tables. This must happen
  // before attaching threads, as the symbols in the pthread library will be
  // used to figure out the list of threads within the same process.
  read_lib_info(handle);

  // Read thread info.
  read_thread_info(handle, add_new_thread);

  // Attach to each thread; the main thread was attached above, so skip it.
  for (cur = handle->threads; cur != NULL; cur = cur->next) {
     if (handle->pid == cur->lwp_id) {
        continue;
     }
     if (ptrace_attach(cur->lwp_id) != true) {
        // A single failed attach makes the whole grab fail.
        Prelease(handle);
        return NULL;
     }
  }
  return handle;
}

先来看是怎么attach到目标进程的,
// Attach to the process or thread identified by "pid" using PTRACE_ATTACH.
// On a successful attach the result of ptrace_waitpid(pid) is returned;
// on failure a debug message is printed and false is returned.
static bool ptrace_attach(pid_t pid) {
  const long rc = ptrace(PTRACE_ATTACH, pid, NULL, NULL);

  if (rc >= 0) {
    return ptrace_waitpid(pid);
  }

  print_debug("ptrace(PTRACE_ATTACH, ..) failed for %d\n", pid);
  return false;
}

妥妥的,这里就能看到是使用了ptrace。ptrace的具体用法参考man文档,这里就不展开了。

然后还有另一个方法值得注意,就是获取库文件跟符号表信息的read_lib_info方法

// Walks /proc/<pid>/maps of the target process and registers every mapped
// file that is not yet known (per find_lib) through add_lib_info.
// Returns false only when the maps file cannot be opened; lines that cannot
// be parsed or libraries that cannot be added are skipped.
static bool read_lib_info(struct ps_prochandle* ph) {
  char fname[32];
  char buf[256];
  FILE *fp = NULL;

  // 1. Open /proc/<pid>/maps. snprintf bounds the write to fname.
  snprintf(fname, sizeof(fname), "/proc/%d/maps", ph->pid);
  fp = fopen(fname, "r");
  if (fp == NULL) {
    print_debug("can't open /proc/%d/maps file\n", ph->pid);
    return false;
  }

  // 2. Collect the library information, one maps line at a time.
  while(fgets_no_cr(buf, 256, fp)){
    char * word[6];
    int nwords = split_n_str(buf, 6, word, ' ', '\0');
    if (nwords > 5 && find_lib(ph, word[5]) == false) {
       // "%lx" requires an unsigned long*; using that type directly avoids
       // the conversion/argument mismatch and the _LP64 special case.
       // It is zero-initialized and the sscanf result is checked so that a
       // malformed line can never yield an indeterminate base address.
       unsigned long base = 0;
       lib_info* lib;

       if (sscanf(word[0], "%lx", &base) != 1) {
          print_debug("can't parse base address from '%s'\n", word[0]);
          continue;
       }

       // 3. Add the library together with its symbol table information.
       if ((lib = add_lib_info(ph, word[5], (uintptr_t)base)) == NULL)
          continue; // ignore, add_lib_info prints error

       // we don't need to keep the library open, symtab is already
       // built. Only for core dump we need to keep the fd open.
       close(lib->fd);
       lib->fd = -1;
    }
  }
  fclose(fp);
  return true;
}

/proc/[pid]/maps文件的内容参考man文档,上面的word[0]是address字段,

The address field is the address space in the process that the mapping occupies.

word[5]是pathname字段,

The pathname field will usually be the file that is backing the mapping.

是类似/opt/xxx/install/jdk-1.7.0_51/jre/lib/amd64/server/libjvm.so这样的路径。

库文件与符号表信息的添加,add_lib_info

// Convenience wrapper around add_lib_info_fd() for callers that have no
// open descriptor for the library: passes -1 as the fd.
lib_info* add_lib_info(struct ps_prochandle* ph, const char* libname, uintptr_t base) {
   const int no_fd = -1;
   lib_info* added = add_lib_info_fd(ph, libname, no_fd, base);
   return added;
}


// Creates a lib_info for "libname" mapped at "base" and appends it to the
// library list of "ph".
//
//   fd == -1 : the library is opened here through pathmap_open().
//   fd != -1 : the given descriptor is stored in the lib_info as is.
//
// Returns the new lib_info, or NULL when allocation fails, the name does not
// fit into lib_info.name, the file cannot be opened, or it is not an ELF
// file. In the non-ELF case the descriptor is closed (as before, even when
// it was supplied by the caller). A failure to build the symbol table is
// not an error.
lib_info* add_lib_info_fd(struct ps_prochandle* ph, const char* libname, int fd, uintptr_t base) {
   lib_info* newlib;

   if ( (newlib = (lib_info*) calloc(1, sizeof(struct lib_info))) == NULL) {
      print_debug("can't allocate memory for lib_info\n");
      return NULL;
   }

   // strncpy(dst, src, sizeof(dst)) leaves dst without a terminating NUL when
   // src fills the buffer, and a silently truncated path would name a
   // different file anyway. Reject names that do not fit, terminator included.
   if (strlen(libname) >= sizeof(newlib->name)) {
      print_debug("libname %s too long\n", libname);
      free(newlib);
      return NULL;
   }
   strcpy(newlib->name, libname);
   newlib->base = base;

   if (fd == -1) {
      if ( (newlib->fd = pathmap_open(newlib->name)) < 0) {
         print_debug("can't open shared object %s\n", newlib->name);
         free(newlib);
         return NULL;
      }
   } else {
      newlib->fd = fd;
   }

   // check whether we have got an ELF file. /proc/<pid>/map
   // gives out all file mappings and not just shared objects
   if (is_elf_file(newlib->fd) == false) {
      close(newlib->fd);
      free(newlib);
      return NULL;
   }

   // Add the symbol table information.
   newlib->symtab = build_symtab(newlib->fd, libname);
   if (newlib->symtab == NULL) {
      print_debug("symbol table build failed for %s\n", newlib->name);
   }

   // even if symbol table building fails, we add the lib_info.
   // This is because we may need to read from the ELF file for core file
   // address read functionality. lookup_symbol checks for NULL symtab.
   if (ph->libs) {
      ph->lib_tail->next = newlib;
      ph->lib_tail = newlib;
   }  else {
      ph->libs = ph->lib_tail = newlib;
   }
   ph->num_libs++;

   return newlib;
}

符号表的读取在build_symtab方法,暂时就先不深究了。

setupVM

setupDebugger结束之后,需要setupVM

  /**
   * Builds the HotSpotTypeDataBase for the target VM, configures the
   * debugger with the Java primitive type sizes (unless in REMOTE_MODE),
   * initializes the VM mirror (unless running as server) and finally tries
   * to attach SA's JVMDI module.
   *
   * @return false if a NoSuchSymbolException was raised while building the
   *         type database; true otherwise (a failure to attach the JVMDI
   *         module only resets {@code jvmdi} to null)
   * @throws DebuggerException if {@code os} is none of the handled names
   */
  private boolean setupVM() {
        // We need to instantiate a HotSpotTypeDataBase on both the client
        // and server machine. On the server it is only currently used to
        // configure the Java primitive type sizes (which we should
        // consider making constant). On the client it is used to
        // configure the VM.

        
        // 1. Build the HotSpotTypeDataBase; only the VtblAccess
        //    implementation differs between the operating systems.
        
        try {
            if (os.equals("solaris")) {
                db = new HotSpotTypeDataBase(machDesc, new HotSpotSolarisVtblAccess(debugger, jvmLibNames),
                debugger, jvmLibNames);
            } else if (os.equals("win32")) {
                db = new HotSpotTypeDataBase(machDesc, new Win32VtblAccess(debugger, jvmLibNames),
                debugger, jvmLibNames);
            } else if (os.equals("linux")) {
                db = new HotSpotTypeDataBase(machDesc, new LinuxVtblAccess(debugger, jvmLibNames),
                debugger, jvmLibNames);
            } else if (os.equals("bsd")) {
                db = new HotSpotTypeDataBase(machDesc, new BsdVtblAccess(debugger, jvmLibNames),
                debugger, jvmLibNames);
            } else {
                throw new DebuggerException("OS \"" + os + "\" not yet supported (no VtblAccess implemented yet)");
            }
        }
        catch (NoSuchSymbolException e) {
            // The bootstrapping symbols could not be found: report and give up.
            e.printStackTrace();
            return false;
        }

        
        // 2. Configure the primitive type sizes, as obtained from the target VM
        
        if (startupMode != REMOTE_MODE) {
            // Configure the debugger with the primitive type sizes just obtained from the VM
            debugger.configureJavaPrimitiveTypeSizes(db.getJBooleanType().getSize(),
            db.getJByteType().getSize(),
            db.getJCharType().getSize(),
            db.getJDoubleType().getSize(),
            db.getJFloatType().getSize(),
            db.getJIntType().getSize(),
            db.getJLongType().getSize(),
            db.getJShortType().getSize());
        }

        
        // 3. Build the local representation of the target VM
        
        if (!isServer) {
            // Do not initialize the VM on the server (unnecessary, since it's
            // instantiated on the client)
            VM.initialize(db, debugger);
        }

        // Attaching the JVMDI module is best effort: any exception is printed
        // and jvmdi is reset to null, but setupVM still returns true.
        try {
            jvmdi = new ServiceabilityAgentJVMDIModule(debugger, saLibNames);
            if (jvmdi.canAttach()) {
                jvmdi.attach();
                jvmdi.setCommandTimeout(6000);
                debugPrintln("Attached to Serviceability Agent's JVMDI module.");
                // Jog VM to suspended point with JVMDI module
                resume();
                suspendJava();
                suspend();
                debugPrintln("Suspended all Java threads.");
            } else {
                debugPrintln("Could not locate SA's JVMDI module; skipping attachment");
                jvmdi = null;
            }
        } catch (Exception e) {
            e.printStackTrace();
            jvmdi = null;
        }

        return true;
    }

HotSpotTypeDataBase

HotSpotTypeDataBase存储了目标VM上面的类型信息。来看它的构造函数

  /** <P> This requires a SymbolLookup mechanism as well as the
      MachineDescription. Note that we do not need a NameMangler since
      we use the vmStructs mechanism to avoid looking up C++
      symbols. </P>

      <P> NOTE that it is guaranteed that this constructor will not
      attempt to fetch any Java values from the remote process, only C
      integers and addresses. This is required because we are fetching
      the sizes of the Java primitive types from the remote process,
      implying that attempting to fetch them before their sizes are
      known is illegal. </P>

      <P> Throws NoSuchSymbolException if a problem occurred while
      looking up one of the bootstrapping symbols related to the
      VMStructs table in the remote VM; this may indicate that the
      remote process is not actually a HotSpot VM. </P>

      @param machDesc     machine description, passed on to the superclass
      @param vtblAccess   vtable access helper, passed on to the superclass
      @param symbolLookup debugger used to look up symbols in the target
      @param jvmLibNames  names of the JVM libraries to search for symbols
  */
  public HotSpotTypeDataBase(MachineDescription machDesc,
                             VtblAccess vtblAccess,
                             Debugger symbolLookup,
                             String[] jvmLibNames) throws NoSuchSymbolException {
    super(machDesc, vtblAccess);
    this.symbolLookup = symbolLookup;
    this.jvmLibNames = jvmLibNames;

    // Populate the database from the target VM, in this fixed order.
    readVMTypes();
    initializePrimitiveTypes();
    readVMStructs();
    readVMIntConstants();
    readVMLongConstants();
    readExternalDefinitions();
  }

几个readXXX方法都是从HotSpotVM中读取信息。下面以readVMStructs方法为例来看看。

readVMStructs

 /**
  * Reads the VMStructEntry array exported by the target VM and creates one
  * field in the type database per entry. The layout of VMStructEntry (member
  * offsets and array stride) is itself read from the target process, and the
  * array is walked until an entry with a NULL field name is found.
  *
  * @throws RuntimeException if gHotSpotVMStructs dereferences to null, or an
  *         entry with a field name has a NULL type name
  */
 private void readVMStructs() {
    
    // Offsets of the individual VMStructEntry members
    
    // Get the variables we need in order to traverse the VMStructEntry[]
    long structEntryTypeNameOffset;
    long structEntryFieldNameOffset;
    long structEntryTypeStringOffset;
    long structEntryIsStaticOffset;
    long structEntryOffsetOffset;
    long structEntryAddressOffset;
    long structEntryArrayStride;

    structEntryTypeNameOffset     = getLongValueFromProcess("gHotSpotVMStructEntryTypeNameOffset");
    structEntryFieldNameOffset    = getLongValueFromProcess("gHotSpotVMStructEntryFieldNameOffset");
    structEntryTypeStringOffset   = getLongValueFromProcess("gHotSpotVMStructEntryTypeStringOffset");
    structEntryIsStaticOffset     = getLongValueFromProcess("gHotSpotVMStructEntryIsStaticOffset");
    structEntryOffsetOffset       = getLongValueFromProcess("gHotSpotVMStructEntryOffsetOffset");
    structEntryAddressOffset      = getLongValueFromProcess("gHotSpotVMStructEntryAddressOffset");
    structEntryArrayStride        = getLongValueFromProcess("gHotSpotVMStructEntryArrayStride");

    
    // Look up the address of the gHotSpotVMStructs variable in the target VM
    // via the symbol table
    
    // Fetch the address of the VMStructEntry*
    Address entryAddr = lookupInProcess("gHotSpotVMStructs");
    // Dereference this once to get the pointer to the first VMStructEntry
    entryAddr = entryAddr.getAddressAt(0);
    if (entryAddr == null) {
      throw new RuntimeException("gHotSpotVMStructs was not initialized properly in the remote process; can not continue");
    }

    // Start iterating down it until we find an entry with no name
    Address fieldNameAddr = null;
    String typeName = null;
    String fieldName = null;
    String typeString = null;
    boolean isStatic = false;
    long offset = 0;
    Address staticFieldAddr = null;
    // NOTE(review): size is declared but never used in this method.
    long size = 0;
    long index = 0;
    String opaqueName = "<opaque>";
    lookupOrCreateClass(opaqueName, false, false, false);

    do {

      
      // Read the value of each VMStructEntry member
      

      // Fetch the field name first
      fieldNameAddr = entryAddr.getAddressAt(structEntryFieldNameOffset);
      if (fieldNameAddr != null) {
        fieldName = CStringUtilities.getString(fieldNameAddr);

        // Now the rest of the names. The name of the containing type must
        // be present (a NULL here is an error); it is the field's type
        // string below that may be NULL, indicating that the type is opaque.
        Address addr = entryAddr.getAddressAt(structEntryTypeNameOffset);
        if (addr == null) {
          throw new RuntimeException("gHotSpotVMStructs unexpectedly had a NULL type name at index " + index);
        }
        typeName = CStringUtilities.getString(addr);

        addr = entryAddr.getAddressAt(structEntryTypeStringOffset);
        if (addr == null) {
          typeString = opaqueName;
        } else {
          typeString = CStringUtilities.getString(addr);
        }

        // isStatic is read as a 32-bit C integer; static entries carry an
        // address, nonstatic entries carry an offset (read as unsigned 64-bit).
        isStatic = !(entryAddr.getCIntegerAt(structEntryIsStaticOffset, C_INT32_SIZE, false) == 0);
        if (isStatic) {
          staticFieldAddr = entryAddr.getAddressAt(structEntryAddressOffset);
          offset = 0;
        } else {
          offset = entryAddr.getCIntegerAt(structEntryOffsetOffset, C_INT64_SIZE, true);
          staticFieldAddr = null;
        }

        // The containing Type must already be in the database -- no exceptions
        BasicType containingType = lookupOrFail(typeName);

        // The field's Type must already be in the database -- no exceptions
        BasicType fieldType = (BasicType)lookupType(typeString);

        
        // Create the Field corresponding to this VMStructEntry
        
        // Create field by type
        createField(containingType, fieldName, fieldType,
                    isStatic, offset, staticFieldAddr);
      }

      ++index;
      
      // Advance to the next VMStructEntry
      
      entryAddr = entryAddr.addOffsetTo(structEntryArrayStride);
    } while (fieldNameAddr != null);
  }

HotSpot中的 VMStructEntry 定义在 vmStructs.hpp
// One row of the table describing a VM field to the Serviceability Agent.
// A static field is described by its address, a nonstatic field by its
// offset within the containing type; isStatic says which of the two applies.
typedef struct {
  const char* typeName;            // The type name containing the given field (example: "Klass")
  const char* fieldName;           // The field name within the type           (example: "_name")
  const char* typeString;          // Quoted name of the type of this field (example: "Symbol*");
                                   // parsed in Java to ensure type correctness
  int32_t  isStatic;               // Indicates whether following field is an offset or an address
  uint64_t offset;                 // Offset of field within structure; only used for nonstatic fields
  void* address;                   // Address of field; only used for static fields
                                   // ("offset" can not be reused because of apparent SparcWorks compiler bug
                                   // in generation of initializer data)
} VMStructEntry;

gHotSpotVMStructs 定义在 vmStructs.cpp
// Exported pointer to the VMStructEntry table below. "gHotSpotVMStructs" is
// the symbol the Serviceability Agent looks up in the target process to find
// the first entry.
JNIEXPORT VMStructEntry* gHotSpotVMStructs                 = VMStructs::localHotSpotVMStructs;


// These initializers are allowed to access private fields in classes
// as long as class VMStructs is a friend
//
// The table is assembled by expanding the VM_STRUCTS* field lists with the
// GENERATE_* entry macros, each of which emits one VMStructEntry initializer.
VMStructEntry VMStructs::localHotSpotVMStructs[] = {

  VM_STRUCTS(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
             GENERATE_STATIC_VM_STRUCT_ENTRY, \
             GENERATE_UNCHECKED_NONSTATIC_VM_STRUCT_ENTRY, \
             GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
             GENERATE_NONPRODUCT_NONSTATIC_VM_STRUCT_ENTRY, \
             GENERATE_C1_NONSTATIC_VM_STRUCT_ENTRY, \
             GENERATE_C2_NONSTATIC_VM_STRUCT_ENTRY, \
             GENERATE_C1_UNCHECKED_STATIC_VM_STRUCT_ENTRY, \
             GENERATE_C2_UNCHECKED_STATIC_VM_STRUCT_ENTRY, \
             GENERATE_VM_STRUCT_LAST_ENTRY)

  // The parallel GC, CMS and G1 field lists are only included when SERIALGC
  // is not defined.
#ifndef SERIALGC
  VM_STRUCTS_PARALLELGC(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
                        GENERATE_STATIC_VM_STRUCT_ENTRY)

  VM_STRUCTS_CMS(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
                 GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
                 GENERATE_STATIC_VM_STRUCT_ENTRY)

  VM_STRUCTS_G1(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
                GENERATE_STATIC_VM_STRUCT_ENTRY)
#endif // SERIALGC

  // CPU-specific field list.
  VM_STRUCTS_CPU(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
                 GENERATE_STATIC_VM_STRUCT_ENTRY, \
                 GENERATE_UNCHECKED_NONSTATIC_VM_STRUCT_ENTRY, \
                 GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
                 GENERATE_NONPRODUCT_NONSTATIC_VM_STRUCT_ENTRY, \
                 GENERATE_C2_NONSTATIC_VM_STRUCT_ENTRY, \
                 GENERATE_C1_UNCHECKED_STATIC_VM_STRUCT_ENTRY, \
                 GENERATE_C2_UNCHECKED_STATIC_VM_STRUCT_ENTRY, \
                 GENERATE_VM_STRUCT_LAST_ENTRY)

  // OS/CPU-specific field list.
  VM_STRUCTS_OS_CPU(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
                    GENERATE_STATIC_VM_STRUCT_ENTRY, \
                    GENERATE_UNCHECKED_NONSTATIC_VM_STRUCT_ENTRY, \
                    GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
                    GENERATE_NONPRODUCT_NONSTATIC_VM_STRUCT_ENTRY, \
                    GENERATE_C2_NONSTATIC_VM_STRUCT_ENTRY, \
                    GENERATE_C1_UNCHECKED_STATIC_VM_STRUCT_ENTRY, \
                    GENERATE_C2_UNCHECKED_STATIC_VM_STRUCT_ENTRY, \
                    GENERATE_VM_STRUCT_LAST_ENTRY)
};

VM_STRUCTS 是个宏定义,
#define VM_STRUCTS(nonstatic_field, \
                   static_field, \
                   unchecked_nonstatic_field, \
                   volatile_nonstatic_field, \
                   nonproduct_nonstatic_field, \
                   c1_nonstatic_field, \
                   c2_nonstatic_field, \
                   unchecked_c1_static_field, \
                   unchecked_c2_static_field, \
                   last_entry) \

    ...
    static_field(Threads,                     _thread_list,                                  JavaThread*)                           \
    ...

这个宏定义了所有SA中需要用到的HotSpotVM的字段类型信息,上面只贴出了Threads的_thread_list字段。

对于localHotSpotVMStructs的定义,宏展开后,以_thread_list为例,

GENERATE_STATIC_VM_STRUCT_ENTRY(Threads,                     _thread_list,                                  JavaThread*)

来看看 GENERATE_NONSTATIC_VM_STRUCT_ENTRY 和 GENERATE_STATIC_VM_STRUCT_ENTRY ,也是宏,
// This macro generates a VMStructEntry line for a nonstatic field
// The initializer follows the VMStructEntry member order: quoted type name,
// quoted field name, quoted field type, isStatic = 0, the field's offset
// (via offset_of) and a NULL address.
#define GENERATE_NONSTATIC_VM_STRUCT_ENTRY(typeName, fieldName, type)              \
 { QUOTE(typeName), QUOTE(fieldName), QUOTE(type), 0, cast_uint64_t(offset_of(typeName, fieldName)), NULL },

// This macro generates a VMStructEntry line for a static field
// The initializer follows the VMStructEntry member order: quoted type name,
// quoted field name, quoted field type, isStatic = 1, offset 0 and the
// address of the static member (&typeName::fieldName).
#define GENERATE_STATIC_VM_STRUCT_ENTRY(typeName, fieldName, type)                 \
 { QUOTE(typeName), QUOTE(fieldName), QUOTE(type), 1, 0, &typeName::fieldName },

对应了VMStructEntry的6个字段,其中比较巧妙的就是offset字段跟address字段的计算。offset是非静态变量在结构体中地址的偏移量,address是静态字段的地址。

静态字段比较简单,&typeName::fieldName,直接使用&运算就可以获取地址。

非静态字段获取偏移量就要复杂一点,通过offset_of方法

// HACK: gcc warns about applying offsetof() to non-POD object or calculating
//       offset directly when base address is NULL. Use 16 to get around the
//       warning. gcc-3.4 has an option -Wno-invalid-offsetof to suppress
//       this warning.
// The macro takes the address of "field" inside a pretend "klass" object
// located at address 16 and subtracts that base again, leaving the field's
// offset within klass as a size_t.
#define offset_of(klass,field) (size_t)((intx)&(((klass*)16)->field) - 16)

C标准库其实也提供了一个获取成员变量偏移量的宏 offsetof ,定义在 stddef.h 中,其经典写法如下
// Yields the offset of MEMBER within TYPE: the address of MEMBER in a
// pretend TYPE object located at address 0, converted to size_t.
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)

其中0表示的是内存地址,C语言中允许这样的强制类型转换而不会报错。那么HotSpot所使用的offset_of就如注释所说,基址用16只是为了绕过gcc的warning。

readVMTypes方法也是类似的,根据HotSpot的VMTypeEntry创建对应的Type,这里就不赘述了。

SA从HotSpot中读取元信息的这种做法,有个很蛋疼的地方,

The names in vmStructs.cpp are used by the Java code in SA. Thus, if a field named in vmStructs.cpp is deleted or renamed, both vmStructs.cpp and the Java code that access that field have to be modified. If this isn’t done, then SA will fail when it tries to examine a process/core file.

lookupInProcess

readVMStructs方法中还有一个需要注意的地方,就是gHotSpotVMStructs变量地址的获取,这等于说是后续读取所有VMStructEntry的入口,

   // Fetch the address of the VMStructEntry*
    Address entryAddr = lookupInProcess("gHotSpotVMStructs");


  /**
   * Looks up {@code symbol} in each of the known JVM libraries in turn and
   * returns the first non-null address.
   *
   * @param symbol name of the symbol to find in the target process
   * @return the address of the symbol
   * @throws NoSuchSymbolException if none of the libraries yields an address
   */
  private Address lookupInProcess(String symbol) throws NoSuchSymbolException {
    // FIXME: abstract away the loadobject name
    final int libCount = jvmLibNames.length;

    for (int lib = 0; lib < libCount; lib++) {
      // Delegate the actual lookup to the Debugger.
      Address found = symbolLookup.lookup(jvmLibNames[lib], symbol);
      if (found != null) {
        return found;
      }
    }

    // Nothing found: build "(name1, name2, ...)" for the error message.
    StringBuilder libList = new StringBuilder("(");
    for (int lib = 0; lib < libCount; lib++) {
      if (lib > 0) {
        libList.append(", ");
      }
      libList.append(jvmLibNames[lib]);
    }
    libList.append(")");

    String message = "Could not find symbol \"" + symbol +
                     "\" in any of the known library names " +
                     libList;
    throw new NoSuchSymbolException(symbol, message);
  }

来看看 LinuxDebuggerLocal 的 lookup 方法
/**
 * From the SymbolLookup interface via Debugger and JVMDebugger.
 *
 * For a core file the native lookup is called directly; for a live process
 * it is run through {@code workerThread.execute}.
 *
 * @return the symbol's address, or null when not attached or when the
 *         native lookup returns 0
 */
    public synchronized Address lookup(String objectName, String symbol) {
        requireAttach();
        if (!attached) {
            return null;
        }

        if (isCore) {
            long coreAddr = lookupByName0(objectName, symbol);
            if (coreAddr == 0) {
                return null;
            }
            return new LinuxAddress(this, handleGCC32ABI(coreAddr, symbol));
        }

        // Live process: perform the native lookup on the worker thread.
        final String libName = objectName;
        final String symName = symbol;

        class SymbolLookupTask implements WorkerThreadTask {
            Address found;

            public void doit(LinuxDebuggerLocal debugger) {
                long liveAddr = debugger.lookupByName0(libName, symName);
                if (liveAddr == 0) {
                    found = null;
                } else {
                    found = new LinuxAddress(debugger, handleGCC32ABI(liveAddr, symName));
                }
            }
        }

        SymbolLookupTask lookupTask = new SymbolLookupTask();
        workerThread.execute(lookupTask);
        return lookupTask.found;
    }

看看本地方法 lookupByName0的实现
/*
 * Class:     sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal
 * Method:    lookupByName0
 * Signature: (Ljava/lang/String;Ljava/lang/String;)J
 *
 * Converts the two Java strings to C strings, calls lookup_symbol() and
 * returns the address it yields (0 when the symbol is not found or a string
 * conversion fails). objectName may be NULL.
 */
JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_lookupByName0
  (JNIEnv *env, jobject this_obj, jstring objectName, jstring symbolName) {
  const char *objectName_cstr, *symbolName_cstr;
  jlong addr;
  jboolean isCopy;

  // Fetch the ps_prochandle that was stored on the Java object at attach time.
  struct ps_prochandle* ph = get_proc_handle(env, this_obj);

  objectName_cstr = NULL;
  if (objectName != NULL) {
    objectName_cstr = (*env)->GetStringUTFChars(env, objectName, &isCopy);
    // Nothing has been acquired yet, so bailing out here leaks nothing.
    // NOTE(review): CHECK_EXCEPTION_ is defined elsewhere; presumably it
    // returns the given value when a Java exception is pending.
    CHECK_EXCEPTION_(0);
  }

  symbolName_cstr = (*env)->GetStringUTFChars(env, symbolName, &isCopy);
  if (symbolName_cstr == NULL) {
    // GetStringUTFChars failed. Release the first string before returning;
    // an early return here used to leak objectName_cstr.
    if (objectName_cstr != NULL) {
      (*env)->ReleaseStringUTFChars(env, objectName, objectName_cstr);
    }
    return 0;
  }

  // Call lookup_symbol to do the actual search.
  addr = (jlong) lookup_symbol(ph, objectName_cstr, symbolName_cstr);

  if (objectName_cstr != NULL) {
    (*env)->ReleaseStringUTFChars(env, objectName, objectName_cstr);
  }
  (*env)->ReleaseStringUTFChars(env, symbolName, symbolName_cstr);
  return addr;
}

lookup_symbol方法
// Look up the symbol "sym_name" in the symbol tables of all libraries
// recorded in "ph". Returns the address reported by search_symbol() for the
// first library that knows the symbol, or 0 when no library does.
uintptr_t lookup_symbol(struct ps_prochandle* ph,  const char* object_name,
                       const char* sym_name) {
   // The library name passed in is ignored.
   // ignore object_name. search in all libraries
   // FIXME: what should we do with object_name?? The library names are obtained
   // by parsing /proc/<pid>/maps, which may not be the same as object_name.
   // What we need is a utility to map object_name to real file name, something
   // dlopen() does by looking at LD_LIBRARY_PATH and /etc/ld.so.cache. For
   // now, we just ignore object_name and do a global search for the symbol.

   // Search the symbol tables that were saved when the libraries were added.
   lib_info* lib = ph->libs;
   while (lib) {
      if (lib->symtab) {
         uintptr_t res = search_symbol(lib->symtab, lib->base, sym_name, NULL);
         if (res) return res;
      }
      lib = lib->next;
   }

   // object_name may legitimately be NULL; never hand a NULL pointer to "%s".
   print_debug("lookup failed for symbol '%s' in obj '%s'\n",
                          sym_name, object_name != NULL ? object_name : "(null)");
   return (uintptr_t) NULL;
}

alright,可以看到SA中有两种方式来获取HotSpotVM里面的变量地址,一种是通过符号表,另一种是通过VMStructEntry这种VM提供的元信息(也就是通过&运算获取的地址)。

现在地址是有了,那么它们的值是怎么被获取的呢?用ptrace搞定。

PTRACE_PEEKDATA

来看下VMStructEntry的isStatic字段的读取,

        isStatic = !(entryAddr.getCIntegerAt(structEntryIsStaticOffset, C_INT32_SIZE, false) == 0);

Linux下的 Address 实现是 LinuxAddress ,看下它的 getCIntegerAt 方法,
 /**
  * Reads a C integer of {@code numBytes} bytes located {@code offset} bytes
  * past this address, by delegating to the debugger.
  */
 public long getCIntegerAt(long offset, long numBytes, boolean isUnsigned)
            throws UnalignedAddressException, UnmappedAddressException {
        final long target = addr + offset;
        return debugger.readCInteger(target, numBytes, isUnsigned);
    }

交给 LinuxDebuggerLocal 来处理了,
/**
 * Reads {@code numBytes} bytes at {@code address} and converts them to a C
 * integer value, after checking the alignment of the address.
 */
public long readCInteger(long address, long numBytes, boolean isUnsigned)
        throws UnmappedAddressException, UnalignedAddressException {
        // Only slightly relaxed semantics -- this is a hack, but is
        // necessary on x86 where it seems the compiler is
        // putting some global 64-bit data on 32-bit boundaries
        final long requiredAlignment = (numBytes == 8) ? 4 : numBytes;
        utils.checkAlignment(address, requiredAlignment);

        final byte[] raw = readBytes(address, numBytes);
        return utils.dataToCInteger(raw, isUnsigned);
    }

后面的调用链路是这样的, DebuggerBase#readBytes  ->  LinuxDebuggerLocal#readBytesFromProcess  -> LinuxDebuggerLocal#readBytesFromProcess0 ,很明显,又到了一个本地方法,
/*
 * Reads numBytes bytes at addr from the target via ps_pdread() into a new
 * Java byte array. Returns the array on success and 0 (null) when the read
 * fails.
 */
JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_readBytesFromProcess0
  (JNIEnv *env, jobject this_obj, jlong addr, jlong numBytes) {

  jboolean copied;
  jbyteArray result;
  jbyte *dest;
  ps_err_e status;

  result = (*env)->NewByteArray(env, numBytes);
  CHECK_EXCEPTION_(0);
  dest = (*env)->GetByteArrayElements(env, result, &copied);
  CHECK_EXCEPTION_(0);

  status = ps_pdread(get_proc_handle(env, this_obj), (psaddr_t) (uintptr_t)addr, dest, numBytes);

  /* The elements are released with mode 0 whether or not the read worked. */
  (*env)->ReleaseByteArrayElements(env, result, dest, 0);

  if (status != PS_OK) {
    return 0;
  }
  return result;
}

ps_pdread
// Read "size" bytes into "buf" from address "addr" of the target, using the
// p_pread operation of the handle's ops table. Maps its boolean result to
// PS_OK / PS_ERR.
ps_err_e ps_pdread(struct ps_prochandle *ph, psaddr_t  addr,
                   void *buf, size_t size) {
  if (ph->ops->p_pread(ph, (uintptr_t) addr, buf, size)) {
    return PS_OK;
  }
  return PS_ERR;
}

ph->ops 是一个 ps_prochandle_ops ,Linux下对live进程的赋值如下,
// Operations table installed into ps_prochandle.ops by Pgrab() for a live
// process: release, memory read/write and LWP register access.
static ps_prochandle_ops process_ops = {
  .release=  process_cleanup,
  .p_pread=  process_read_data,        // reached through ps_pdread()
  .p_pwrite= process_write_data,
  .get_lwp_regs= process_get_lwp_regs
};

所以最终是调用了 process_read_data方法
// Peek one word at the word-aligned address "word_addr" of the target process
// and store it in "*word". PTRACE_PEEKDATA returns the data itself, so errno
// is cleared before the call and inspected afterwards to detect a failure.
// "req_addr" and "req_size" describe the enclosing read request and are only
// used for the failure message.
static bool process_peek_word(struct ps_prochandle* ph, uintptr_t word_addr,
                              long* word, uintptr_t req_addr, size_t req_size) {
  errno = 0;
  *word = ptrace(PTRACE_PEEKDATA, ph->pid, word_addr, 0);
  if (errno) {
    // The casts make the arguments match the %d / %lx conversions exactly.
    print_debug("ptrace(PTRACE_PEEKDATA, ..) failed for %d bytes @ %lx\n",
                (int) req_size, (unsigned long) req_addr);
    return false;
  }
  return true;
}

// read "size" bytes of data from "addr" within the target process.
// unlike the standard ptrace() function, process_read_data() can handle
// unaligned address - alignment check, if required, should be done
// before calling process_read_data.
//
// The read is done in up to three phases: a leading partial word, a run of
// whole words, and a trailing partial word. Returns false as soon as one
// peek fails, true otherwise.

static bool process_read_data(struct ps_prochandle* ph, uintptr_t addr, char *buf, size_t size) {
  long rslt;
  size_t i, words;
  uintptr_t end_addr = addr + size;
  uintptr_t aligned_addr = align(addr, sizeof(long));

  // Leading partial word: peek the word at aligned_addr, skip the bytes
  // that lie before "addr", then copy up to the next word boundary or the
  // end of the request, whichever comes first.
  if (aligned_addr != addr) {
    const char *ptr = (const char *)&rslt;
    if (!process_peek_word(ph, aligned_addr, &rslt, addr, size)) {
      return false;
    }
    while (aligned_addr != addr) {
      aligned_addr++;
      ptr++;
    }
    for (; ((intptr_t)aligned_addr % sizeof(long)) && aligned_addr < end_addr;
        aligned_addr++)
       *(buf++) = *(ptr++);
  }

  words = (end_addr - aligned_addr) / sizeof(long);

  // assert((intptr_t)aligned_addr % sizeof(long) == 0);
  for (i = 0; i < words; i++) {
    const char *src = (const char *)&rslt;
    size_t b;
    if (!process_peek_word(ph, aligned_addr, &rslt, addr, size)) {
      return false;
    }
    // "buf" is a plain char buffer and may have been advanced by the leading
    // copy above, so it need not be long-aligned: copy the word bytewise
    // instead of storing through a (long *) cast.
    for (b = 0; b < sizeof(long); b++) {
      buf[b] = src[b];
    }
    buf += sizeof(long);
    aligned_addr += sizeof(long);
  }

  // Trailing partial word: peek one more word and copy only the bytes that
  // belong to the request.
  if (aligned_addr != end_addr) {
    const char *ptr = (const char *)&rslt;
    if (!process_peek_word(ph, aligned_addr, &rslt, addr, size)) {
      return false;
    }
    for (; aligned_addr != end_addr; aligned_addr++)
       *(buf++) = *(ptr++);
  }
  return true;
}

可以看到最后也是通过ptrace来读取目标VM中的数据。

总结一下,

  1. 通过/proc/[pid]/maps读取ELF文件,保存符号表;
  2. 通过符号表查找HotSpotVM中gHotSpotVMStructs等变量的地址,它们指向localHotSpotVMStructs和localHotSpotVMTypes这两张表;
  3. 使用ptrace读取上述变量的值
  4. 这两个变量值包含了SA需要用到的HotSpotVM中的数据的元信息(类型信息,字段offset,地址等);
  5. 有了这些元信息就可以使用ptrace读取目标VM上这些数据的值;

    参考资料

  1. http://openjdk.java.net/groups/hotspot/docs/Serviceability.html#bsa
  2. https://www.usenix.org/legacy/events/jvm01/full_papers/russell/russell_html/index.html
  3. http://stackoverflow.com/questions/8238896/gcc-struct-defining-members-in-specific-offsets

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值