我建立了一个JVMTest工程,用于测试。在这个工程中编写和测试一些utilities。每个功能可以单独测试。
关于常量池的解析已初步完成,代码如下。
JVMTest.cpp
#include <stdio.h>
#include <fcntl.h>
#include "ClassFileParser.h"
#if defined(WIN32) || defined(_WIN32)
// <io.h> declares _open/_read/_lseek/_close; it is not available on Linux,
// so it must only be included in the Windows branch.
#include <io.h>
#include <windows.h>
#else // unix, linux
#include <sys/types.h>
#include <unistd.h>
// Map the Windows-style low-level I/O names used in this file onto POSIX.
#define _open open
#define _read read
#define _write write
#define _lseek lseek // plain lseek for now on Linux (lseek64 does not seem to be supported everywhere)
#define _close close
#endif
// POSIX has no text/binary distinction and usually does not define O_BINARY;
// make it a no-op flag there so the open() call compiles everywhere.
#ifndef O_BINARY
#define O_BINARY 0
#endif
// Reads a whole class file into a newly allocated buffer.
//
// file:         path of the class file to open.
// bufferLength: out-parameter; receives the number of bytes read on success.
// Returns the buffer (allocated with new[]; the caller releases it with
// delete[]), or NULL after printing a diagnostic when the file cannot be
// opened, measured or read completely.
char * loadClassToBuffer(const char *file, int *bufferLength)
{
    int fd = _open(file, O_RDONLY | O_BINARY);
    if (fd == -1) {
        printf("can not open file");
        return NULL;
    }
    // Seeking to the end of the file yields its size in bytes.
    long length = _lseek(fd, 0, SEEK_END);
    if (length == -1) {
        _close(fd);
        printf("can not seek file");
        return NULL;
    }
    char *buffer = new char [length];
    // NOTE(review): a plain new[] normally reports failure by throwing, so
    // this check is probably only effective on very old compilers - confirm.
    if (buffer == NULL) {
        _close(fd);
        printf("out of memory");
        return NULL;
    }
    _lseek(fd, 0, SEEK_SET);
    int bytesRead = _read(fd, buffer, length);
    // The descriptor is not needed any more, whatever the outcome of the
    // read; closing it here guarantees the success path does not leak it.
    _close(fd);
    if (bytesRead != length) {
        delete [] buffer;
        printf("read failed");
        return NULL;
    }
    *bufferLength = length;
    return buffer;
}
int main(int argc, char *argv[])
{
printf("JVM Test, JVM Utilities test...\n");
if (argc < 2) {
printf("Usage: JVMTest classfile\n");
return 0;
}
const char *classFileName = argv[1];
int bufferLength;
char *classBuffer = loadClassToBuffer(classFileName, &bufferLength);
if (classBuffer == NULL) {
printf("\nload class file failed");
return -1;
}
ClassFileParser parser((const u1 *)classBuffer, bufferLength);
try {
parser.parseVersion();
parser.parseConstantPool();
parser.printSummary();
} catch (Exception e) {
printf("parse class file failed: %s", e.getMessage());
}
//test
//Exception e("abcdef");
return 0;
}
ClassFileParser.h
#pragma once
#include "util.h"
// Unsigned aliases used for class file fields of 4, 2 and 1 bytes.
// NOTE(review): assumes unsigned int is 32 bits and unsigned short is 16 bits
// on every target platform - confirm.
typedef unsigned int u4;
typedef unsigned short u2;
typedef unsigned char u1;
// Forward declarations of the record types that ClassFileFormat refers to by
// pointer; cp_info is defined further down in this header.
struct cp_info;
struct field_info;
struct method_info;
struct attribute_info;
// Byte-pack all structures declared up to the matching "#pragma pack()".
#pragma pack(1)
// Top-level fields of a class file, in order. Variable-length tables are
// represented by a count followed by a pointer to the records.
// NOTE(review): "minor_verison" looks like a typo for "minor_version"; it is
// left untouched because renaming a member would change the interface.
struct ClassFileFormat
{
u4 magic;
u2 minor_verison;
u2 major_version;
u2 constant_pool_count; // per the JVM specification this equals the number of cp_info records + 1
cp_info *constant_pool; //[constant_pool_count-1]
u2 access_flags;
u2 this_class;
u2 super_class;
u2 interfaces_count;
u2 *interfaces; //[interfaces_count]
u2 fields_count;
field_info *fields; //[fields_count]
u2 methods_count;
method_info *methods; //[methods_count];
u2 attributes_count;
attribute_info *attributes; //[attributes_count];
};
// Generic constant pool entry: a one-byte tag (one of the CONSTANT_* values)
// followed by tag-specific data.
struct cp_info
{
u1 tag;
u1 *info;
};
// Constant pool tag values. The enumerators are numbered consecutively
// starting at CONSTANT_Utf8 = 1, so CONSTANT_NameAndType is 12.
// CONSTANT_Unicode (2) is declared only to keep the numbering; the parser in
// ClassFileParser.cpp has no case for it and reports it as an unknown tag.
enum
{
CONSTANT_Utf8 = 1,
CONSTANT_Unicode,
CONSTANT_Integer,
CONSTANT_Float,
CONSTANT_Long,
CONSTANT_Double,
CONSTANT_Class,
CONSTANT_String,
CONSTANT_Fieldref,
CONSTANT_Methodref,
CONSTANT_InterfaceMethodref,
CONSTANT_NameAndType,
};
// Constant pool entry describing a class: a tag plus the index of its name.
struct CONSTANT_Class_info
{
u1 tag;
u2 name_index; // constant pool index; the entry at that index must be a CONSTANT_Utf8_info
};
// Constant pool entry referring to a field of a class.
struct CONSTANT_Fieldref_info
{
u1 tag;
u2 class_index; // constant pool index; must point to a CONSTANT_Class_info
u2 name_and_type_index; // constant pool index; must point to a CONSTANT_NameAndType_info
};
// Constant pool entry referring to a method of a class.
struct CONSTANT_Methodref_info
{
u1 tag;
u2 class_index; // constant pool index; must point to a CONSTANT_Class_info
u2 name_and_type_index; // constant pool index; must point to a CONSTANT_NameAndType_info
};
// Constant pool entry referring to a method of an interface.
struct CONSTANT_InterfaceMethodref_info
{
u1 tag;
u2 class_index; // constant pool index; must point to a CONSTANT_Class_info
u2 name_and_type_index; // constant pool index; must point to a CONSTANT_NameAndType_info
};
// Constant pool entry for a string constant: a tag plus the index of its text.
struct CONSTANT_String_info
{
u1 tag;
u2 string_index; // constant pool index; must point to a CONSTANT_Utf8_info
};
// Constant pool entry holding a 4-byte integer constant.
struct CONSTANT_Integer_info
{
u1 tag;
u4 bytes; // 4-byte integer, high byte first
};
// Constant pool entry holding a 4-byte floating-point constant.
struct CONSTANT_Float_info
{
u1 tag;
u4 bytes; // 4-byte float in IEEE 754 format, high byte first
};
// Constant pool entry holding an 8-byte integer constant, split into two
// 4-byte halves.
struct CONSTANT_Long_info
{
u1 tag;
u4 high_bytes; // 8-byte integer, high byte first
u4 low_bytes;
};
// Constant pool entry holding an 8-byte floating-point constant, split into
// two 4-byte halves.
struct CONSTANT_Double_info
{
u1 tag;
u4 high_bytes; // double-precision float in IEEE 754 format, high byte first
u4 low_bytes;
};
// Constant pool entry pairing the name of a field or method with its descriptor.
struct CONSTANT_NameAndType_info
{
u1 tag;
u2 name_index; // simple name of the field or method; must point to a CONSTANT_Utf8_info
u2 descriptor_index; // descriptor of the field or method; must point to a CONSTANT_Utf8_info
};
// Constant pool entry holding a string: a tag, a byte count and the bytes.
struct CONSTANT_Utf8_info
{
u1 tag;
u2 length;
u1 *bytes; // `length` bytes long
};
// Restore the default structure packing.
#pragma pack()
// Parses the version header and the constant pool of a Java class file that
// is held in memory. The caller's buffer is referenced, not copied.
// NOTE(review): the "throw (Exception)" dynamic exception specifications used
// throughout this header were removed from the language in C++17, so the code
// has to be built with an earlier standard.
class ClassFileParser
{
public:
// Stores the buffer pointer and length; nothing is parsed until one of the
// parse* methods is called.
ClassFileParser(const u1 *classInBuffer, int length);
virtual ~ClassFileParser(void);
// Reads the magic number and the minor/major version from the start of the
// buffer. Throws Exception on a bad magic value or an unsupported version.
void parseVersion() throw (Exception);
// Walks the constant pool that follows the magic/version header, checks each
// entry, records the entry count and byte length, then calls
// saveConstantPool(). Throws Exception on malformed input.
void parseConstantPool() throw (Exception);
// Prints the stored version, constant pool count and constant pool byte
// length to stdout.
void printSummary();
private:
const u1 * _class_buffer; // start of the class file image supplied by the caller
int _buffer_length; // length of that image in bytes
// True when index lies in [1, cp_count), the range of usable constant pool
// indices.
bool valid_cp_index(int index, int cp_count) {
return (index >= 1 && index < cp_count);
}
bool is_supported_version(u2 majorVersion, u2 minorVersion);
bool check_utf8_string(const u1 *bytes, int length);
// Copies the constant pool into _cp_data, fills _cp_index and verifies the
// cross references between entries.
void saveConstantPool() throw (Exception);
private:
u2 _major_version; // 0 until parseVersion() succeeds
u2 _minor_version; // 0 until parseVersion() succeeds
int _cp_count; // constant_pool_count as read from the file (number of entries + 1)
int _cp_length; // total size in bytes of all constant pool entries, tags included; -1 until parsed
u1 ** _cp_index; // one pointer per constant pool slot, each addressing an entry's tag byte
u1 * _cp_data; // copy of the constant pool entries
u1 _tag_0; // zero tag byte; the _cp_index slot following a Long/Double entry points here
};
// Sequential reader over a byte buffer holding class file data. Keeps a
// cursor (_current) that the read_* and skip_* methods advance.
class ClassBufferInput
{
public:
ClassBufferInput(const u1* buffer, int length);
const u1* buffer() const { return _buffer_start; }
// Total size of the buffer in bytes (not the number of bytes left).
int length() const { return _buffer_end - _buffer_start; }
// Address of the next byte that will be read.
const u1* current() const { return _current; }
// Read a 1-, 2- or 4-byte value at the cursor and advance past it.
u1 read_u1() throw (Exception);
u2 read_u2() throw (Exception);
u4 read_u4() throw (Exception);
// Advance the cursor by 1, 2, 4 or n bytes without returning the data.
void skip_u1() throw (Exception);
void skip_u2() throw (Exception);
void skip_u4() throw (Exception);
void skip_n(int n) throw (Exception);
// u2 and u4 values in a Java class file are stored high byte first, low byte last
static u2 read_java_u2(const u1 *buffer);
static u4 read_java_u4(const u1 *buffer);
private:
const u1* _buffer_start;
const u1* _buffer_end; // one past the last byte of the buffer
const u1* _current;
// Size check performed by the read/skip methods before they touch the
// buffer; throws Exception when it fails.
void guarantee_size(int size) throw (Exception);
};
// Sequential writer over a caller-supplied byte buffer. Keeps a cursor
// (_current) that the write_* methods advance.
class ClassBufferOutput
{
public:
ClassBufferOutput(u1* buffer, int length);
u1* buffer() const { return _buffer_start; }
// Total size of the buffer in bytes (not the remaining space).
int length() const { return _buffer_end - _buffer_start; }
// Address at which the next byte will be written.
u1* current() const { return _current; }
// Write a 1-, 2- or 4-byte value at the cursor (multi-byte values high byte
// first) and advance past it.
void write_u1(u1 c) throw (Exception);
void write_u2(u2 x) throw (Exception);
void write_u4(u4 x) throw (Exception);
// Copy n bytes from `bytes` to the cursor and advance past them.
void write_bytes(const u1 *bytes, int n) throw (Exception);
private:
u1* _buffer_start;
u1* _buffer_end; // one past the last byte of the buffer
u1* _current;
// Size check performed by the write methods before they touch the buffer;
// throws Exception when it fails.
void guarantee_size(int size) throw (Exception);
};
ClassFileParser.cpp
#include "ClassFileParser.h"
#include <stdio.h>
#include <memory.h>
#define CLASS_FILE_MAGIC_U4 0xCAFEBABE
// Binds the parser to an in-memory class file image. Only the pointer and
// the length are stored; the version and constant pool fields start out in
// their "not parsed yet" state (zero counts, -1 length, NULL tables).
ClassFileParser::ClassFileParser(const u1 *classInBuffer, int length)
    : _class_buffer(classInBuffer),
      _buffer_length(length),
      _major_version(0),
      _minor_version(0),
      _cp_count(0),
      _cp_length(-1),
      _cp_index(NULL),
      _cp_data(NULL),
      _tag_0(0)
{
}
// Releases the constant pool copy and its index table, both allocated with
// new[] by saveConstantPool(). The pointers start out as NULL, for which
// delete[] is a no-op, so this is safe even if nothing was ever parsed.
// The class file buffer itself belongs to the caller and is not touched.
// NOTE: a parser must not be copied, since a copy would share these pointers.
ClassFileParser::~ClassFileParser(void)
{
    delete [] _cp_index;
    delete [] _cp_data;
}
void ClassFileParser::parseVersion() throw (Exception)
{
ClassBufferInput in(_class_buffer, _buffer_length);
u4 magic = in.read_u4();
assert_exception(magic == CLASS_FILE_MAGIC_U4, "bad magic value");
u2 minorVersion = in.read_u2();
u2 majorVersion = in.read_u2();
assert_exception(is_supported_version(majorVersion, minorVersion),
"unsupported class version");
_major_version = majorVersion;
_minor_version = minorVersion;
}
// Returns true when the class file major version is one this parser accepts
// (45 through 51 inclusive). The minor version is currently not examined.
//
// The version numbers accepted by real JVMs such as Sun's HotSpot are not the
// obvious ones: for Java 1.5 one might guess major=1, minor=5, but HotSpot
// rejects that. According to the HotSpot source the lowest supported major
// version is 45, so 45 itself has to be accepted as well.
bool ClassFileParser::is_supported_version(u2 major, u2 minor)
{
    return major >= 45 && major <= 51;
}
void ClassFileParser::parseConstantPool() throw (Exception)
{
int magic_version_length = sizeof(u4) + sizeof(u2) + sizeof(u2);
ClassBufferInput in(_class_buffer + magic_version_length,
_buffer_length - magic_version_length);
u2 cp_count = in.read_u2(); //常量池项目数 + 1
assert_exception(cp_count >= 1, "bad constant pool size");
//下面遍历一遍常量池,为了统计常量池的长度(字节数),顺便执行一些检查
int cp_length = 0;
int cp_info_length;
for (int index = 1; index < cp_count; index++)
{
cp_info_length = -1;
u1 tag = in.read_u1();
printf("index:%d, tag: %d, ", index, tag);
switch (tag)
{
case CONSTANT_Utf8:
{
u2 length = in.read_u2();
//检查utf8字符串
bool isUtf8 = check_utf8_string(in.current(), length);
assert_exception(isUtf8, "bad utf8 string");
in.skip_n(length);
cp_info_length = 2 + length;
}
break;
case CONSTANT_Integer:
{
in.skip_u4();
cp_info_length = 4;
}
break;
case CONSTANT_Float:
{
in.skip_u4();
cp_info_length = 4;
}
break;
case CONSTANT_Long:
{
in.skip_n(8);
cp_info_length = 8;
index++; //JVM规范:8字节的常量池项在计数上占两个
}
break;
case CONSTANT_Double:
{
in.skip_n(8);
cp_info_length = 8;
index++; //JVM规范:8字节的常量池项在计数上占两个
}
break;
case CONSTANT_Class:
{
u2 name_and_index = in.read_u2();
assert_exception(valid_cp_index(name_and_index, cp_count),
"bad constant pool index");
cp_info_length = 2;
}
break;
case CONSTANT_String:
{
u2 string_index = in.read_u2();
assert_exception(valid_cp_index(string_index, cp_count),
"bad constant pool index");
cp_info_length = 2;
}
break;
case CONSTANT_Fieldref:
case CONSTANT_Methodref:
case CONSTANT_InterfaceMethodref:
{
u2 class_index = in.read_u2();
u2 name_and_type_index = in.read_u2();
assert_exception(valid_cp_index(class_index, cp_count),
"bad constant pool index");
assert_exception(valid_cp_index(name_and_type_index, cp_count),
"bad constant pool index");
cp_info_length = 4;
}
break;
case CONSTANT_NameAndType:
{
u2 name_index = in.read_u2();
u2 descriptor_index = in.read_u2();
assert_exception(valid_cp_index(name_index, cp_count),
"bad constant pool index");
assert_exception(valid_cp_index(descriptor_index, cp_count),
"bad constant pool index");
cp_info_length = 4;
}
break;
default:
{
char msg[30];
sprintf(msg, "unknown tag: %d", tag);
assert_exception(false, msg);
}
break;
}
assert_exception(cp_info_length != -1, "internal error");
printf("length: %d\n", cp_info_length);
cp_length += (cp_info_length + 1);
}
_cp_count = cp_count;
_cp_length = cp_length;
saveConstantPool();
}
// Placeholder for validating the `length` bytes at `bytes` as a UTF-8 string.
// Not implemented yet: both arguments are ignored and every input is accepted.
bool ClassFileParser::check_utf8_string(const u1 *bytes, int length)
{
//Todo: check utf8 string
return true;
}
void ClassFileParser::saveConstantPool() throw (Exception)
{
int magic_version_length = sizeof(u4) + sizeof(u2) + sizeof(u2);
ClassBufferInput in(_class_buffer + magic_version_length,
_buffer_length - magic_version_length);
u2 cp_count = in.read_u2(); //常量池项目数 + 1
//现在已知常量池的长度,保存到一个数组中(目前运行时常量池与原始常量池完全相同!)
//另用一个数组保存各个常量项的索引
_cp_data = new u1 [_cp_length];
assert_exception(_cp_data != NULL, "out of memory");
_cp_index = new u1 * [cp_count - 1];
assert_exception(_cp_index != NULL, "out of memory");
ClassBufferOutput out(_cp_data, _cp_length);
for (int index = 0; index < cp_count - 1; index++)
{
u1 tag = in.read_u1();
switch (tag)
{
case CONSTANT_Utf8:
{
u2 length = in.read_u2();
_cp_index[index] = out.current();
out.write_u1(tag);
out.write_u2(length);
out.write_bytes(in.current(), length);
in.skip_n(length);
}
break;
case CONSTANT_Integer:
{
u4 bytes = in.read_u4();
_cp_index[index] = out.current();
out.write_u1(tag);
out.write_u4(bytes);
}
break;
case CONSTANT_Float:
{
u4 bytes = in.read_u4();
_cp_index[index] = out.current();
out.write_u1(tag);
out.write_u4(bytes);
}
break;
case CONSTANT_Long:
{
u4 high_bytes = in.read_u4();
u4 low_bytes = in.read_u4();
_cp_index[index] = out.current();
out.write_u1(tag);
out.write_u4(high_bytes);
out.write_u4(low_bytes);
index++; //JVM规范:8字节的常量池项在计数上占两个
_cp_index[index] = &_tag_0;
}
break;
case CONSTANT_Double:
{
u4 high_bytes = in.read_u4();
u4 low_bytes = in.read_u4();
_cp_index[index] = out.current();
out.write_u1(tag);
out.write_u4(high_bytes);
out.write_u4(low_bytes);
index++; //JVM规范:8字节的常量池项在计数上占两个
_cp_index[index] = &_tag_0;
}
break;
case CONSTANT_Class:
{
u2 name_and_index = in.read_u2();
_cp_index[index] = out.current();
out.write_u1(tag);
out.write_u2(name_and_index);
}
break;
case CONSTANT_String:
{
u2 string_index = in.read_u2();
_cp_index[index] = out.current();
out.write_u1(tag);
out.write_u2(string_index);
}
break;
case CONSTANT_Fieldref:
case CONSTANT_Methodref:
case CONSTANT_InterfaceMethodref:
{
u2 class_index = in.read_u2();
u2 name_and_type_index = in.read_u2();
_cp_index[index] = out.current();
out.write_u1(tag);
out.write_u2(class_index);
out.write_u2(name_and_type_index);
}
break;
case CONSTANT_NameAndType:
{
u2 name_index = in.read_u2();
u2 descriptor_index = in.read_u2();
_cp_index[index] = out.current();
out.write_u1(tag);
out.write_u2(name_index);
out.write_u2(descriptor_index);
}
break;
default:
{
char msg[30];
sprintf(msg, "unknown tag: %d", tag);
assert_exception(false, msg);
}
break;
}
}
//验证相等
//const u1 * p1 = _class_buffer + magic_version_length + 2;
//int cmp = memcmp(p1, _cp_data, _cp_length);
//检查常量项相互之间的引用是否正确
for (int index = 1; index < cp_count; index++)
{
u1 * pCurrent = _cp_index[index - 1];
u1 tag = *pCurrent++;
switch (tag)
{
case CONSTANT_Utf8:
break;
case CONSTANT_Integer:
break;
case CONSTANT_Float:
break;
case CONSTANT_Long:
{
index++;
}
break;
case CONSTANT_Double:
{
index++;
}
break;
case CONSTANT_Class:
{
u2 name_index = ClassBufferInput::read_java_u2(pCurrent);
u1 ref_tag = *(_cp_index[name_index - 1]);
assert_exception(ref_tag == CONSTANT_Utf8,
"name_index refered is not CONSTANT_Utf8");
}
break;
case CONSTANT_String:
{
u2 string_index = ClassBufferInput::read_java_u2(pCurrent);
u1 ref_tag = *(_cp_index[string_index - 1]);
assert_exception(ref_tag == CONSTANT_Utf8,
"string_index refered is not CONSTANT_Utf8");
}
break;
case CONSTANT_Fieldref:
case CONSTANT_Methodref:
case CONSTANT_InterfaceMethodref:
{
u2 class_index = ClassBufferInput::read_java_u2(pCurrent);
pCurrent += 2;
u2 name_and_type_index = ClassBufferInput::read_java_u2(pCurrent);
u1 ref_tag1 = *(_cp_index[class_index - 1]);
assert_exception(ref_tag1 == CONSTANT_Class,
"class_index refered is not CONSTANT_Class");
u1 ref_tag2 = *(_cp_index[name_and_type_index - 1]);
assert_exception(ref_tag2 == CONSTANT_NameAndType,
"name_and_type_index refered is not CONSTANT_NameAndType");
}
break;
case CONSTANT_NameAndType:
{
u2 name_index = ClassBufferInput::read_java_u2(pCurrent);
pCurrent += 2;
u2 descriptor_index = ClassBufferInput::read_java_u2(pCurrent);
u1 ref_tag1 = *(_cp_index[name_index - 1]);
assert_exception(ref_tag1 == CONSTANT_Utf8,
"name_index refered is not CONSTANT_Utf8");
u1 ref_tag2 = *(_cp_index[descriptor_index - 1]);
assert_exception(ref_tag2 == CONSTANT_Utf8,
"descriptor_index refered is not CONSTANT_Utf8");
}
break;
}
}
}
// Writes a one-line summary of what has been parsed so far to stdout:
// the class version and the constant pool's entry count and byte length.
void ClassFileParser::printSummary()
{
    const int major = _major_version;
    const int minor = _minor_version;
    printf("class version: %d.%d, cp count: %d, cp length: %d\n",
           major, minor, _cp_count, _cp_length);
}
//ClassBufferInput
// Creates a reader over `length` bytes starting at `buffer`, with the cursor
// placed on the first byte.
ClassBufferInput::ClassBufferInput(const u1* buffer, int length)
    : _buffer_start(buffer),
      _buffer_end(buffer + length),
      _current(buffer)
{
}
void ClassBufferInput::guarantee_size(int size) throw (Exception)
{
if (size > (_buffer_end - _buffer_start)) {
throw Exception("unexpected end of file");
}
}
// Returns the byte at the cursor and moves the cursor one byte forward.
// Throws Exception if the size check fails.
u1 ClassBufferInput::read_u1() throw (Exception)
{
    guarantee_size(1);
    const u1 value = *_current;
    ++_current;
    return value;
}
// Returns the 2-byte value (high byte first) at the cursor and moves the
// cursor two bytes forward. Throws Exception if the size check fails.
u2 ClassBufferInput::read_u2() throw (Exception)
{
    guarantee_size(2);
    const u1 *at = _current;
    _current = at + 2;
    return read_java_u2(at);
}
// Returns the 4-byte value (high byte first) at the cursor and moves the
// cursor four bytes forward. Throws Exception if the size check fails.
u4 ClassBufferInput::read_u4() throw (Exception)
{
    guarantee_size(4);
    const u1 *at = _current;
    _current = at + 4;
    return read_java_u4(at);
}
// Decodes the two bytes at `buffer` as a 16-bit value stored high byte
// first. Performs no bounds checking; the caller must supply two readable
// bytes.
u2 ClassBufferInput::read_java_u2(const u1 *buffer)
{
    const unsigned int high = buffer[0];
    const unsigned int low = buffer[1];
    return static_cast<u2>((high << 8) | low);
}
// Decodes the four bytes at `buffer` as a 32-bit value stored high byte
// first. Performs no bounds checking; the caller must supply four readable
// bytes.
u4 ClassBufferInput::read_java_u4(const u1 *buffer)
{
    u4 value = 0;
    // Shift the bytes in one at a time, most significant byte first.
    for (int i = 0; i < 4; ++i) {
        value = (value << 8) | buffer[i];
    }
    return value;
}
void ClassBufferInput::skip_u1() throw (Exception)
{
guarantee_size(1);
_current++;
}
void ClassBufferInput::skip_u2() throw (Exception)
{
guarantee_size(2);
_current += 2;
}
void ClassBufferInput::skip_u4() throw (Exception)
{
guarantee_size(4);
_current += 4;
}
// Moves the cursor forward by n bytes after the size check has passed.
// Throws Exception if the size check fails.
void ClassBufferInput::skip_n(int n) throw (Exception)
{
    guarantee_size(n);
    _current = _current + n;
}
//ClassBufferOutput
// Creates a writer over `length` bytes starting at `buffer`, with the cursor
// placed on the first byte.
ClassBufferOutput::ClassBufferOutput(u1* buffer, int length)
    : _buffer_start(buffer),
      _buffer_end(buffer + length),
      _current(buffer)
{
}
void ClassBufferOutput::guarantee_size(int size) throw (Exception)
{
if (size > (_buffer_end - _buffer_start)) {
throw Exception("insufficient buffer");
}
}
// Stores one byte at the cursor and moves the cursor one byte forward.
// Throws Exception if the size check fails.
void ClassBufferOutput::write_u1(u1 c) throw (Exception)
{
    guarantee_size(1);
    *_current = c;
    ++_current;
}
// Stores a 16-bit value at the cursor, high byte first, and moves the cursor
// two bytes forward. Throws Exception if the size check fails.
void ClassBufferOutput::write_u2(u2 x) throw (Exception)
{
    guarantee_size(2);
    _current[0] = static_cast<u1>(x >> 8);
    _current[1] = static_cast<u1>(x);
    _current += 2;
}
// Stores a 32-bit value at the cursor, high byte first, and moves the cursor
// four bytes forward. Throws Exception if the size check fails.
void ClassBufferOutput::write_u4(u4 x) throw (Exception)
{
    guarantee_size(4);
    // Emit the bytes from most significant (shift 24) to least (shift 0).
    for (int shift = 24; shift >= 0; shift -= 8) {
        *_current = static_cast<u1>(x >> shift);
        ++_current;
    }
}
// Copies n bytes from `bytes` to the cursor, moving the cursor forward by n.
// Nothing is copied when n is zero or negative. Throws Exception if the size
// check fails.
void ClassBufferOutput::write_bytes(const u1 *bytes, int n) throw (Exception)
{
    guarantee_size(n);
    int remaining = n;
    while (remaining-- > 0) {
        *_current = *bytes;
        ++_current;
        ++bytes;
    }
}
util.h
#ifndef _util_h
#define _util_h
// Exception type thrown by the parser utilities. Carries a message in a
// fixed-size character array inside the object.
class Exception
{
public:
// Creates an exception with an empty message.
Exception();
// Creates an exception with a copy of msg (long messages are truncated to
// fit the internal array).
Exception(const char *msg);
// Returns the stored message as a NUL-terminated string; the pointer refers
// to storage inside this object.
const char * getMessage() const { return _msg; }
private:
static const int MAX_MSG_SIZE = 256;
char _msg[MAX_MSG_SIZE+1]; // one extra element beyond MAX_MSG_SIZE, presumably for the terminating NUL
};
// Throw an Exception when the condition b is false; do nothing otherwise.
// The first overload throws an exception with an empty message, the second
// one carries msg.
void assert_exception(bool b) throw (Exception);
void assert_exception(bool b, const char *msg) throw (Exception);
#endif
util.cpp
#include "util.h"
#include <string.h>
// Creates an exception with an empty message. The whole array is cleared,
// including its last element, so that no byte of the object is left
// uninitialised when the exception is copied during a throw.
Exception::Exception()
{
    memset(_msg, 0, sizeof(_msg));
}
// Creates an exception carrying a copy of msg. Up to MAX_MSG_SIZE characters
// are kept (the array has one extra element for the terminator); anything
// longer is truncated. A NULL msg is treated as an empty message instead of
// being passed to strncpy.
Exception::Exception(const char *msg)
{
    if (msg == NULL) {
        msg = "";
    }
    // strncpy zero-fills the rest of the first MAX_MSG_SIZE elements; the
    // final element is set explicitly so the string is always terminated.
    strncpy(_msg, msg, MAX_MSG_SIZE);
    _msg[MAX_MSG_SIZE] = '\0';
}
// Throws an Exception with an empty message unless b is true.
void assert_exception(bool b) throw (Exception)
{
    if (b) {
        return;
    }
    throw Exception();
}
// Throws an Exception carrying msg unless b is true.
void assert_exception(bool b, const char *msg) throw (Exception)
{
    if (b) {
        return;
    }
    throw Exception(msg);
}
这个JVMTest工程可以用Visual Studio编译,或者在Linux上编译。我用的是cygwin,编译命令:g++ -g ClassFileParser.cpp JVMTest.cpp util.cpp