OpenJDK源码下载
Java中String类的intern方法是一个native方法。
/**
* Returns a canonical representation for the string object.
* <p>
* A pool of strings, initially empty, is maintained privately by the
* class {@code String}.
* <p>
* When the intern method is invoked, if the pool already contains a
* string equal to this {@code String} object as determined by
* the {@link #equals(Object)} method, then the string from the pool is
* returned. Otherwise, this {@code String} object is added to the
* pool and a reference to this {@code String} object is returned.
* <p>
* It follows that for any two strings {@code s} and {@code t},
* {@code s.intern() == t.intern()} is {@code true}
* if and only if {@code s.equals(t)} is {@code true}.
* <p>
* All literal strings and string-valued constant expressions are
* interned. String literals are defined in section 3.10.5 of the
* <cite>The Java™ Language Specification</cite>.
* <p>
* This method is declared {@code native}: the declaration below only
* fixes the signature, and the body is not written in Java.
*
* @return a string that has the same contents as this string, but is
* guaranteed to be from a pool of unique strings.
*/
public native String intern();
需要在OpenJDK8u源码中查看方法的实现,为了方便查看文件,已经将源码下载到本地。目录结构如下:
native关键字
Java中如果一个方法用native关键字修饰,代表该方法的实现是用其他语言(C或C++)编写的。Java语言虽然不能直接对操作系统底层进行访问和操作,但是可以通过JNI(Java Native Interface)调用其他语言来实现对底层的访问。也就是说,JDK中的代码库不仅由Java语言编写,还包含用C/C++编写的部分。
intern方法实现
intern方法实现代码如下:
openjdk\jdk\src\share\native\java\lang\String.c
#include "jvm.h"
#include "java_lang_String.h"
/*
 * JNI entry point for java.lang.String.intern().
 * Passes the receiver string to JVM_InternString and returns whatever that
 * call produces, unchanged.
 */
JNIEXPORT jobject JNICALL
Java_java_lang_String_intern(JNIEnv *env, jobject this)
{
    jobject interned = JVM_InternString(env, this);
    return interned;
}
openjdk\hotspot\src\share\vm\prims\jvm.h
/*
 * Declaration of the VM entry point that interns a Java string.
 * Takes the JNI environment and a handle to the string, and returns a
 * handle to the interned string.
 */
JNIEXPORT jstring JNICALL
JVM_InternString(JNIEnv *env, jstring str);
openjdk\hotspot\src\share\vm\prims\jvm.cpp
// VM-side body of JVM_InternString: resolves the incoming JNI handle to an
// oop, interns it through StringTable, and returns the result wrapped in a
// new local JNI handle. A NULL handle is returned as NULL without touching
// the table.
JVM_ENTRY(jstring, JVM_InternString(JNIEnv *env, jstring str))
JVMWrapper("JVM_InternString");
JvmtiVMObjectAllocEventCollector oam;
// Nothing to intern for a null reference.
if (str == NULL) return NULL;
oop string = JNIHandles::resolve_non_null(str); // 1: str was null-checked above, so the non-null resolver applies
oop result = StringTable::intern(string, CHECK_NULL); // 2: find the string in the table, or add it
return (jstring) JNIHandles::make_local(env, result);
JVM_END
oop-klass模型
hotspot内部基于oop-klass模型描述一个java类,oop(ordinary object pointer)用来表示一个对象的实例信息,klass包含元数据和方法信息,用来描述java类或jvm内部c++类型的信息,java类的继承信息,成员变量、静态变量、构造函数等信息都在klass中保存。另外还有封装了oop的handle类,在访问java类时是通过handle内部指针得到oop实例的。
Handle定义:
// Excerpt of the Handle class: a wrapper holding a pointer to an oop.
// The remaining members are elided.
class Handle VALUE_OBJ_CLASS_SPEC {
 private:
  oop* _handle;  // points at the slot that holds the oop
  // ...
};
三者关系可以简单描述为下图:参考 1
openjdk\hotspot\src\share\vm\runtime\jniHandles.hpp
// Reads the oop stored behind a JNI handle that the caller guarantees to be
// non-null. Every validity check here is an assert; the function itself only
// dereferences the handle and returns what it finds.
inline oop JNIHandles::resolve_non_null(jobject handle) {
  assert(handle != NULL, "JNI handle should not be null");
  oop* slot = (oop*)handle;
  oop resolved = *slot;
  assert(resolved != NULL, "Invalid value read from jni handle");
  assert(resolved != badJNIHandle, "Pointing to zapped jni handle area");
  // The private _deleted_handle object must never be handed out to callers.
  assert(resolved != deleted_handle(), "Used a deleted global handle.");
  return resolved;
};
resolve_non_null方法返回oop,oop本身是一个指针类型(oopDesc*类型),可以通过指针访问对应的oopDesc类型(对象实例信息)。调用该方法的str对象一定是不为空的,也就是oop一定不为空。
StringTable
StringTable::intern是最关键一步,先来看一下StringTable。
openjdk\hotspot\src\share\vm\classfile\symbolTable.hpp
// The VM's table of interned strings: a Hashtable whose entries hold oops.
// This is an excerpt; the class continues beyond the last member shown here.
class StringTable : public Hashtable<oop, mtSymbol> {
friend class VMStructs;
private:
// The string table
static StringTable* _the_table;
// Set if one bucket is out of balance due to hash algorithm deficiency
static bool _needs_rehashing;
// Claimed high water mark for parallel chunked scanning
static volatile int _parallel_claimed_idx;
// Interns the given characters; string_or_null may carry an existing String
// to reuse instead of creating a new one.
static oop intern(Handle string_or_null, jchar* chars, int length, TRAPS);
// Inserts the string into the bucket at index unless an equal one is found there.
oop basic_add(int index, Handle string_or_null, jchar* name, int len,
unsigned int hashValue, TRAPS);
// Searches the bucket at index for a string equal to chars; NULL if absent.
oop lookup(int index, jchar* chars, int length, unsigned int hashValue);
// Apply the given oop closure to the entries to the buckets
// in the range [start_idx, end_idx).
static void buckets_do(OopClosure* f, int start_idx, int end_idx);
// Both constructors size the table from StringTableSize and use
// HashtableEntry<oop, mtSymbol> as the entry size.
StringTable() : Hashtable<oop, mtSymbol>((int)StringTableSize,
sizeof (HashtableEntry<oop, mtSymbol>)) {}
StringTable(HashtableBucket<mtSymbol>* t, int number_of_entries)
: Hashtable<oop, mtSymbol>((int)StringTableSize, sizeof (HashtableEntry<oop, mtSymbol>), t,
number_of_entries) {}
public:
// The string table
static StringTable* the_table() { return _the_table; }
// Size of one bucket in the string table. Used when checking for rollover.
static uint bucket_size() { return sizeof(HashtableBucket<mtSymbol>); }
// Allocates the single StringTable instance; asserts that none exists yet.
static void create_table() {
assert(_the_table == NULL, "One string table allowed.");
_the_table = new StringTable();
}
StringTable继承Hashtable<oop, mtSymbol>,Hashtable继承BasicHashtable<F>
openjdk\hotspot\src\share\vm\utilities\hashtable.hpp
// Head of the Hashtable template only (its body is not shown in this excerpt):
// Hashtable<T, F> derives from BasicHashtable<F>.
template <class T, MEMFLAGS F> class Hashtable : public BasicHashtable<F>
// Excerpt of BasicHashtable, the base class that Hashtable<T, F> derives from.
// Only the instance variables are shown; elided members are marked "// ...".
template <MEMFLAGS F> class BasicHashtable : public CHeapObj<F> {
  // ...

 private:
  // Instance variables
  int                     _table_size;         // presumably the number of buckets -- confirm
  HashtableBucket<F>*     _buckets;            // the buckets
  BasicHashtableEntry<F>* _free_list;          // presumably entries available for reuse -- confirm
  char*                   _first_free_entry;
  char*                   _end_block;
  int                     _entry_size;         // presumably the size in bytes of one entry -- confirm
  int                     _number_of_entries;

  // ...
};
// Hash table entry that adds the stored value (_literal, of type T) on top of
// what BasicHashtableEntry provides. Excerpt; other members are elided.
template <class T, MEMFLAGS F> class HashtableEntry : public BasicHashtableEntry<F> {
friend class VMStructs;
private:
T _literal; // ref to item in table.
// ...
};
// Base entry type: stores the item's hash and a pointer to the next entry,
// which lets entries be chained together. Excerpt; other members are elided.
template <MEMFLAGS F> class BasicHashtableEntry : public CHeapObj<F> {
friend class VMStructs;
private:
unsigned int _hash; // 32-bit hash for item
BasicHashtableEntry<F>* _next; // following entry in the chain
// ...
};
StringTable实际是一个类似Java中Hashtable的结构,其中entry结构如下:
StringTable::intern
openjdk\hotspot\src\share\vm\classfile\symbolTable.cpp
// Interns a Java String given as an oop. The string's characters are copied
// out with java_lang_String::as_unicode_string and passed, together with a
// Handle for the original string, to the (Handle, jchar*, int) overload.
// Returns NULL when string is NULL.
oop StringTable::intern(oop string, TRAPS)
{
  if (string == NULL) {
    return NULL;
  }

  ResourceMark rm(THREAD);
  int num_chars;
  Handle string_handle(THREAD, string);
  jchar* unicode = java_lang_String::as_unicode_string(string, num_chars, CHECK_NULL);
  oop interned = intern(string_handle, unicode, num_chars, CHECK_NULL);
  return interned;
}
java_lang_String::as_unicode_string将字符串oop中char[]数组取出。
// Core of interning: looks the characters up in the table and, if no equal
// string is present, adds one.
//   string_or_null - an existing String to insert on a miss; when it is null a
//                    new String is created from name/len instead
//   name, len      - the characters to intern and their count
// Returns the string found in, or added to, the table.
oop StringTable::intern(Handle string_or_null, jchar* name,
int len, TRAPS) {
// Hash the characters and map the hash to a bucket index.
unsigned int hashValue = hash_string(name, len);
int index = the_table()->hash_to_index(hashValue);
// First lookup happens before StringTable_lock is taken.
oop found_string = the_table()->lookup(index, name, len, hashValue);
// Found
if (found_string != NULL) return found_string;
debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
// The character buffer must not lie inside the reserved Java heap.
assert(!Universe::heap()->is_in_reserved(name),
"proposed name of symbol must be stable");
Handle string;
// try to reuse the string if possible
if (!string_or_null.is_null()) {
string = string_or_null;
} else {
string = java_lang_String::create_from_unicode(name, len, CHECK_NULL);
}
// Grab the StringTable_lock before getting the_table() because it could
// change at safepoint.
MutexLocker ml(StringTable_lock, THREAD);
// Otherwise, add the string to the table
return the_table()->basic_add(index, string, name, len,
hashValue, CHECK_NULL);
}
可以看到,先计算hash值以及在hashtable中的下标位置,然后查看该下标对应的bucket中是否有与name字符数组内容相同的entry:有的话将对应的oop返回;没有找到则将oop添加到hashtable中,并将该oop返回。下面是一些实现细节:
openjdk\hotspot\src\share\vm\classfile\javaClasses.cpp
// Copies the characters of java_string into a newly allocated jchar array
// (obtained with NEW_RESOURCE_ARRAY_RETURN_NULL) and stores the character
// count in `length`. If the allocation yields NULL, an OutOfMemoryError is
// raised through THROW_MSG_0.
jchar* java_lang_String::as_unicode_string(oop java_string, int& length, TRAPS) {
  typeArrayOop chars = java_lang_String::value(java_string);
  int start = java_lang_String::offset(java_string);
  length = java_lang_String::length(java_string);

  jchar* buffer = NEW_RESOURCE_ARRAY_RETURN_NULL(jchar, length);
  if (buffer == NULL) {
    THROW_MSG_0(vmSymbols::java_lang_OutOfMemoryError(), "could not allocate Unicode string");
  } else {
    // Copy the string's characters, starting at its offset in the value array.
    for (int i = 0; i < length; i++) {
      buffer[i] = chars->char_at(start + i);
    }
  }
  return buffer;
}
string本质上是由char[]数组组成的,通过该方法取出后,接下来用于计算hash值,以及与HashtableEntry中的oop进行比较。比较时依次判断char[]中每个字符是否相等,如下:
// Returns true when java_string holds exactly the `len` characters in `chars`.
// The lengths are compared first; the characters are then compared one by one,
// stopping at the first mismatch.
bool java_lang_String::equals(oop java_string, jchar* chars, int len) {
  assert(java_string->klass() == SystemDictionary::String_klass(),
         "must be java_string");
  typeArrayOop str_value = java_lang_String::value(java_string);
  int str_offset = java_lang_String::offset(java_string);
  int str_length = java_lang_String::length(java_string);

  if (str_length != len) {
    return false;
  }

  // Advance while the characters agree; reaching len means every one matched.
  int pos = 0;
  while (pos < len && str_value->char_at(pos + str_offset) == chars[pos]) {
    pos++;
  }
  return pos == len;
}
lookup和basic_add的具体实现:
openjdk\hotspot\src\share\vm\classfile\symbolTable.cpp
// Walks the chain of the bucket at `index` looking for an entry whose hash
// equals `hash` and whose string equals name/len. Returns that string, or
// NULL if the chain holds no match. On a miss, a chain that has reached
// rehash_count entries triggers check_rehash_table, unless rehashing is
// already flagged.
oop StringTable::lookup(int index, jchar* name,
                        int len, unsigned int hash) {
  int chain_length = 0;
  HashtableEntry<oop, mtSymbol>* entry = bucket(index);
  while (entry != NULL) {
    chain_length++;
    // Compare hashes first; the character comparison runs only on a hash match.
    if (entry->hash() == hash &&
        java_lang_String::equals(entry->literal(), name, len)) {
      return entry->literal();
    }
    entry = entry->next();
  }

  // If the bucket size is too deep check if this hash code is insufficient.
  if (chain_length >= BasicHashtable<mtSymbol>::rehash_count && !needs_rehashing()) {
    _needs_rehashing = check_rehash_table(chain_length);
  }
  return NULL;
}
// Adds `string` to the table unless an equal string is already present.
//   index_arg, hashValue_arg - bucket index and hash computed by the caller;
//                              recomputed here when use_alternate_hashcode() is true
//   string                   - the String to insert; must equal name/len (asserted)
//   name, len                - the characters of the string
// Returns the string already in the table if one is found, otherwise `string`.
oop StringTable::basic_add(int index_arg, Handle string, jchar* name,
int len, unsigned int hashValue_arg, TRAPS) {
assert(java_lang_String::equals(string(), name, len),
"string must be properly initialized");
// Cannot hit a safepoint in this function because the "this" pointer can move.
No_Safepoint_Verifier nsv;
// Check if the symbol table has been rehashed, if so, need to recalculate
// the hash value and index before second lookup.
unsigned int hashValue;
int index;
if (use_alternate_hashcode()) {
hashValue = hash_string(name, len);
index = hash_to_index(hashValue);
} else {
hashValue = hashValue_arg;
index = index_arg;
}
// Since look-up was done lock-free, we need to check if another
// thread beat us in the race to insert the symbol.
oop test = lookup(index, name, len, hashValue); // second lookup, with the final index and hash
if (test != NULL) {
// Entry already added
return test;
}
// Still absent: create an entry for the string and link it into its bucket.
HashtableEntry<oop, mtSymbol>* entry = new_entry(hashValue, string());
add_entry(index, entry);
return string();
}
basic_add方法的大致逻辑是:先确定哈希值和hashtable的下标值(如果已启用备用哈希算法则重新计算,否则沿用调用方传入的值),然后再次lookup,防止其他线程已经抢先插入了相同的字符串;如果仍然没有找到,则new一个HashtableEntry,其中_literal存放的是指向当前字符串实例的指针oop,最后将该entry加入hashtable。
Handle 类型定义:
openjdk\hotspot\src\share\vm\runtime\handles.hpp
// Wrapper around a pointer to an oop. The oop is reached by dereferencing
// _handle; a default-constructed Handle has a NULL _handle and yields a NULL oop.
// Excerpt; further members are elided.
class Handle VALUE_OBJ_CLASS_SPEC {
private:
oop* _handle; // points at the slot holding the oop; NULL for an empty Handle
protected:
// Returns the wrapped oop, or NULL when the handle is empty.
oop obj() const { return _handle == NULL ? (oop)NULL : *_handle; }
// Returns the wrapped oop; asserts that the handle is not empty.
oop non_null_obj() const { assert(_handle != NULL, "resolving NULL handle"); return *_handle; }
public:
// Constructors
Handle() { _handle = NULL; }
Handle(oop obj);
Handle(Thread* thread, oop obj);
// General access
// handle() yields the oop (possibly NULL); handle-> requires a non-empty handle.
oop operator () () const { return obj(); }
oop operator -> () const { return non_null_obj(); }
// Equality compares the wrapped oops, not the _handle pointers.
bool operator == (oop o) const { return obj() == o; }
bool operator == (const Handle& h) const { return obj() == h.obj(); }
// ...
};
总结
所以在OpenJDK8u中,所谓的常量池是内存中维护了一个StringTable(hashtable),存放若干个指向String对象的指针。
参考:揭秘Java虚拟机:JVM设计原理与实现(封亚飞著) ↩︎