前言:日常的问题记录和思考。
java 数组是否是顺序存储的? hashCode和 identityHashCode 的区别?
问题来源:算法第四版,295页,书上说默认散列函数会返回对象的内存地址,这句话我有所怀疑。
Memory Address of Objects in Java
首先我们定义一个数组,然后打印一下 hashCode 和 identityHashCode
// Demo: compare toString(), hashCode(), System.identityHashCode() and the JOL-reported
// address for two freshly created objects.
// Arrays is the author's own class (the output shows com.example.common.data_structure.Arrays),
// not java.util.Arrays.
Arrays arrays1 = new Arrays();
Arrays arrays2 = new Arrays();
Arrays[] arrays = new Arrays[]{arrays1, arrays2};
for (Arrays array : arrays) {
// Implicit toString(): the sample output has the form ClassName@<hex>.
System.out.println(array);
// hashCode() of the object; in the sample output it equals identityHashCode below.
System.out.println(array.hashCode());
// Identity hash as returned by System.identityHashCode.
System.out.println(System.identityHashCode(array));
// Address reported by JOL's VM.current().addressOf(...); in the sample output it differs from both hash values.
System.out.println(VM.current().addressOf(array));
}
--------
com.example.common.data_structure.Arrays@16b98e56
381259350
381259350
32064335112
com.example.common.data_structure.Arrays@1e81f4dc
511833308
511833308
32064335128
VM是 jol 的方法
<!-- https://mvnrepository.com/artifact/org.openjdk.jol/jol-core -->
<dependency>
<groupId>org.openjdk.jol</groupId>
<artifactId>jol-core</artifactId>
<version>0.16</version>
<scope>provided</scope>
</dependency>
addressOf 方法的注释
toString 方法不用多说:它输出的是"类名@hashCode 的十六进制形式"(例如上面的 16b98e56 转成十进制正是 381259350),是基于 hashCode 计算出来的,并不是内存地址。
hashCode 是一个本地方法需要查阅源码。
看注释的第一句话,为给定对象返回与默认方法 hashCode() 返回的相同的哈希码,无论给定对象的类是否覆盖 hashCode()
Object.c文件中的方法
Java_java_lang_System_identityHashCode
jvm.cpp JVM_ENTRY(jint, JVM_IHashCode(JNIEnv* env, jobject handle))
如果 handle == null 返回 0,否则执行
ObjectSynchronizer::FastHashCode (THREAD, JNIHandles::resolve_non_null(handle))
synchronizer.cpp
FastHashCode 源码如下
// Returns the identity hash of obj. If a non-zero hash is already stored in the
// object's mark word (or in the displaced/monitor header that currently stands in
// for it), that value is returned. Otherwise a new hash is obtained from
// get_next_hash() and installed with a compare-and-swap, inflating the lock into
// an ObjectMonitor when the hash cannot be written into the mark word directly.
//
// Self - the calling thread; asserted to be a Java thread unless verification is in progress.
// obj  - the object whose hash is requested.
// Returns a non-zero hash (a stored hash of 0 is treated as "not yet assigned").
intptr_t ObjectSynchronizer::FastHashCode (Thread * Self, oop obj) {
if (UseBiasedLocking) {
// NOTE: many places throughout the JVM do not expect a safepoint
// to be taken here, in particular most operations on perm gen
// objects. However, we only ever bias Java instances and all of
// the call sites of identity_hash that might revoke biases have
// been checked to make sure they can handle a safepoint. The
// added check of the bias pattern is to avoid useless calls to
// thread-local storage.
if (obj->mark()->has_bias_pattern()) {
// Box and unbox the raw reference just in case we cause a STW safepoint.
Handle hobj (Self, obj) ;
// Relaxing assertion for bug 6320749.
assert (Universe::verify_in_progress() ||
!SafepointSynchronize::is_at_safepoint(),
"biases should not be seen by VM thread here");
// Revoke the bias before the header is inspected for a hash;
// the assert below checks that the bias pattern is gone afterwards.
BiasedLocking::revoke_and_rebias(hobj, false, JavaThread::current());
// Re-read the raw oop from the handle after the call above.
obj = hobj() ;
assert(!obj->mark()->has_bias_pattern(), "biases should be revoked by now");
}
}
// hashCode() is a heap mutator ...
// Relaxing assertion for bug 6320749.
assert (Universe::verify_in_progress() ||
!SafepointSynchronize::is_at_safepoint(), "invariant") ;
assert (Universe::verify_in_progress() ||
Self->is_Java_thread() , "invariant") ;
assert (Universe::verify_in_progress() ||
((JavaThread *)Self)->thread_state() != _thread_blocked, "invariant") ;
// monitor stays NULL unless the object already has, or is given, an inflated monitor.
ObjectMonitor* monitor = NULL;
// temp: candidate/displaced header value; test: value returned by the compare-and-swap.
markOop temp, test;
intptr_t hash;
// NOTE(review): ReadStableMark is defined elsewhere; presumably it returns a mark
// word that is not in a transient state - confirm against its definition.
markOop mark = ReadStableMark (obj);
// object should remain ineligible for biased locking
assert (!mark->has_bias_pattern(), "invariant") ;
// Case 1: neutral header - the hash lives directly in the mark word.
if (mark->is_neutral()) {
hash = mark->hash(); // this is a normal header
if (hash) { // if it has hash, just return it
return hash;
}
hash = get_next_hash(Self, obj); // allocate a new hash code
temp = mark->copy_set_hash(hash); // merge the hash code into header
// use (machine word version) atomic operation to install the hash
test = (markOop) Atomic::cmpxchg_ptr(temp, obj->mark_addr(), mark);
// The swap succeeded if the previous value is the mark we read.
if (test == mark) {
return hash;
}
// If atomic operation failed, we must inflate the header
// into heavy weight monitor. We could add more code here
// for fast path, but it does not worth the complexity.
// Case 2: the mark word refers to a monitor - read the hash from the monitor's header.
} else if (mark->has_monitor()) {
monitor = mark->monitor();
temp = monitor->header();
assert (temp->is_neutral(), "invariant") ;
hash = temp->hash();
if (hash) {
return hash;
}
// Skip to the following code to reduce code size
// Case 3: the lock is owned by the calling thread - read the hash from the displaced header.
} else if (Self->is_lock_owned((address)mark->locker())) {
temp = mark->displaced_mark_helper(); // this is a lightweight monitor owned
assert (temp->is_neutral(), "invariant") ;
hash = temp->hash(); // by current thread, check if the displaced
if (hash) { // header contains hash code
return hash;
}
// WARNING:
// The displaced header is strictly immutable.
// It can NOT be changed in ANY cases. So we have
// to inflate the header into heavyweight monitor
// even the current thread owns the lock. The reason
// is the BasicLock (stack slot) will be asynchronously
// read by other threads during the inflate() function.
// Any change to stack may not propagate to other threads
// correctly.
}
// Reached when no hash was found above, when the compare-and-swap in case 1 lost,
// or when none of the three header states matched.
// Inflate the monitor to set hash code
monitor = ObjectSynchronizer::inflate(Self, obj);
// Load displaced header and check it has hash code
mark = monitor->header();
assert (mark->is_neutral(), "invariant") ;
hash = mark->hash();
if (hash == 0) {
hash = get_next_hash(Self, obj);
temp = mark->copy_set_hash(hash); // merge hash code into header
assert (temp->is_neutral(), "invariant") ;
// NOTE(review): the compare-and-swap target is the monitor pointer itself, which
// presumably relies on the header being the first field of ObjectMonitor - confirm.
test = (markOop) Atomic::cmpxchg_ptr(temp, monitor, mark);
if (test != mark) {
// The only update to the header in the monitor (outside GC)
// is install the hash code. If someone add new usage of
// displaced header, please update this code
// Our swap did not take effect: use the hash found in the header value that was there.
hash = test->hash();
assert (test->is_neutral(), "invariant") ;
assert (hash != 0, "Trivial unexpected object/monitor header usage.");
}
}
// We finally get the hash
return hash;
}
在倒数几行找到 hash = get_next_hash(Self, obj);
get_next_hash
// Produces a new candidate identity hash for obj. The generation strategy is chosen
// by the global hashCode setting (values 0-4 select the branches below; any other
// value takes the final xor-shift branch). The result is masked with
// markOopDesc::hash_mask and is never 0: a masked value of 0 is replaced by 0xBAD.
//
// Self - the calling thread; its _hashStateX/Y/Z/W fields are read and updated by the final branch.
// obj  - the object being hashed; only used by the hashCode == 1 and hashCode == 4 branches.
static inline intptr_t get_next_hash(Thread * Self, oop obj) {
intptr_t value = 0 ;
if (hashCode == 0) {
// This form uses an unguarded global Park-Miller RNG,
// so it's possible for two threads to race and generate the same RNG.
// On MP system we'll have lots of RW access to a global, so the
// mechanism induces lots of coherency traffic.
value = os::random() ;
} else
if (hashCode == 1) {
// This variation has the property of being stable (idempotent)
// between STW operations. This can be useful in some of the 1-0
// synchronization schemes.
// Derived from the oop value: shifted right by 3, folded with itself shifted by 5,
// then xor-ed with GVars.stwRandom.
intptr_t addrBits = cast_from_oop<intptr_t>(obj) >> 3 ;
value = addrBits ^ (addrBits >> 5) ^ GVars.stwRandom ;
} else
if (hashCode == 2) {
value = 1 ; // for sensitivity testing
} else
if (hashCode == 3) {
// Pre-incremented global counter: each call yields the next value of GVars.hcSequence.
value = ++GVars.hcSequence ;
} else
if (hashCode == 4) {
// The oop converted to an integer with no further mixing
// (presumably the object's current address - confirm against cast_from_oop).
value = cast_from_oop<intptr_t>(obj) ;
} else {
// Marsaglia's xor-shift scheme with thread-specific state
// This is probably the best overall implementation -- we'll
// likely make this the default in future releases.
// The four state words are rotated (X<-Y, Y<-Z, Z<-W) and a new W is computed
// from the old X and old W; obj is not consulted in this branch.
unsigned t = Self->_hashStateX ;
t ^= (t << 11) ;
Self->_hashStateX = Self->_hashStateY ;
Self->_hashStateY = Self->_hashStateZ ;
Self->_hashStateZ = Self->_hashStateW ;
unsigned v = Self->_hashStateW ;
v = (v ^ (v >> 19)) ^ (t ^ (t >> 8)) ;
Self->_hashStateW = v ;
value = v ;
}
// Keep only the bits selected by markOopDesc::hash_mask.
value &= markOopDesc::hash_mask;
// Never hand out 0; substitute 0xBAD so the assert on no_hash below holds.
if (value == 0) value = 0xBAD ;
assert (value != markOopDesc::no_hash, "invariant") ;
TEVENT (hashCode: GENERATE) ;
return value;
}
我们可以通过VM options 来改变hashcode返回的值。
VM options hashCode 默认值
输入下列命令查询默认值
java -XX:+PrintFlagsFinal -version | findstr hashCode
可以看到 hashCode = 5
所以说默认情况下调用 hashCode方法就会返回下列计算值
else {
// Marsaglia's xor-shift scheme with thread-specific state
// This is probably the best overall implementation -- we'll
// likely make this the default in future releases.
unsigned t = Self->_hashStateX ;
t ^= (t << 11) ;
Self->_hashStateX = Self->_hashStateY ;
Self->_hashStateY = Self->_hashStateZ ;
Self->_hashStateZ = Self->_hashStateW ;
unsigned v = Self->_hashStateW ;
v = (v ^ (v >> 19)) ^ (t ^ (t >> 8)) ;
Self->_hashStateW = v ;
value = v ;
}
由此证明,书上说的 hashCode方法返回对象的内存地址是不对的。
-XX:hashCode=1
if (hashCode == 1) {
// This variation has the property of being stable (idempotent)
// between STW operations. This can be useful in some of the 1-0
// synchronization schemes.
intptr_t addrBits = cast_from_oop<intptr_t>(obj) >> 3 ;
value = addrBits ^ (addrBits >> 5) ^ GVars.stwRandom ;
}
为1时输出如下图所示
-XX:hashCode=2
if (hashCode == 2) {
value = 1 ; // for sensitivity testing
}
为2时输出如下图所示
-XX:hashCode=3
if (hashCode == 3) {
value = ++GVars.hcSequence ;
}
为3时输出如下图所示
-XX:hashCode=4
if (hashCode == 4) {
value = cast_from_oop<intptr_t>(obj) ;
}
为4时输出如下图所示
cast_from_oop<intptr_t>(obj)
在 1 和 4 中出现了 cast_from_oop<intptr_t>(obj) 这个方法
数组是否是连续存储的
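在 hashCode = 4 的情况下,无论是 jol 还是 hashCode 都可以看到间隔 16 的数字出现,可以证明连续。不过需要注意:下面的代码把基本类型 int 传给 addressOf / identityHashCode 时会先自动装箱成 Integer,所以打印出来的其实是装箱后的 Integer 对象(1~10 落在 Integer 缓存 -128~127 范围内)的地址,间隔 16 通常就是一个 Integer 对象的大小;严格来说,这个结果说明的是这些 Integer 对象在堆上相邻存放,而不是 int[] 中各元素本身的地址。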
// Demo: print the JOL address and the identity hash for every element of an int[].
// NOTE(review): anInt is a primitive int, so each call below first boxes it to an
// Integer (System.identityHashCode takes an Object; JOL's addressOf presumably does
// too - confirm). The printed numbers therefore describe the boxed Integer objects,
// not the slots of the int[] itself. The values 1..10 lie in the range -128..127 for
// which boxing is guaranteed to reuse the same Integer instance, which is why both
// loops observe the same objects.
int[] ints = new int[]{10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
for (int anInt : ints) {
// Address of the boxed value as reported by JOL.
System.out.println(VM.current().addressOf(anInt));
}
System.out.println("----");
for (int anInt : ints) {
// Identity hash of the boxed value.
System.out.println(System.identityHashCode(anInt));
}
--------
32061253720
32061253704
32061253688
32061253672
32061253656
32061253640
32061253624
32061253608
32061253592
32061253576
----
1996482648
1996482632
1996482616
1996482600
1996482584
1996482568
1996482552
1996482536
1996482520
1996482504
DDL语句操作列
背景:
flyway进行数据库控制时需要记录数据库操作的DDL语句,不可避免的对列进行处理。
官方文档
新增列
alter table tb_test add COLUMN address1 varchar(255) null COMMENT '备注1' after update_time;
alter table tb_test add COLUMN address2 varchar(255) null COMMENT '备注2' ;
alter table tb_test add COLUMN address3 varchar(255) not null COMMENT '备注3' ;
alter table tb_test add COLUMN address4 varchar(255) not null DEFAULT "abc" COMMENT '备注4' ;
删除列
alter table tb_test drop COLUMN address1 ;
修改列
change
- Can rename a column and change its definition, or both.
- Has more capability than MODIFY or RENAME COLUMN, but at the expense of convenience for some operations. CHANGE requires naming the column twice if not renaming it, and requires respecifying the column definition if only renaming it.
- With FIRST or AFTER, can reorder columns.
可以改名和改变定义,可以通过first 和 after 来排序。最全但是不便捷。
MODIFY
简单说,modify用来改变定义而不是改名,改定义要比 change更加方便,可以通过 first 和 after 来排序。
alter table tb MODIFY c1 varchar(255) DEFAULT NULL COMMENT '订单编号'
RENAME COLUMN
可以更改列名字但不能更改其定义。
比change改名字更加方便。
注意:RENAME COLUMN 不支持 FIRST 或 AFTER,不能用来重新排序列(只有 CHANGE 和 MODIFY 可以)。
简单说,rename用来改名。
ALTER TABLE tb_test RENAME COLUMN address1 TO eddress;
ALTER TABLE tb_test RENAME COLUMN source TO target;
ALTER
Used only to change a column default value.
仅仅改变列的默认值。
alter table tb_test alter COLUMN address2 set default 1;
alter table tb_test alter COLUMN address2 drop DEFAULT;
总结
改变默认值用 alter ,RENAME COLUMN 用来修改列名,MODIFY 用来修改定义,change全都可以改但是不方便。