要实现对象的相等比较,需要实现IEquatable<T>,或单独写一个类实现IEqualityComparer<T>接口。
像List<T>的Contains这样的函数,如果我们自己定义的对象不实现IEquatable<T>接口,这个函数会默认调用object的Equals来比较对象(对引用类型来说就是比较引用是否相同),得出非预期的结果。
先自定义一个类:
/// <summary>
/// Simple key object made of two integer parts. This version defines no
/// equality members of its own, so instances are compared by reference.
/// </summary>
public class DaichoKey
{
    /// <summary>Primary part of the key.</summary>
    public int ID { get; set; }

    /// <summary>Secondary part of the key.</summary>
    public int SubID { get; set; }
}
|
// Store two keys, then look up a third instance that carries the same
// values as the first stored key.
var lst = new List<DaichoKey>();
lst.Add(new DaichoKey { ID = 1, SubID = 2 });
lst.Add(new DaichoKey { ID = 1, SubID = 3 });

DaichoKey newItem = new DaichoKey { ID = 1, SubID = 2 };

// false
bool isContains = lst.Contains(newItem);
|
上面的代码调用Contains后得到false,我们预想ID为1、SubID为2的对象已经存在于列表中了,应该得到true才对呀。
要实现这个效果,需要实现IEquatable<T>接口。
/// <summary>
/// Key object made of two integer parts. Implements
/// <see cref="IEquatable{T}"/> so that generic collections such as
/// List&lt;T&gt;.Contains compare instances by their values.
/// </summary>
public class DaichoKey : IEquatable<DaichoKey>
{
    /// <summary>Primary part of the key.</summary>
    public int ID { get; set; }

    /// <summary>Secondary part of the key.</summary>
    public int SubID { get; set; }

    /// <summary>
    /// Compares this key with another key by value.
    /// </summary>
    /// <param name="other">The key to compare with; may be null.</param>
    /// <returns>
    /// true when <paramref name="other"/> is not null and both ID and SubID match;
    /// otherwise false.
    /// </returns>
    public bool Equals(DaichoKey other)
    {
        // A null argument can never equal an existing instance. Without this
        // guard the property reads below would throw NullReferenceException.
        if (object.ReferenceEquals(other, null))
        {
            return false;
        }

        return this.ID == other.ID && this.SubID == other.SubID;
    }
}
|
经过上面的改良,结果如我们预期了,但是还不够完善,微软建议我们同时重写object的Equals方法和GetHashCode方法,以保持语义的一致性,于是有了下面的代码:
/// <summary>
/// Key object made of two integer parts with value-based equality, exposed
/// through both <see cref="IEquatable{T}"/> and <see cref="object.Equals(object)"/>.
/// </summary>
public class DaichoKey : IEquatable<DaichoKey>
{
    /// <summary>Primary part of the key.</summary>
    public int ID { get; set; }

    /// <summary>Secondary part of the key.</summary>
    public int SubID { get; set; }

    /// <summary>
    /// Compares this key with another key by value.
    /// </summary>
    /// <param name="other">The key to compare with; may be null.</param>
    /// <returns>
    /// true when <paramref name="other"/> is not null and both ID and SubID match;
    /// otherwise false.
    /// </returns>
    public bool Equals(DaichoKey other)
    {
        // Guard against null so that the property reads below cannot throw.
        if (object.ReferenceEquals(other, null))
        {
            return false;
        }

        return this.ID == other.ID && this.SubID == other.SubID;
    }

    /// <summary>
    /// Compares this key with an arbitrary object.
    /// </summary>
    /// <param name="obj">The object to compare with; may be null or of any type.</param>
    /// <returns>
    /// true when <paramref name="obj"/> is a DaichoKey with the same ID and SubID;
    /// false for null and for objects of any other type.
    /// </returns>
    public override bool Equals(object obj)
    {
        // Equals must answer, not throw: a null or foreign-typed argument is
        // simply "not equal". The 'as' cast yields null in both cases and the
        // typed overload turns that into false.
        return Equals(obj as DaichoKey);
    }

    /// <summary>
    /// Returns the hash code inherited from <see cref="object"/>.
    /// </summary>
    /// <returns>The base class hash code.</returns>
    public override int GetHashCode()
    {
        // NOTE: this still returns object's hash code, so two keys that are
        // Equals may report different hash codes. Hash-based APIs will then
        // not treat them as duplicates.
        return base.GetHashCode();
    }
}
|
上面的代码依然还有缺陷,没重写==和!=运算符,但这不是本文讨论的重点。绕了一大圈,终于来到了GetHashCode函数身上,貌似它对我们的Contains函数没有啥影响呀,不重写又何妨?我们再来试试List<T>可以使用的一个LINQ扩展函数Distinct:
// Build a list that ends up holding two entries with identical values
// (ID = 1, SubID = 2), then ask Distinct to remove duplicates.
var lst = new List<DaichoKey>();
lst.Add(new DaichoKey { ID = 1, SubID = 2 });
lst.Add(new DaichoKey { ID = 1, SubID = 3 });

DaichoKey newItem = new DaichoKey { ID = 1, SubID = 2 };
lst.Add(newItem);

if (lst != null)
{
    IEnumerable<DaichoKey> unique = lst.Distinct<DaichoKey>();
    lst = unique.ToList();
}
//result:
//1 2
//1 3
//1 2
|
悲剧发生了,数据1,2的重复数据没有被去掉呀,我们不是实现了IEquatable<T>接口吗?在园子上找到了一篇文章(c# 扩展方法奇思妙用基础篇八:Distinct 扩展),在回复中提到要让GetHashCode返回固定值,以强制调用IEquatable<T>的Equals方法。如下:
/// <summary>
/// Key object made of two integer parts with value-based equality and a
/// constant hash code, so that hash-based lookups always fall through to
/// <see cref="Equals(DaichoKey)"/>.
/// </summary>
public class DaichoKey : IEquatable<DaichoKey>
{
    /// <summary>Primary part of the key.</summary>
    public int ID { get; set; }

    /// <summary>Secondary part of the key.</summary>
    public int SubID { get; set; }

    /// <summary>
    /// Compares this key with another key by value.
    /// </summary>
    /// <param name="other">The key to compare with; may be null.</param>
    /// <returns>
    /// true when <paramref name="other"/> is not null and both ID and SubID match;
    /// otherwise false.
    /// </returns>
    public bool Equals(DaichoKey other)
    {
        // Guard against null so that the property reads below cannot throw.
        if (object.ReferenceEquals(other, null))
        {
            return false;
        }

        return this.ID == other.ID && this.SubID == other.SubID;
    }

    /// <summary>
    /// Compares this key with an arbitrary object.
    /// </summary>
    /// <param name="obj">The object to compare with; may be null or of any type.</param>
    /// <returns>
    /// true when <paramref name="obj"/> is a DaichoKey with the same ID and SubID;
    /// false for null and for objects of any other type.
    /// </returns>
    public override bool Equals(object obj)
    {
        // Equals must answer, not throw: null and foreign types both become
        // null after the 'as' cast and the typed overload returns false.
        return Equals(obj as DaichoKey);
    }

    /// <summary>
    /// Returns the same hash code for every instance.
    /// </summary>
    /// <returns>Always 0.</returns>
    public override int GetHashCode()
    {
        // A constant keeps equal keys on equal hash codes, so the equality
        // check is always reached. The price is that every instance shares
        // one hash value, so a hash container has to compare candidates one
        // by one.
        return 0;
    }
}
|
结果立马就对了,难道是这个Distinct函数在比较时,先比较的HashCode值?
带着这个疑问,反编译了下Distinct的代码,确实如我所猜测的那样。下面是源代码,有兴趣的同学,可以往下看看:
/// <summary>
/// Returns the distinct elements of <paramref name="source"/>, passing a null
/// comparer on to DistinctIterator.
/// </summary>
/// <param name="source">The sequence to remove duplicates from.</param>
/// <returns>The sequence produced by DistinctIterator.</returns>
public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source)
{
    if (source != null)
    {
        return DistinctIterator<TSource>(source, null);
    }

    // A null source is rejected up front.
    throw Error.ArgumentNull("source");
}
// Decompiled factory for the Distinct iterator: it creates the
// compiler-generated iterator object, hands it the source sequence and the
// comparer, and returns it. The <>-prefixed names are compiler-generated
// identifiers as printed by the decompiler, not valid C# source.
private
static
IEnumerable<TSource> DistinctIterator<TSource>(IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
{
// NOTE(review): -2 is passed as the constructor's <>1__state argument; it
// presumably marks "not yet enumerated" - confirm against the constructor body.
<DistinctIterator>d__81<TSource> d__ =
new
<DistinctIterator>d__81<TSource>(-2);
// Store the arguments on the iterator object.
d__.<>3__source = source;
d__.<>3__comparer = comparer;
return
d__;
}
// Decompiler listing of the compiler-generated iterator class created by
// DistinctIterator. Only member signatures are shown (no bodies) and the
// <>-prefixed names are compiler-generated identifiers, so this listing is
// for reading only and is not compilable C#.
private
sealed
class
<DistinctIterator>d__81<TSource> : IEnumerable<TSource>, IEnumerable, IEnumerator<TSource>, IEnumerator, IDisposable
{
// Fields
// State value that MoveNext switches on.
private
int
<>1__state;
// Element that MoveNext stores before it returns true.
private
TSource <>2__current;
// Comparer and source as assigned by DistinctIterator.
public
IEqualityComparer<TSource> <>3__comparer;
public
IEnumerable<TSource> <>3__source;
// Enumerator over the source; MoveNext obtains it from source.GetEnumerator().
public
IEnumerator<TSource> <>7__wrap84;
private
int
<>l__initialThreadId;
// Element currently being examined by MoveNext.
public
TSource <element>5__83;
// Set of the elements seen so far; MoveNext creates it and calls Add on it.
public
Set<TSource> <
set
>5__82;
// Comparer and source as read by MoveNext.
// NOTE(review): presumably copied from the <>3__ fields in GetEnumerator - confirm.
public
IEqualityComparer<TSource> comparer;
public
IEnumerable<TSource> source;
// Methods
[DebuggerHidden]
public
<DistinctIterator>d__81(
int
<>1__state);
// Called by MoveNext once the source enumerator has no more elements.
private
void
<>m__Finally85();
private
bool
MoveNext();
[DebuggerHidden]
IEnumerator<TSource> IEnumerable<TSource>.GetEnumerator();
[DebuggerHidden, TargetedPatchingOptOut(
"Performance critical to inline this type of method across NGen image boundaries"
)]
IEnumerator IEnumerable.GetEnumerator();
[DebuggerHidden]
void
IEnumerator.Reset();
void
IDisposable.Dispose();
// Properties
TSource IEnumerator<TSource>.Current { [DebuggerHidden]
get
; }
object
IEnumerator.Current { [DebuggerHidden]
get
; }
}
// NOTE(review): this listing repeats, token for token, the
// <DistinctIterator>d__81 declaration printed immediately before it; it looks
// like a paste duplication rather than a second type.
// As with the first copy, only member signatures are shown and the
// <>-prefixed names are compiler-generated, so this is not compilable C#.
private
sealed
class
<DistinctIterator>d__81<TSource> : IEnumerable<TSource>, IEnumerable, IEnumerator<TSource>, IEnumerator, IDisposable
{
// Fields
// State value that MoveNext switches on.
private
int
<>1__state;
// Element that MoveNext stores before it returns true.
private
TSource <>2__current;
// Comparer and source as assigned by DistinctIterator.
public
IEqualityComparer<TSource> <>3__comparer;
public
IEnumerable<TSource> <>3__source;
// Enumerator over the source; MoveNext obtains it from source.GetEnumerator().
public
IEnumerator<TSource> <>7__wrap84;
private
int
<>l__initialThreadId;
// Element currently being examined by MoveNext.
public
TSource <element>5__83;
// Set of the elements seen so far; MoveNext creates it and calls Add on it.
public
Set<TSource> <
set
>5__82;
// Comparer and source as read by MoveNext.
public
IEqualityComparer<TSource> comparer;
public
IEnumerable<TSource> source;
// Methods
[DebuggerHidden]
public
<DistinctIterator>d__81(
int
<>1__state);
private
void
<>m__Finally85();
private
bool
MoveNext();
[DebuggerHidden]
IEnumerator<TSource> IEnumerable<TSource>.GetEnumerator();
[DebuggerHidden, TargetedPatchingOptOut(
"Performance critical to inline this type of method across NGen image boundaries"
)]
IEnumerator IEnumerable.GetEnumerator();
[DebuggerHidden]
void
IEnumerator.Reset();
void
IDisposable.Dispose();
// Properties
TSource IEnumerator<TSource>.Current { [DebuggerHidden]
get
; }
object
IEnumerator.Current { [DebuggerHidden]
get
; }
}
// Decompiled MoveNext of the Distinct iterator. It walks the source sequence
// and returns true each time it reaches an element that the internal Set
// accepts through Add; it returns false when the source is exhausted or when
// the state is neither 0 nor 2.
// The goto labels and the 'fault' block are decompiler output, not C# source.
private
bool
MoveNext()
{
bool
flag;
try
{
switch
(
this
.<>1__state)
{
// State 0: first call. Create the set with the comparer, obtain the source
// enumerator, then jump to the "advance" step.
case
0:
this
.<>1__state = -1;
this
.<
set
>5__82 =
new
Set<TSource>(
this
.comparer);
this
.<>7__wrap84 =
this
.source.GetEnumerator();
this
.<>1__state = 1;
goto
Label_0092;
// State 2: re-entered after an element was returned; continue advancing.
case
2:
this
.<>1__state = 1;
goto
Label_0092;
// Any other state: report that there is nothing (more) to return.
default
:
goto
Label_00A5;
}
// Examine the element the source enumerator is positioned on.
Label_0050:
this
.<element>5__83 =
this
.<>7__wrap84.Current;
// Set.Add returns true only when the element was not already in the set,
// so only first occurrences are published as the current element.
if
(
this
.<
set
>5__82.Add(
this
.<element>5__83))
{
this
.<>2__current =
this
.<element>5__83;
this
.<>1__state = 2;
return
true
;
}
// Advance the source; loop back while it still has elements, otherwise run
// the finally helper.
Label_0092:
if
(
this
.<>7__wrap84.MoveNext())
goto
Label_0050;
this
.<>m__Finally85();
Label_00A5:
flag =
false
;
}
// NOTE(review): 'fault' is decompiler notation; presumably a handler that
// runs only when an exception escapes the try block - confirm.
fault
{
this
.System.IDisposable.Dispose();
}
return
flag;
}
// Decompiler listing of the internal Set<TElement> class that the Distinct
// iterator uses to remember the elements it has already seen. Only member
// signatures are shown here (no bodies), so the listing is not compilable C#.
internal
class
Set<TElement>
{
// Fields
// Find indexes this array with hashCode % buckets.Length; each entry holds a
// slot index plus one (Find subtracts 1 when reading and adds 1 when writing).
private
int
[] buckets;
// Find calls comparer.Equals to compare a stored value with the candidate.
private
IEqualityComparer<TElement> comparer;
// Find uses this as the index of the next never-used slot when freeList is negative.
private
int
count;
// Index of a reusable slot; Find takes it when it is >= 0.
private
int
freeList;
// Storage for the elements together with their hash codes and chain links.
private
Slot<TElement>[] slots;
// Methods
[TargetedPatchingOptOut(
"Performance critical to inline this type of method across NGen image boundaries"
)]
public
Set();
public
Set(IEqualityComparer<TElement> comparer);
public
bool
Add(TElement value);
[TargetedPatchingOptOut(
"Performance critical to inline this type of method across NGen image boundaries"
)]
public
bool
Contains(TElement value);
private
bool
Find(TElement value,
bool
add);
// Find calls this first to obtain the hash code it works with.
internal
int
InternalGetHashCode(TElement value);
public
bool
Remove(TElement value);
// Find calls this when count has reached slots.Length.
private
void
Resize();
// Nested Types
[StructLayout(LayoutKind.Sequential)]
internal
struct
Slot
{
// Hash code stored next to the value; Find compares it before calling Equals.
internal
int
hashCode;
internal
TElement value;
// Index of the next slot in the same chain; Find follows it until it is negative.
internal
int
next;
}
}
/// <summary>
/// Adds <paramref name="value"/> to the set unless it is already there.
/// </summary>
/// <param name="value">The element to add.</param>
/// <returns>true when Find did not locate the value; false when it did.</returns>
public bool Add(TElement value)
{
    // Find with add = true inserts the value when no match is found.
    bool alreadyPresent = this.Find(value, true);
    return !alreadyPresent;
}
/// <summary>
/// Reports whether <paramref name="value"/> is in the set.
/// </summary>
/// <param name="value">The element to look for.</param>
/// <returns>The result of Find called with add = false.</returns>
public bool Contains(TElement value)
{
    bool found = this.Find(value, false);
    return found;
}
/// <summary>
/// Looks for <paramref name="value"/> in the set and optionally inserts it
/// when it is missing.
/// </summary>
/// <param name="value">The element to look for.</param>
/// <param name="add">When true, a missing value is stored in the set.</param>
/// <returns>true when an equal element was already stored; otherwise false.</returns>
private bool Find(TElement value, bool add)
{
    int hash = this.InternalGetHashCode(value);

    // Buckets hold "slot index + 1", so subtracting 1 gives -1 for an empty
    // bucket and the walk below stops at once.
    int cursor = this.buckets[hash % this.buckets.Length] - 1;
    while (cursor >= 0)
    {
        // This is the key line: Equals is consulted only when the stored
        // hash code matches the candidate's hash code.
        if (this.slots[cursor].hashCode == hash && this.comparer.Equals(this.slots[cursor].value, value))
        {
            return true;
        }

        cursor = this.slots[cursor].next;
    }

    if (!add)
    {
        return false;
    }

    // Pick the slot that will receive the new value.
    int target;
    if (this.freeList < 0)
    {
        // No reusable slot: take the next unused one, growing first if full.
        if (this.count == this.slots.Length)
        {
            this.Resize();
        }

        target = this.count;
        this.count++;
    }
    else
    {
        // Reuse a slot from the free list and unlink it.
        target = this.freeList;
        this.freeList = this.slots[target].next;
    }

    // The bucket index is computed after any resize, against the current length.
    int bucket = hash % this.buckets.Length;
    this.slots[target].hashCode = hash;
    this.slots[target].value = value;
    // Link the new slot in front of the bucket's existing chain.
    this.slots[target].next = this.buckets[bucket] - 1;
    this.buckets[bucket] = target + 1;

    return false;
}
|
在这段代码中可以看出,扩展函数Distinct在内部使用了一个Set<T>的类来帮助剔除重复数据,而这个内部类使用的是hash表的方式存储数据,所以会调用到我们自定义类的GetHashCode函数,如果返回的hashcode值不等,它就不会再调用Equals方法进行比较了。
原因已经一目了然了,得出的结论就是:
1,重写Equals方法的时候,尽量重写GetHashCode函数,并且不要简单的调用object的GetHashCode函数,返回一个设计合理的hash值,以保证结果如我们的预期。上面的做法直接返回了0,虽然解决了问题,但所有对象的hash值都相同,会让基于hash的容器退化为逐个调用Equals比较,做法欠妥。
2,List<T>的Contains,IndexOf方法,不会用到GetHashCode函数。
3,扩展函数Distinct,Except用到了GetHashCode函数,必须重写这个函数。其他还有哪些函数用到了GetHashCode函数,以后再做补充,使用时多加注意就是了。
4,如果对象要作为字典类(Dictionary)的主键,必须重写GetHashCode函数。
2014/07/08 补充
5,HashSet等容器的Add方法内部,也是先判断GetHashCode,如果GetHashCode值相等,进一步判断Equals方法是否相等来确定对象的相等性。
所以,Equals是相等的,那么GetHashCode也必须要保证相等。相反却不一定,GetHashCode相等,Equals方法可以不等。
6,改变影响GetHashCode返回值的字段值,会造成对象的HashCode值变化,如果对象已经存入了HashSet等容器中,将会使HashSet找不到这个对象,从而使得Remove等方法失败。
// Two separate Point values that carry the same coordinates.
Point a = new Point(1, 2);
Point b = new Point(1, 2);

HashSet<Point> hashSet = new HashSet<Point>();
hashSet.Add(a);
hashSet.Remove(b);
// Can this remove a? Yes, it can.
// The set's Count becomes 0, because Equals is overridden and a and b are
// therefore considered equal.
7,记录一个自定义值类型重写GetHashCode等方法的完整实现,作为参考。
/// <summary>
/// Immutable two-dimensional integer point with value equality, given as a
/// reference implementation of Equals, GetHashCode and the == / != operators
/// for a custom value type.
/// </summary>
public struct Point : IEquatable<Point>
{
    private readonly int x;
    private readonly int y;

    /// <summary>Creates a point from its two coordinates.</summary>
    /// <param name="x">The X coordinate.</param>
    /// <param name="y">The Y coordinate.</param>
    public Point(int x, int y)
    {
        this.x = x;
        this.y = y;
    }

    /// <summary>The X coordinate.</summary>
    public int X
    {
        get { return x; }
    }

    /// <summary>The Y coordinate.</summary>
    public int Y
    {
        get { return y; }
    }

    /// <summary>Returns true when both points have the same coordinates.</summary>
    public static bool operator ==(Point left, Point right)
    {
        // A struct can never be null, so no reference check is needed here.
        return left.Equals(right);
    }

    /// <summary>Returns true when the points differ in at least one coordinate.</summary>
    public static bool operator !=(Point left, Point right)
    {
        return !(left == right);
    }

    /// <summary>
    /// Compares this point with another point without boxing.
    /// </summary>
    /// <param name="other">The point to compare with.</param>
    /// <returns>true when both coordinates match; otherwise false.</returns>
    public bool Equals(Point other)
    {
        return this.x == other.x && this.y == other.y;
    }

    /// <summary>
    /// Compares this point with an arbitrary object.
    /// </summary>
    /// <param name="obj">The object to compare with; may be null or of any type.</param>
    /// <returns>
    /// true when <paramref name="obj"/> is a Point with the same coordinates;
    /// false for null and for objects of any other type.
    /// </returns>
    public override bool Equals(object obj)
    {
        // 'is' is false for null, so a null argument yields false instead of
        // a NullReferenceException.
        return obj is Point && Equals((Point)obj);
    }

    /// <summary>
    /// Returns a hash code derived from both coordinates; equal points always
    /// produce the same value.
    /// </summary>
    public override int GetHashCode()
    {
        // Multiplying one coordinate by a prime before the XOR keeps mirrored
        // points such as (1, 2) and (2, 1) from sharing a hash code, and
        // keeps points with x == y from all hashing to 0.
        unchecked
        {
            return (x * 397) ^ y;
        }
    }
}