了解过Python字典的人都知道一句话:字典的键必须不可变,所以可以用数字、字符串或元组充当,列表就不行。
那么,为什么可变类型不能当作键呢?
自定义类型可以做键吗?
首先,我们看看list做键值的情况
lsta = [1,2,3]
dicta= {lsta:1}
>>>TypeError: unhashable type: 'list'
看了字典的源码dictobject.c
static int
insertdict(register PyDictObject*mp, PyObject *key, long hash, PyObject *value)
其中有个关键参数long hash,这个值是通过key调用自己的hash方法计算出来的,每一种不同的类型都定义了自己的hash方法。
Int类型intobject.c:
PyTypeObject PyInt_Type ={
...
(hashfunc)int_hash,/* tp_hash */...
}
/*
 * Hash function stored in PyInt_Type's tp_hash slot.
 *
 * Returns the integer's own value (v->ob_ival) as its hash, except that
 * -1 is remapped to -2.  NOTE(review): presumably because -1 is the error
 * return of the hash functions (tuplehash and PyObject_HashNotImplemented
 * both return -1 on failure) -- confirm against the C API docs.
 */
static long
int_hash(PyIntObject *v)
{
    /* XXX If this is changed, you also need to change the way
       Python's long, float and complex types are hashed. */
    long x = v->ob_ival;

    if (x == -1)
        x = -2;
    return x;
}
String类型stringobject.c:
PyTypeObject PyString_Type = {
...
(hashfunc)string_hash, /* tp_hash */...
}
/*
 * Hash function stored in PyString_Type's tp_hash slot.
 *
 * The result is cached in a->ob_shash: any cached value other than -1 is
 * returned directly.  Otherwise the hash is computed from the bytes of
 * a->ob_sval (multiply the running value by 1000003, then XOR in the next
 * byte), XORed with Py_SIZE(a), stored back into a->ob_shash and returned.
 * A computed value of -1 is replaced by -2 so it never collides with the
 * "not cached yet" marker.
 */
static long
string_hash(PyStringObject *a)
{
    register Py_ssize_t len;
    register unsigned char *p;
    register long x;

    /* Fast path: hash already computed for this string object. */
    if (a->ob_shash != -1)
        return a->ob_shash;

    len = Py_SIZE(a);
    p = (unsigned char *) a->ob_sval;
    x = *p << 7;
    while (--len >= 0)
        x = (1000003*x) ^ *p++;
    x ^= Py_SIZE(a);
    if (x == -1)
        x = -2;
    a->ob_shash = x;
    return x;
}
Tuple类型tupleobject.c:
PyTypeObject PyTuple_Type = {
...
(hashfunc)tuplehash,/* tp_hash */...
}
/*
 * Hash function stored in PyTuple_Type's tp_hash slot.
 *
 * Combines the hashes of all items in v->ob_item.  Each item is hashed
 * with PyObject_Hash(); if any item cannot be hashed (PyObject_Hash
 * returns -1) the whole tuple hash fails and -1 is returned.  This is why
 * a tuple that contains a list cannot be used as a dict key.
 * A successful result of -1 is remapped to -2.
 */
static long
tuplehash(PyTupleObject *v)
{
    register long x, y;
    register Py_ssize_t len = Py_SIZE(v);
    register PyObject **p;
    long mult = 1000003L;

    x = 0x345678L;
    p = v->ob_item;
    while (--len >= 0) {
        y = PyObject_Hash(*p++);
        if (y == -1)
            return -1;          /* propagate the item's hashing failure */
        x = (x ^ y) * mult;
        /* the cast might truncate len; that doesn't change hash stability */
        mult += (long)(82520L + len + len);
    }
    x += 97531L;
    if (x == -1)
        x = -2;
    return x;
}
List类型listobject.c:
PyTypeObject PyList_Type ={
...
(hashfunc)PyObject_HashNotImplemented,/* tp_hash */...
}
PyObject_HashNotImplemented是在基类object中定义的object.c:
/*
 * tp_hash implementation for types that must not be hashed (it is the
 * tp_hash entry of PyList_Type).
 *
 * Sets a TypeError whose message is "unhashable type: '<type name>'"
 * (the type name is limited to 200 characters by the format) and
 * returns -1.
 */
long
PyObject_HashNotImplemented(PyObject *self)
{
    PyErr_Format(PyExc_TypeError, "unhashable type: '%.200s'",
                 self->ob_type->tp_name);
    return -1;
}
/*
 * Compute the hash of an arbitrary object.
 *
 * Resolution order:
 *   1. If the object's type defines tp_hash, call it and return its result.
 *   2. Otherwise, if the type defines neither tp_compare nor a rich
 *      comparison, hash the object's address via _Py_HashPointer().
 *   3. Otherwise (comparison defined but no hash), fall through to
 *      PyObject_HashNotImplemented(), which sets TypeError and returns -1.
 */
long
PyObject_Hash(PyObject *v)
{
    PyTypeObject *tp = v->ob_type;

    if (tp->tp_hash != NULL)
        return (*tp->tp_hash)(v);
    if (tp->tp_compare == NULL && RICHCOMPARE(tp) == NULL) {
        return _Py_HashPointer(v); /* Use address as hash value */
    }
    /* If there's a cmp but no hash defined, the object can't be hashed */
    return PyObject_HashNotImplemented(v);
}
从上面就可以看出为什么list不能作为键,因为list类型的hash函数是PyObject_HashNotImplemented,这个函数执行时会抛出TypeError异常。
同时,从上面tuplehash函数可以看出,tuple类型虽然可以做键值,但是如果tuple容器里的子项无法hash,那么这个tuple依然不能做键值,比如下面把list放到tuple容器中:
lista = [1,2,3]
tuplea=(lista,)
dicta= {tuplea:1}
>>>TypeError: unhashable type: 'list'
对于自定义类型,如果没有重写__hash__函数,则会使用基类object的hash函数,默认返回对象的地址;如果重写了__hash__函数,则根据新的hash函数计算哈希值。
#1)重写hash函数,抛出异常,无法作为键值
class my_class(object):
    """Example type whose __hash__ always fails, so instances cannot be dict keys."""

    def __hash__(self):
        # Raising TypeError here makes any hash-based use of the instance
        # (for example as a dict key) fail with this message.
        raise TypeError("unhashable type: my_class")


obj = my_class()
dicta= {obj:1}
>>>TypeError: unhashable type: my_class
#2)重写hash函数,不抛出异常
class my_class2(object):
    """Example type whose hash is the integer passed to the constructor."""

    def __init__(self, value):
        self.value = value

    def __hash__(self):
        return self.value


obj = my_class2(1)
obj2 = my_class2(2)
obj3 = my_class2(1)
# obj and obj3 have the same hash, but __eq__ is not overridden, so they
# are still distinct keys and the dict ends up with three entries.
dicta = {obj: 1, obj2: 2, obj3: 3}
print(hash(obj))
print(hash(obj2))
print(hash(obj3))
print(dicta)
# Output (the captured transcript also had a stray leading "18" that none
# of the print statements above produces):
# 1
# 2
# 1
# {<__main__.my_class2 object at 0x01A94490>: 1, <__main__.my_class2 object at 0x01A944B0>: 2, <__main__.my_class2 object at 0x01A944D0>: 3}
#3)不重写hash函数
class my_class3(object):
    """Example type that does not override __hash__ and keeps the one inherited from object."""
    pass


obj = my_class3()
obj2 = my_class3()
dicta = {obj: 1, obj2: 2}
print(hash(obj))
print(hash(obj2))
print(dicta)
# Output from the recorded run; each hash equals the object's address
# shown in the repr (29050192 == 0x01BB4550, 29050224 == 0x01BB4570):
# 29050192
# 29050224
# {<__main__.my_class3 object at 0x01BB4550>: 1, <__main__.my_class3 object at 0x01BB4570>: 2}