tornado会使用_unicode对字符串进行解码,但是没有对被解码的字符串的字符集进行判断,默认当utf8,这样对非utf8字符串可能解码失败。
以下是tornado3.2的代码:
_unicode = to_unicode
def to_unicode(value):
"""Converts a string argument to a unicode string.
If the argument is already a unicode string or None, it is returned
unchanged. Otherwise it must be a byte string and is decoded as utf8.
"""
if isinstance(value, _TO_UNICODE_TYPES):
return value
if not isinstance(value, bytes_type):
raise TypeError(
"Expected bytes, unicode, or None; got %r" % type(value)
)
return value.decode("utf-8")
_unicode = to_unicode
def to_unicode(value):
"""Converts a string argument to a unicode string.
If the argument is already a unicode string or None, it is returned
unchanged. Otherwise it must be a byte string and is decoded as utf8.
"""
if isinstance(value, _TO_UNICODE_TYPES):
return value
if not isinstance(value, bytes_type):
raise TypeError(
"Expected bytes, unicode, or None; got %r" % type(value)
)
return value.decode("utf-8")