记录自己遇到的问题:从json中读取中文乱码,希望获得的数据是字典形式且中文不乱码,用dump把数据处理之后不是字典形式了,把处理后的数据再变为字典则又乱码。
解决办法如下:
#! -*- coding:utf-8 -*-
from __future__ import print_function
import json
import sys
reload(sys) # Python2.5 初始化后删除了 sys.setdefaultencoding 方法,我们需要重新载入
sys.setdefaultencoding('utf-8')
def json_load_byteified(file_handle):
return _byteify(
json.load(file_handle, object_hook=_byteify),
ignore_dicts=True
)
def json_loads_byteified(json_text):
return _byteify(
json.loads(json_text, object_hook=_byteify),
ignore_dicts=True
)
def _byteify(data, ignore_dicts = False):
if isinstance(data, str):
return data
# If this is a list of values, return list of byteified values
if isinstance(data, list):
return [ _byteify(item, ignore_dicts=True) for item in data ]
# If this is a dictionary, return dictionary of byteified keys and values
# but only if we haven't already byteified it
if isinstance(data, dict) and not ignore_dicts:
return {
_byteify(key, ignore_dicts=True): _byteify(value, ignore_dicts=True)
for key, value in data.items() # changed to .items() for Python 2.7/3
}
# Python 3 compatible duck-typing
# If this is a Unicode string, return its string representation
if str(type(data)) == "<type 'unicode'>":
return data.encode('utf-8')
# If it's anything else, return it in its original form
return data
with open('a.json', 'r') as file:
print(json_load_byteified(file).get('你好'))