import hashlib


def _digest(text):
    """Return the SHA-256 hex digest of *text*, encoded as UTF-8."""
    return hashlib.sha256(text.encode('utf-8')).hexdigest()


# Recursively calculate each node's hash code and reorder the items of every
# list by that hash code, so that the result (ordered object + overall hash
# code) does not depend on list order or on dict key order.
def ordered(jsonNode):
    """Canonically order a JSON-like object and compute its hash code.

    Args:
        jsonNode: a dict, a list, or any other value (treated as a leaf).

    Returns:
        A ``(node, hashCode)`` tuple where ``hashCode`` is a SHA-256 hex
        string.  For a dict, ``node`` is the same dict object with its
        values replaced in place by their ordered versions.  For a list,
        ``node`` is a new list whose items are sorted by their hash codes.
        For any other value, ``node`` is the value itself.

    Container digests are derived from the children's digests (with dict
    keys sorted), not from ``repr()`` of the container, so two dicts that
    differ only in key order receive the same hash code.
    """
    if isinstance(jsonNode, dict):
        # object node
        keyHashes = []
        # snapshot the keys: values are reassigned while walking the dict
        for key in list(jsonNode):
            sortedValue, hashCode = ordered(jsonNode[key])
            jsonNode[key] = sortedValue
            keyHashes.append((repr(key), hashCode))
        # sort so the digest is independent of the dict's iteration order
        keyHashes.sort()
        fingerprint = ','.join(
            '{}:{}'.format(keyRepr, hashCode) for keyRepr, hashCode in keyHashes
        )
        return jsonNode, _digest('dict{' + fingerprint + '}')

    if isinstance(jsonNode, list):
        # list node: order the items by their hash codes
        hashedItems = [ordered(item) for item in jsonNode]
        # sort on the hash only; the sort is stable and never compares items,
        # so repeated items are all kept (each as its own object)
        hashedItems.sort(key=lambda pair: pair[1])
        newList = [item for item, _ in hashedItems]
        fingerprint = ','.join(hashCode for _, hashCode in hashedItems)
        return newList, _digest('list[' + fingerprint + ']')

    # simple data type node: either a list element or the value part of a
    # key-value pair
    return jsonNode, _digest(repr(jsonNode))
# Reformatting:
# Format a JSON object as text, indenting each nesting level.
###
#NOTES:
#True -> true
#False -> false
#None -> null
#u'XXXXX' -> 'XXXXX'
#'XXXXX' -> "XXXXX"
# When saving a JSON object to a file, prefer json.dump() over writing this text by hand.
###
def _format_node(node, level):
    """Render *node* as JSON text without a trailing newline.

    The entries of a container and its closing bracket are indented by
    *level* tabs; nested containers are rendered at ``level + 1``.
    """
    # function-scope import so this block does not rely on a module-level one
    import json

    INDENT = '\t'
    NEWLINE = '\n'

    if node is None:
        return 'null'
    if node is True:
        return 'true'
    if node is False:
        return 'false'
    # type(u'') is `unicode` on Python 2 and `str` on Python 3
    textTypes = (str, type(u''))
    if isinstance(node, textTypes):
        # json.dumps produces the double-quoted form with valid JSON escapes
        return json.dumps(node)

    if isinstance(node, dict):
        opener, closer = '{', '}'
        entries = []
        for key, value in node.items():
            # non-string keys are stringified, then quoted like any string
            keyText = key if isinstance(key, textTypes) else str(key)
            entries.append('{}: {}'.format(
                json.dumps(keyText), _format_node(value, level + 1)))
    elif isinstance(node, list):
        opener, closer = '[', ']'
        entries = [_format_node(item, level + 1) for item in node]
    else:
        # numbers and any other value fall back to their repr()
        return repr(node)

    pad = INDENT * level
    parts = [opener]
    if entries:
        # the comma directly follows each entry, including nested containers
        parts.append((',' + NEWLINE).join(pad + entry for entry in entries))
    parts.append(pad + closer)
    return NEWLINE.join(parts)


def format(jsonNode, level=0):
    """Format a JSON-like object as tab-indented JSON text.

    Conversions applied to leaf values:
        None -> null, True -> true, False -> false,
        strings -> double-quoted text with JSON escapes,
        anything else -> ``repr(value)``.

    Args:
        jsonNode: a dict, a list, or a leaf value.
        level: nesting depth of ``jsonNode``; the entries and the closing
            bracket of a container are indented by this many tabs.

    Returns:
        The formatted text.  A dict or list ends with a newline; a leaf
        value does not.  Dict entries appear in the dict's iteration order.
    """
    text = _format_node(jsonNode, level)
    if isinstance(jsonNode, (dict, list)):
        text += '\n'
    return text