# -*- coding: utf-8 -*- import sys, os, re class JsonBaseType: single_type = 0 object_type = 1 array_type = 2 class ParseException(): def __init__(self,error): self.error = error def __str__(self,*args,**kwargs): return self.error # NUMBER_RE = re.compile( # r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', # (re.VERBOSE | re.MULTILINE | re.DOTALL)) class JsonHelper(object): def __init__(self): self.jsObjectType = JsonBaseType.single_type self.dictResult = dict() self.jsEleName = '' self.jsArrayValue = [] def __getitem__(self, item): return self.dictResult[item] def JsonParse(self, jsString): jsString = jsString.strip() jsString = jsString.replace('\r', '').replace('\n', '') if len(jsString) > 0: if jsString[0] == '{': self.jsObjectType = JsonBaseType.object_type self.ObjectParse(jsString[1:]) elif jsString[0] == '[': self.jsObjectType = JsonBaseType.array_type self.ArrayParse(jsString[1:]) else: jsString, ret_object, ret_string = self.SingleObjectParse(jsString) self.dictResult[''] = ret_object if 0 != len(ret_string): self.dictResult['String' + ret_string] = ret_string else: raise ParseException('JsonParse json error 2|||' + jsString) return self.dictResult def ArrayParse(self, jsString): nIdxRet = 0 bEndObject = False bCorrectFormat = True nLenString = len(jsString) jsStringBak = '' bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 0) if not bCorrectFormat: raise ParseException('ArrayParse json error 1|||' + jsString[0:20]) nPos = jsString.find('{') if -1 != nPos: jsString = jsString[nPos+1:] while True: nIdxTmp = len(jsString) jsString = jsString.strip() jsString = jsString[1:] nIdxRet += nIdxTmp - len(jsString) + 1#清除开头的空白字符,并更新当前位置 jsObj = JsonHelper() nIdxTmp, bEndObject = jsObj.ObjectParse(jsString, True)#非子节点 self.jsArrayValue.append(jsObj) nIdxRet += nIdxTmp jsString = jsString[len(jsString)-nIdxTmp:]#抛弃已解析完的,保留还未解析的 if 0 == len(jsString): break if bEndObject: bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 5) if not bCorrectFormat: raise 
ParseException('ArrayParse json error 1|||' + jsString[0:20]) else: if bEndObject: break nPos = jsString.find(',')#直接找下一个object if -1 != nPos: jsString = jsString[nPos:] bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 6) # 1、{到"之间不能有其他可见字符 if bCorrectFormat: nPos = jsString.find('"') if -1 != nPos: jsString = jsString[nPos:] jsString = jsString.strip() else: bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 7) if not bCorrectFormat: raise ParseException('ArrayParse json error 2|||' + jsString[0:20]) else: if bEndObject: nPos = jsString.find(']') if -1 != nPos: jsString = jsString[nPos+1:] jsString = jsString.strip() break jsStringBak = jsString self.dictResult[self.jsEleName] = self.jsArrayValue self.jsArrayValue = [] return len(jsStringBak), bEndObject def ArrayParseEx(self, jsString): jsString = jsString.strip() nIdxRet = 0 bEndObject = False bCorrectFormat = True nLenString = len(jsString) jsStringBak = '' while True: if jsString[0] == '[': pass elif jsString[0] == '{': pass else: jsString, ret_object, ret_string = self.SingleObjectParse(jsString) self.jsArrayValue.append(ret_object) if 0 != len(ret_string): self.dictResult['String'] = ret_string jsString = jsString.strip() if 0 == len(jsString): break # 就代码====================================================== if bEndObject: bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 5) if not bCorrectFormat: raise ParseException('ArrayParse json error 1|||' + jsString[0:20]) else: if bEndObject: break nPos = jsString.find(',') # 直接找下一个object if -1 != nPos: jsString = jsString[nPos:] bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 6) # 1、{到"之间不能有其他可见字符 if bCorrectFormat: nPos = jsString.find('"') if -1 != nPos: jsString = jsString[nPos:] jsString = jsString.strip() else: bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 7) if not bCorrectFormat: raise ParseException('ArrayParse json error 2|||' + jsString[0:20]) else: if bEndObject: nPos = jsString.find(']') if -1 != nPos: 
jsString = jsString[nPos + 1:] jsString = jsString.strip() break jsStringBak = jsString self.dictResult[self.jsEleName] = self.jsArrayValue self.jsArrayValue = [] return len(jsStringBak), bEndObject def ObjectParse(self, jsString, bChildObj = False, bArrayObj = False): nIdxRet = 0 bEndObject = False nLenString = len(jsString) jsStringBak = '' while True: nIdxTmp = len(jsString) jsString = jsString.strip() if jsString[0] == '"': jsString = jsString[1:] jsStringBak = jsString nIdxTmp, bEndObject = self.ElementParse(jsString) if bEndObject: jsString = jsString[len(jsString) - nIdxTmp:] if bChildObj or bArrayObj: break else: jsString = jsString[len(jsString)-nIdxTmp+1:]#抛弃已解析完的,保留还未解析的 if 0 == len(jsString): break jsStringBak = jsString return len(jsStringBak), bEndObject def SingleObjectParse(self, jsString): ret_object = None ret_string = '' if jsString[0] == 'n' and jsString[0:4] == 'null': jsString = jsString[4:] elif jsString[0] == 't' and jsString[0:4] == 'true': ret_object = True jsString = jsString[4:] elif jsString[0] == 'f' and jsString[0:5] == 'false': ret_object = False jsString = jsString[5:] elif jsString[0] == '"': ret_value, tmp_pos, end_object = self.ElementStringParse(jsString[1:], False, True) ret_object = ret_value jsString = jsString[len(jsString) - tmp_pos + 1:] else: match_res = re.match(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', jsString, re.M | re.I) if match_res: try: str_num = match_res.group() try: ret_object = int(str_num) except ValueError: ret_object = float(str_num) # 指数可能会失败,如果精度要求太高的话 ret_string = str_num idx_ret = jsString.find(str_num) + len(str_num) jsString = jsString[idx_ret:] except ValueError: raise ParseException('SingleObjectParse json error 1|||' + jsString) else: raise ParseException('SingleObjectParse json error 2|||' + jsString) # 校验普通值后面的数据,值后面还有数值格式就不对了 jsString = jsString.strip() if 0 != len(jsString): raise ParseException('SingleObjectParse json error 3|||' + jsString) return jsString, ret_object, ret_string 
#进到ElementParse已经是去掉当前Element的第一个"(ps userId": "U46554751"...) def ElementParse(self, jsString): chNext = '' bEndObject = False # 当前object是否结束 nLenString = len(jsString) nIdxTmp = 0 nIdxRet = len(jsString) jsString = jsString.strip() nIdxRet = nIdxRet - len(jsString) self.jsEleName, nIdxTmp, bEndObject = self.ElementStringParse(jsString)# 获得元素名 jsString = jsString[nLenString - nIdxTmp + 1:]#把前面已解析完蛋抛弃掉 jsStringBak = jsString nIdxTmp = len(jsString) jsString = jsString.strip()#移除开头空白字符 nIdxRet += nIdxTmp - len(jsString) nIdxTmp = 0 bCorrectFormat = True#默认是符合空白字符规则的 if jsString[0:4] == 'true' or jsString[0:4] == 'null' or jsString[0:5] == 'false': bCorrectFormat = True else: matchRes = re.match(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', jsString, re.M | re.I) if not matchRes: bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 2) if not bCorrectFormat:#element name取出来后马上检查:到值之间是否有违规字符 raise ParseException('ElementParse json error 1|||' + jsString[0:20]) bNtf = False #是否是null,true,false三种之一,这时候需要校验当前object是否结束,是否符合空白规则 for idx in range(len(jsString)): try: chNext = jsString[idx] except IndexError: raise ParseException('ElementParse visit overstep the boundary 1|||' + jsString[idx:idx+20]) if bNtf: # null,true,false以及数值型数据这几种类型值取出来后马上检查结尾是否有违规字符(非空白),结尾可能是,也可能是} bCorrectFormat, bEndObject = self.CheckBlankByte(jsString[nIdxTmp:], 4) if not bCorrectFormat: raise ParseException('ElementParse json error 2|||' + jsString[nIdxTmp:nIdxTmp+20]) if bEndObject:#解析到这里,说明当前的element已经结完,接下去不是空白字符就是,或者} nIdxRet = jsString.find('}') + 1 else: nIdxRet = jsString.find(',') + 1 jsStringBak = jsString[nIdxRet:] nIdxTmp = 0 break if chNext == 'n' and jsString[idx:idx + 4] == 'null': bNtf = True nIdxTmp = idx + 4 nIdxRet = nIdxRet + 4 jsStringBak = jsStringBak[nIdxRet:] self.dictResult[self.jsEleName] = None continue elif chNext == 't' and jsString[idx:idx + 4] == 'true': bNtf = True nIdxTmp = idx + 4 nIdxRet = nIdxRet + 4 jsStringBak = jsStringBak[nIdxRet:] 
self.dictResult[self.jsEleName] = True continue elif chNext == 'f' and jsString[idx:idx + 5] == 'false': bNtf = True nIdxTmp = idx + 5 nIdxRet = nIdxRet + 5 jsStringBak = jsStringBak[nIdxRet:] self.dictResult[self.jsEleName] = False continue elif chNext == '"':#字符串 jsStringValue, nIdxTmp, bEndObject = self.ElementStringParse(jsString[1:], True) jsString = jsString[len(jsString)-nIdxTmp+1:] jsStringBak = jsString nIdxRet = nIdxRet + nIdxTmp + 1 self.dictResult[self.jsEleName] = jsStringValue nIdxTmp = 0 break elif chNext == '[': if jsString[0] == '[': jsString = jsString[1:] if 0 != len(self.jsArrayValue): self.jsArrayValue = [] nIdxTmp, bEndObject = self.ArrayParse(jsString) jsString = jsString[len(jsString) - nIdxTmp + 1:] jsStringBak = jsString nIdxRet += nIdxTmp nIdxTmp = 0 break elif chNext == '{': jsObjectValue = JsonHelper() strRes = jsString[idx+1:]#从{的下一个位置开始解析接下来的object strRes = strRes.strip() nIdxTmp, bEndObject = jsObjectValue.ObjectParse(strRes, True) self.dictResult[self.jsEleName] = jsObjectValue if bEndObject: jsString = strRes[len(strRes) - nIdxTmp:] else: jsString = strRes[len(strRes) - nIdxTmp + 1:] jsStringBak = jsString if bEndObject: bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 4)#判断是否还有下一个element,并校验格式 if not bCorrectFormat: raise ParseException('ElementParse json error 3|||' + jsString[0:20]) else: if not bEndObject: nIdxTmp = jsString.find(',') if -1 != nIdxTmp: jsString = jsString[nIdxTmp+1:] nIdxRet = nIdxRet + nIdxTmp + 1 jsStringBak = jsString bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 3)#还有下一个element,校验,到"之间的格式 if not bCorrectFormat: raise ParseException('ElementParse json error 4|||' + jsString[0:20]) else: nIdxTmp = jsString.find('"') if -1 != nIdxTmp: jsString = jsString[nIdxTmp:] nIdxRet = nLenString - len(jsString) jsStringBak = jsString else: nIdxTmp = jsString.find('}') if -1 != nIdxTmp: jsString = jsString[nIdxTmp + 1:] jsStringBak = jsString nIdxTmp = 0 break else: #其他情况解析为数据,如果无法解析,则json格式错了 
strNum = jsString.strip() matchRes = re.match(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', strNum, re.M | re.I) if matchRes: bNtf = True try: strNum = matchRes.group() try: self.dictResult[self.jsEleName] = int(strNum) except ValueError: self.dictResult[self.jsEleName] = float(strNum)#指数可能会失败,如果精度要求太高的话 self.dictResult[self.jsEleName+'String'] = strNum nIdxRet = nIdxRet + jsString.find(strNum) + len(strNum) nIdxTmp = len(strNum) jsStringBak = jsString[nIdxRet:] except ValueError: raise ParseException('ElementParse json error 5|||' + jsString[0:20]) else: raise ParseException('ElementParse json error 6|||' + jsString[0:20]) return len(jsStringBak), bEndObject # 类型名失败测试 # 'us%"e\\"r\\\\%c Pho\'n:e": \"13030\"rc Pho\'n:303131\"' # 类型名成功用力 # 'us%e\\"r\\\\%c Pho\'n:e": \"13030\"rc Pho\'n:303131\"' # ‘userName": "张三李四王麻子",’ # 值失败测试 # '13030\"rc Pho\'n:303131"sdf,' # '张三李四王麻子" qw}' # 值成功测试 # '13030\"rc Pho\'n:303131",' # ‘张三李四王麻子"}’ def ElementStringParse(self, jsString, bVal = False, bNormalString = False): nRet = 0#返回解析完成之后开始继续遍历字符串的位置 strRet = ''#返回字符串值 ch = ''#临时字符 bEnd = False#字符串是否结束了 bEndObject = False#当前object是否结束 nLenString = len(jsString) for n in range(nLenString): try: ch = jsString[n] except ValueError: raise ParseException('ElementStringParse visit overstep the boundary 1|||' + jsString[n:n+20]) nRet = n + 1 if not bEnd: if ch == '\\': chNext = '' chPre = '' try: chNext = jsString[n+1] if n > 0: chPre = jsString[n-1] except ValueError: raise ParseException('ElementStringParse visit overstep the boundary 2|||' + jsString[n:n+20]) if chNext != '"' and chNext != '\\' and chPre != '\\': raise ParseException('ElementStringParse json error 1|||' + jsString[n:n+20]) strRet = strRet + ch else: if ch == '"': if not (n > 0 and jsString[n-1] == '\\'): bEnd = True # 找到结束” if bNormalString: break continue strRet = strRet + ch else: if not bVal: if ch != ' ' and ch != '\t' and ch != ':': # "到:之间有其他字符 raise ParseException('ElementStringParse json error 2|||' + 
jsString[n:n+20]) else: if ch == ':': # //字段名结尾是: break else: if ch != ' ' and ch != '\t' and ch != ',' and ch != '}': # "到,或者}之间有其他字符 raise ParseException('ElementStringParse json error 3|||' + jsString[n:n+20]) else: if ch == ',' or ch == '}': # 值结尾可能是,也可能是} if ch == '}': bEndObject = True break return strRet, nLenString-nRet+1, bEndObject # 校验空白字符是否正确 def CheckBlankByte(self, jsString, nCheckPos): if 0 == jsString.find('666'): jsString = jsString chTmp = '' bEndObject = False bCorrectFormat = True for n in range(len(jsString)): chTmp = jsString[n] if nCheckPos == 0:#[到{之间 if chTmp != '{' and chTmp != ' ' and chTmp != '\t': bCorrectFormat = False break if chTmp == '{': break elif nCheckPos == 1 or nCheckPos == 3:#1、{到"之间 3、,到“之间 if chTmp != '"' and chTmp != ' ' and chTmp != '\t': bCorrectFormat = False break if chTmp == '"': break elif nCheckPos == 2:#:到数值之间,可能是"也可能是{ if chTmp != '[' and chTmp != '{' and chTmp != '"' and chTmp != ' ' and chTmp != '\t': bCorrectFormat = False break if chTmp == '[' or chTmp == '{' or chTmp == '"': break elif nCheckPos == 4:#子object结束,可能是,也可能是} if chTmp != ',' and chTmp != '}' and chTmp != ' ' and chTmp != '\t': bCorrectFormat = False break if chTmp == ',' or chTmp == '}': if chTmp == '}': bEndObject = True break elif nCheckPos == 5:#array节点结束,可能是,也可能是] if chTmp != ',' and chTmp != ']' and chTmp != ' ' and chTmp != '\t': bCorrectFormat = False break if chTmp == ',' or chTmp == ']': break elif nCheckPos == 6: # array节点结束到下一个节点开始只能是,到{ if chTmp != ',' and chTmp != '{' and chTmp != ' ' and chTmp != '\t': bCorrectFormat = False break if chTmp == ',' or chTmp == '{': break elif nCheckPos == 7: if chTmp != ']' and chTmp != ' ' and chTmp != '\t': bCorrectFormat = False break if chTmp == ']': bEndObject = True break return bCorrectFormat, bEndObject def printSingleObject(obj): print repr(obj) def printObject(obj, nTab): print '\t' * (nTab-1), '{' for i in obj.dictResult: if isinstance(obj.dictResult[i], list): printList(obj.dictResult[i], 
nTab + 1, i) elif isinstance(obj.dictResult[i], JsonHelper): printObject(obj.dictResult[i], nTab + 1) else: print '\t' * nTab, '"{}":\t{}'.format(i, repr(obj.dictResult[i])) print '\t' * (nTab - 1), '}' def printList(lst, nTab, strEleName = ''): if 0==len(strEleName): strEleName = '' else: strEleName = '"{}":'.format(strEleName) print '\t' * (nTab - 1), strEleName, '[' for i in lst: if isinstance(i, list): printList(i, nTab + 1) elif isinstance(i, JsonHelper): printObject(i, nTab + 1) print '\t' * (nTab - 1), ']' if __name__ == "__main__": try: str = ' [{\ "userId": "111111111111",\ "userName": "张三李四王麻子",\ "us%e\\"r\\\\%c Pho\'n:e": "13030\\"rc Pho\'n:303131",\ "userFollow": 86,\ "userFuns": -2e-5,\ "userFavorite": true,\ "userEBuy": null,\ "userComment": "999",\ "userNews": "3",\ "userPoints": "3",\ "userOrder": "8",\ "userLocation": "5",\ "isSeller": "1",\ "aaa": {\ "userId": "2222222222222",\ "userPhone": "13030303131",\ "userFollow": "86",\ "userFuns": "5125",\ "userFavorite": "615",\ "userEBuy": "103",\ "userComment": "999",\ "userNews": "3",\ "userPoints": "3",\ "userOrder": "8",\ "userLocation": "5",\ "isSeller": "1",\ "bbb": {\ "userId": "333333333333",\ "userName": "张三李四王麻子",\ "userPhone": "1303\\"0303131",\ "userFollow": 86,\ "userFuns": 66.66,\ "userFavorite": true,\ "userEBuy": null,\ "userC\\"omment": "999",\ "userNews": "3",\ "userPoints": "3",\ "userOrder": "8",\ "userLocation": "5",\ "isSeller": "1"\ },\ "ccc": {\ "userId": "444444444444",\ "userName": "张三李四王麻子",\ "userPhone": "13030303131",\ "userFollow": "86",\ "userFuns": "5125",\ "userFavorite": "615",\ "userEBuy": "103",\ "userComment": "999",\ "userNews": "3",\ "userPoints": "3",\ "userOrder": "8",\ "userLocation": "5",\ "isSeller": "1",\ "sdfsdf": {\ "Name": "55555555555555",\ "X": "85",\ "Y": "78",\ "Z": "10"\ }\ }\ }\ },\ {\ "userId": "66666666666666",\ "userName": "张三李四王麻子",\ "userPhone": "13030303131",\ "userFollow": "86",\ "userFuns": "5125",\ "userFavorite": "615",\ "userEBuy": "103",\ 
"userComment": "999",\ "userNews": "3",\ "userPoints": "3",\ "userOrder": "8",\ "userLocation": "5",\ "isSeller": "1",\ "Values": [{\ "Name": "7777777777777777",\ "X": "85",\ "Y": "78",\ "Z": "10"\ },\ {\ "Name": "8888888888888888",\ "X": "85",\ "Y": "78",\ "Z": "10"\ }\ ]\ }\ ]' # str = ' [ {\ # "Name": "8888888888888888",\ # "X": "85",\ # "Y": "78",\ # "Z": null\ # }]' # str = ' {"bbb": {\ # "userId": "333333333333",\ # "userName": "张三李四王麻子",\ # "userPhone": "1303\\"0303131",\ # "userFollow": 86,\ # "userFuns": 66.66,\ # "userFavorite": true,\ # "userEBuy": null,\ # "userC\\"omment": "999",\ # "userNews": "3",\ # "userPoints": "3",\ # "userOrder": "8",\ # "userLocation": "5",\ # "isSeller": "1"\ # },\ # "ccc": {\ # "userId": "444444444444",\ # "userName": "张三李四王麻子",\ # "userPhone": "13030303131",\ # "userFollow": "86",\ # "userFuns": "5125",\ # "userFavorite": "615",\ # "userEBuy": "103",\ # "userComment": "999",\ # "userNews": "3",\ # "userPoints": "3",\ # "userOrder": "8",\ # "userLocation": "5",\ # "isSeller": "1",\ # "sdfsdf": {\ # "Name": "55555555555555",\ # "X": "85",\ # "Y": "78",\ # "Z": "10"\ # }\ # }}' # str = '[{\ # "Name": "7777777777777777",\ # "w": false,\ # "X": 85,\ # "Y": 78.66,\ # "Z": "10",\ # "we": null,\ # "us%e\\"r\\\\%c Pho\'n:e": "13030\\"rc Pho\'n:303131"\ # },\ # {\ # "Name": "8888888888888888",\ # "q": "85",\ # "w": "78",\ # "e": "10",\ # "sdfsdf": {\ # "Name": "9999999999999",\ # "a": "85",\ # "b": "78",\ # "c": "10"\ # }\ # }\ # ]' # str = '{\ # "Name": "7777777777777777",\ # "w": false,\ # "X": 85,\ # "Y": 78.66,\ # "Z": "10",\ # "we": null,\ # "us%e\\"r\\\\%c Pho\'n:e": "13030\\"rc Pho\'n:303131"\ # }' # str = '666666' jsObj = JsonHelper() jsObj.JsonParse(str) if jsObj.jsObjectType == JsonBaseType.object_type: printObject(jsObj.dictResult[''], 1) elif jsObj.jsObjectType == JsonBaseType.array_type: printList(jsObj.dictResult[''], 1) else: printSingleObject(jsObj.dictResult['']) print('json parsed success.') except ParseException as 
e: print(e) print('json parsed failed.') # debug(21361583)
# Page title: Python JSON parsing
# Page footer: latest recommended article published 2023-12-28 11:42:59