头歌: 第1关:实现json解析器中数字和字符串的解析

代码解释在注释部分,有其他疑问可以评论区提问

本文内容原创,请勿转载!

需要手动敲的代码部分:

(一)处理数字 

(二)处理字符串

 

源码: 

from typing import List
from enum import Enum

"""
全局标量定义来表示符合 JSON 所规定的数据类型
(学生可以使用字典结构表示此结构)
其中:
BEGIN_OBJECT({)
END_OBJECT(})
BEGIN_ARRAY([)
END_ARRAY(])
NULL(null)
NUMBER(数字)
STRING(字符串)
BOOLEAN(true/false)
SEP_COLON(:)
SEP_COMMA(,)
"""

# Signal token
BEGIN_OBJECT = 1
BEGIN_ARRAY = 2
END_OBJECT = 4
END_ARRAY = 8

# variable token
NULL_TOKEN = 16
NUMBER_TOKEN = 32
STRING_TOKEN = 64
BOOL_TOKEN = 128

# separator token
COLON_TOKEN = 256
COMMA_TOKEN = 512

# end signal
END_JSON = 65536

# json index
json_index = 0

def token_parse(json_str: str, json_index: int) -> (tuple, int):
    """
    完成词法解析,返回token
    :param json_str: 输入的json字符串
    :param json_index: json字符串的位置
    :return: 返回已处理好的token和json字符串的位置
    """
    def read_num(json_index:int): #处理数字
        ##           begin           ##
        begin = end = json_index #当前json字符串的位置
        end_str = '\n\t\r,}]' #数字结束的字符串(换行符,制表符,回车符,逗号,右括号)

        while json_str[end] not in end_str:#不到结束符就继续循环读取
            end+=1
        number = json_str[begin:end] #提取出begin到end的字符,赋值给number
        #进行转换
        if '.' in number or 'e' in number or 'E' in number:#根据number中是否含有小数点,e,E,来判断是整数 or 浮点数
            res = float(number) #将变量中存储的字符串转换为浮点数类型
        else:
            res = int(number) #整数
        json_index = end #不可变对象,需重新赋值
        return(NUMBER_TOKEN,res),json_index #返回处理数字后的token序列


    def read_str(json_index: int): #处理字符串
        ##         begin       ##
        json_index +=1 #json字符串的当前位置
       
        while json_index <len(json_str) and json_str[json_index] in '\n\t\r':#跳过空白,换行,制表符tab,移动到下一个非空白字符的位置
             json_index = json_index+1
        begin = end = json_index

        #找到string的范围
        while json_str[end] != '"': #不是双引号
            if json_str[end]=='\\': #反斜杠:向后移动一位
                end+=1
                if json_str[end] not in '"\\/bfnrtu': #双引号,反斜杠,斜杠,退格,换行,回车,Unicode
                    print
            end+=1 #向后移动
        json_index = end+1 #下一个token位置的字符
        rem = json_str[begin:end] #begin到end 提取出字符串
        return(STRING_TOKEN,rem),json_index #返回处理字符串后的token序列
        
    def read_null():
        """
        处理null
        :return: 返回处理null后的token序列
        """
        rem = json_str[json_index: json_index + 4]
        return (NULL_TOKEN, rem), json_index + 4

    def read_bool(s: str):
        """
        处理true,false
        :param s: json字符串
        :return: 返回处理true,false后的token序列
        """
        if s == 't':
            rem = json_str[json_index: json_index + 4]
            return (BOOL_TOKEN, rem), json_index + 4
        else:
            rem = json_str[json_index: json_index + 5]
            return (BOOL_TOKEN, rem), json_index + 5


    if json_index == len(json_str):
        return (END_JSON, None), json_index
    elif json_str[json_index] == '{':
        return (BEGIN_OBJECT, json_str[json_index]), json_index + 1
    elif json_str[json_index] == '}':
        return (END_OBJECT, json_str[json_index]), json_index + 1
    elif json_str[json_index] == '[':
        return (BEGIN_ARRAY, json_str[json_index]), json_index + 1
    elif json_str[json_index] == ']':
        return (END_ARRAY, json_str[json_index]), json_index + 1
    elif json_str[json_index] == ',':
        return (COMMA_TOKEN, json_str[json_index]), json_index + 1
    elif json_str[json_index] == ':':
        return (COLON_TOKEN, json_str[json_index]), json_index + 1
    elif json_str[json_index] == 'n':
        return read_null()
    elif json_str[json_index] == 't' or json_str[json_index] == 'f':
        return read_bool(json_str[json_index])
    elif json_str[json_index] == '"':
        return read_str(json_index)
    if json_str[json_index].isdigit():
        return read_num(json_index)


def tokenizer(json_str: str) -> list:
    """
    生成token序列
    :param json_str:
    :return:
    """
    json_index = 0
    tk, cur_index = token_parse(json_str, json_index)
    token_list = []
    generate_tokenlist(token_list, tk)
    while tk[0] != END_JSON:
        tk, cur_index = token_parse(json_str, cur_index)
        generate_tokenlist(token_list, tk)
    return token_list


def generate_token(tokentype: int, tokenvalue: str) -> tuple:
    """
    生成token结构
    :param tokentype: token的类型
    :param tokenvalue: token的值
    :return: 返回token
    """
    token = (tokentype, tokenvalue)
    return token


def generate_tokenlist(tokenlist: list, token: tuple) -> list:

    tokenlist.append(token)
    return tokenlist


def parse_json(tokenlist: list):

    def check_token(expected: int, actual: int):
        if expected & actual == 0:
            raise Exception('Unexpected Token at position %d' % json_index)

    def parse_json_array():
        """
        处理array对象
        :return: 处理json中的array对象
        """
        global json_index
        expected = BEGIN_ARRAY | END_ARRAY | BEGIN_OBJECT | END_OBJECT | NULL_TOKEN | NUMBER_TOKEN | BOOL_TOKEN | STRING_TOKEN

        while json_index != len(tokenlist):
            json_index += 1
            token = tokenlist[json_index]
            # token_type -> TokenEnum
            token_type = token[0]
            token_value = token[1]
            check_token(expected, token_type)

            # check through each condition
            if token_type == BEGIN_OBJECT:
                array.append(parse_json_object())
                expected = COMMA_TOKEN | END_ARRAY
            elif token_type == BEGIN_ARRAY:
                array.append(parse_json_array())
                expected = COMMA_TOKEN | END_ARRAY
            elif token_type == END_ARRAY:
                return array
            elif token_type == NULL_TOKEN:
                array.append(None)
                expected = COMMA_TOKEN | END_ARRAY
            elif token_type == NUMBER_TOKEN:
                array.append(int(token_value))
                expected = COMMA_TOKEN | END_ARRAY
            elif token_type == STRING_TOKEN:
                # print("array-------------array")
                array.append(token_value)
                expected = COMMA_TOKEN | END_ARRAY
            elif token_type == BOOL_TOKEN:
                token_value = token_value.lower().capitalize()
                array.append({'True': True, 'False': False}[token_value])
                expected = COMMA_TOKEN | END_ARRAY
            elif COMMA_TOKEN:
                expected = BEGIN_ARRAY | BEGIN_OBJECT | STRING_TOKEN | BOOL_TOKEN | NULL_TOKEN | NUMBER_TOKEN
            elif END_JSON:
                return array
            else:
                raise Exception('Unexpected Token at position %d' % json_index)

    def parse_json_object():
        """
        处理json对象
        :return:处理json中的json对象
        """
        global json_index
        expected = STRING_TOKEN | END_OBJECT
        key = None
        while json_index != len(tokenlist):
            json_index += 1
            token = tokenlist[json_index]
            token_type = token[0]
            token_value = token[1]
            # print("expected: ", expected, "token_type: ", token_type, "token_value: ", token_value)
            check_token(expected, token_type)
            if token_type == BEGIN_OBJECT:
                obj.update({key: parse_json_object()})
                expected = COMMA_TOKEN | END_OBJECT
            elif token_type == END_OBJECT:
                return obj
            elif token_type == BEGIN_ARRAY:
                # print("join array")
                obj.update({key: parse_json_array()})
                expected = COMMA_TOKEN | END_OBJECT | STRING_TOKEN
            elif token_type == NULL_TOKEN:
                obj.update({key: None})
                expected = COMMA_TOKEN | END_OBJECT
            elif token_type == STRING_TOKEN:
                pre_token = tokenlist[json_index - 1]
                pre_token_value = pre_token[0]
                # print(pre_token_value)
                if pre_token_value == COLON_TOKEN:
                    value = token[1]
                    obj.update({key: value})
               #      print("----------")
                    expected = COMMA_TOKEN | END_OBJECT
                else:
                    key = token[1]
                    expected = COLON_TOKEN
               #     print("+++++++++")

            elif token_type == NUMBER_TOKEN:
                obj.update({key: int(token_value)})
                expected = COMMA_TOKEN | END_OBJECT
            elif token_type == BOOL_TOKEN:
                token_value = token_value.lower().capitalize()
                obj.update({key: {'True': True, 'False': False}[token_value]})
                expected = COMMA_TOKEN | END_OBJECT
            elif token_type == COLON_TOKEN:
                expected = NULL_TOKEN | NUMBER_TOKEN | BOOL_TOKEN | STRING_TOKEN | BEGIN_ARRAY | BEGIN_OBJECT
            elif token_type == COMMA_TOKEN:
                expected = STRING_TOKEN
            elif token_type == END_JSON:
                return obj
            else:
                raise Exception('Unexpected Token at position %d' % json_index)
    array = []
    obj = {}
    global json_index
    if tokenlist[0][0] == BEGIN_OBJECT:
        return parse_json_object()
    elif tokenlist[0][0] == BEGIN_ARRAY:
        return parse_json_array()
    else:
        raise Exception('Illegal Token at position %d' % json_index)


if __name__ == "__main__":
    raw_data = input()
    jlist = tokenizer(raw_data)
    try:
        jdict = parse_json(jlist)
        print(jdict)
    except BaseException as result:
        print(result)

  • 2
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值