您可以"滥用" Python 的标记器(tokenizer)来解析键值列表:

#!/usr/bin/env python
from tokenize import generate_tokens, NAME, NUMBER, OP, STRING, ENDMARKER
def parse_key_value_list(text):
    """Lazily parse comma-separated ``key=value`` pairs using Python's tokenizer.

    This is a deliberately lenient parser: ``=`` and any other token that is
    not a name, number, string, comma or end marker is simply ignored.
    Commas inside quoted strings do not split entries because the tokenizer
    treats each quoted string as a single token.

    Args:
        text: The string to parse, e.g. ``'age=12,name=bob'``.

    Yields:
        ``(key, value)`` tuples in input order.  Bare names are yielded as
        ``str``, numbers as ``int`` (falling back to ``float`` when the
        literal is not an integer) and strings with their first and last
        character (the quotes) removed.  An entry that has a key but no
        value yields ``(key, None)``.  Entries without a key (for example
        the empty slot in ``"a=1,,b=2"``) are skipped.

    Raises:
        ValueError: If a number token is accepted by neither ``int`` nor
            ``float``.
    """

    def to_number(literal):
        # Integer literals stay ints; anything else ("1.5", "1e3") is a float.
        try:
            return int(literal)
        except ValueError:
            return float(literal)

    # Built once instead of on every token.
    converters = {
        NAME: lambda s: s,
        NUMBER: to_number,
        STRING: lambda s: s[1:-1],  # drop the surrounding quote characters
    }

    key = value = None
    # generate_tokens() wants a readline-style callable: hand it the whole
    # text as one "line", then let StopIteration signal the end of input.
    lines = iter([text])
    for tok_type, tok_string, _, _, _ in generate_tokens(lambda: next(lines)):
        if tok_type == NAME and key is None:
            key = tok_string
        elif tok_type in converters:
            value = converters[tok_type](tok_string)
        elif tok_type == ENDMARKER or (tok_type == OP and tok_string == ','):
            # Only emit real entries; a separator with no pending key
            # (empty slot, leading comma, stray value) produces nothing.
            if key is not None:
                yield key, value
            key = value = None
# Demo: the sample has quoted values containing a comma and an apostrophe.
text = '''age=12,name=bob,hobbies="games,reading",phrase="I'm cool!"'''
print({key: value for key, value in parse_key_value_list(text)})
输出:

{'age': 12, 'name': 'bob', 'hobbies': 'games,reading', 'phrase': "I'm cool!"}
您可以使用有限状态机(FSM)来实现更严格的解析器。解析器只根据当前状态和下一个标记来分析输入:

#!/usr/bin/env python
from tokenize import generate_tokens, NAME, NUMBER, OP, STRING, ENDMARKER
def parse_key_value_list(text):
    """Strictly parse comma-separated ``key=value`` pairs with a state machine.

    The text is tokenized with Python's tokenizer and every token must match
    what the current state expects: a name (key), then ``=``, then a value
    (name, number or string), then either ``,`` or the end of the input.

    Args:
        text: The string to parse, e.g. ``'age=12,name=bob'``.

    Yields:
        ``(key, value)`` tuples in input order.  Bare names are yielded as
        ``str``, numbers as ``int`` (falling back to ``float`` when the
        literal is not an integer) and strings with their first and last
        character (the quotes) removed.

    Raises:
        ValueError: With the tuple ``(state, token)`` as its argument when a
            token does not fit the current state.  This includes empty input
            and a trailing comma, because a key is required in both cases.
            Also raised when a number token is accepted by neither ``int``
            nor ``float``.
    """
    # Imported locally because the file's import line does not provide it.
    from tokenize import NEWLINE

    KEY, EQ, VALUE, SEP = range(4)

    def to_number(literal):
        # Integer literals stay ints; anything else ("1.5", "1e3") is a float.
        try:
            return int(literal)
        except ValueError:
            return float(literal)

    # Built once instead of on every value token.
    converters = {
        NAME: lambda s: s,
        NUMBER: to_number,
        STRING: lambda s: s[1:-1],  # drop the surrounding quote characters
    }

    def check(condition):
        # Reads the enclosing loop's current state and token at call time.
        if not condition:
            raise ValueError((state, token))

    state = KEY
    # generate_tokens() wants a readline-style callable: hand it the whole
    # text as one "line", then let StopIteration signal the end of input.
    lines = iter([text])
    for token in generate_tokens(lambda: next(lines)):
        tok_type, tok_string = token[:2]
        if state == KEY:
            check(tok_type == NAME)
            key = tok_string
            state = EQ
        elif state == EQ:
            check(tok_type == OP and tok_string == '=')
            state = VALUE
        elif state == VALUE:
            check(tok_type in converters)
            value = converters[tok_type](tok_string)
            state = SEP
        elif state == SEP:
            if tok_type == NEWLINE:
                # Python 3.8+ emits an implicit NEWLINE before ENDMARKER when
                # the input has no trailing newline; without this skip even
                # well-formed input would be rejected.  Anything other than
                # the end marker after it still fails the check below.
                continue
            check((tok_type == OP and tok_string == ',') or tok_type == ENDMARKER)
            yield key, value
            state = KEY
# Demo: the sample has quoted values containing a comma and an apostrophe.
text = '''age=12,name=bob,hobbies="games,reading",phrase="I'm cool!"'''
print({key: value for key, value in parse_key_value_list(text)})