按 sep 切分字符串,并忽略被引号框住的部分中的 sep。
from typing import List
def split_by_ignore_quote(s: str, sep: str, quote_mark=None, escape_mark="\\") -> List[str]:
    """Split ``s`` on ``sep``, ignoring separators inside quoted regions.

    The string is scanned one character at a time. A quoted region starts at
    a quote mark and ends at the next unescaped occurrence of the *same*
    quote mark; other quote marks inside the region are ordinary characters.
    Quote marks themselves are kept in the output. The escape character is
    removed from the output and the character following it is copied
    verbatim (it is never treated as a separator, quote or escape). An
    escape character at the very end of ``s`` is dropped. An unterminated
    quoted region extends to the end of ``s``.

    Parameters
    ----------
    s : str
        String to split.
    sep : str
        Separator. It is compared against individual characters of ``s``,
        so only a single-character separator can ever match.
    quote_mark : Set[str], optional
        Characters that open and close a quoted region. Defaults to the
        single quote and the double quote.
    escape_mark : str, optional
        Escape character. Defaults to a backslash.

    Returns
    -------
    List[str]
        The pieces of ``s`` between unquoted separators. Always contains at
        least one element (``[""]`` for an empty input).

    Raises
    ------
    AssertionError
        If ``escape_mark`` or ``sep`` is one of the quote marks, or if
        ``escape_mark`` equals ``sep``.

    Examples
    --------
    >>> split_by_ignore_quote(r"1,'2,3',4", sep=",")
    ['1', "'2,3'", '4']
    >>> split_by_ignore_quote(r"1,\\'2,3,4", sep=",")
    ['1', "'2", '3', '4']
    >>> split_by_ignore_quote(r"1,'2,3,4", sep=",")
    ['1', "'2,3,4"]
    """
    if quote_mark is None:
        quote_mark = {"'", "\""}
    # Kept as asserts so callers that catch AssertionError keep working.
    assert escape_mark not in quote_mark, "escape mark in quote mark"
    assert sep not in quote_mark, "separator mark in quote mark"
    assert escape_mark != sep, "escape mark equal separator mark"

    open_quote = None  # quote character that opened the current region, if any
    escaped = False  # True when the previous character was the escape mark
    parts = [[]]
    for ch in s:
        if escaped:
            # Escaped character: copy it verbatim, whatever it is.
            parts[-1].append(ch)
            escaped = False
        elif ch == escape_mark:
            # The escape mark itself is not copied to the output.
            escaped = True
        elif open_quote is not None:
            # Inside a quoted region: only the matching quote mark closes it,
            # so a different quote mark (e.g. an apostrophe inside "...")
            # does not end the region early.
            parts[-1].append(ch)
            if ch == open_quote:
                open_quote = None
        elif ch == sep:
            # Unquoted separator: start a new piece.
            parts.append([])
        else:
            parts[-1].append(ch)
            if ch in quote_mark:
                open_quote = ch
    return ["".join(chars) for chars in parts]