# tttt@TOC
# ttttt
import re
'''
Stack-style approach: loop over the opening and closing tag markers.
'''
def loopTab(tagName, content):
    """Return the end index of the first balanced parenthesis group.

    Every "(" and ")" in ``content`` is visited from left to right while a
    nesting depth is maintained: an opener adds one, a closer subtracts one.
    The scan stops the first time the depth returns to zero.

    Args:
        tagName: Unused. Kept so existing callers keep working; the markers
            are fixed to "(" and ")".
        content: Text to scan.

    Returns:
        The index just past the ")" that brings the depth back to zero, or
        0 when ``content`` contains no parentheses or they are unbalanced
        (too few closers, or a ")" that appears before any "(" is open).
    """
    tagBegin = "("
    depth = 0
    for match in re.finditer(r"[()]", content):
        if match.group() == tagBegin:
            depth += 1
        else:
            depth -= 1
        if depth < 0:
            # A closer with no opener can never be balanced. Without this
            # guard a later "(" would bring the counter back to 0 and the
            # input ")(" would be reported as a complete group.
            return 0
        if depth == 0:
            return match.end()
    # Ran out of markers while a group was still open (or none were found).
    return 0
'''
Balanced-group search.
'''
def balenceGroup(regexHead, text):
    """Collect the balanced parenthesis groups found in ``text``.

    The remaining text is searched greedily from its first "(" to its last
    ")". ``loopTab`` then reports where the group opened by that first "("
    ends; the group is recorded and the search continues after it.

    Args:
        regexHead: Forwarded unchanged to ``loopTab`` as its first argument.
            The search pattern used here is fixed to parentheses.
        text: Text to scan.

    Returns:
        A list of substrings, each starting with "(" and ending with its
        matching ")", in order of appearance. An opener that has no
        matching closer is skipped, so groups nested inside it are still
        reported.
    """
    # Greedy + DOTALL: spans from the first "(" to the last ")" across lines.
    pattern = re.compile(r"\(.*\)", re.M | re.S)
    rst = []
    while True:
        mather = pattern.search(text)
        if not mather:
            break
        divContent = mather.group()
        a = loopTab(regexHead, divContent)
        if a == 0:
            # loopTab returns 0 when the leading "(" is never closed.
            # Slicing at 0 would leave the text unchanged and loop forever,
            # so drop that unmatched opener and keep scanning.
            text = divContent[1:]
            continue
        rst.append(divContent[:a])
        text = divContent[a:]
    return rst
# Sample input: a SQL "with" statement. It contains seven "(" but only
# six ")", so one opening parenthesis is left unmatched.
text = ''' with table1 as (
select nvl(a,'') as b
from aaa
),table2 as (
select *
from (select * from bbb) t
) , table3 as (
select * from ccc(
)
, table4 as (
select * from ddd
)
select * from table1
;
'''
regex = r"\(.*\)"
result = balenceGroup(regex, text)

# For each extracted group, print the offset of its first occurrence in the
# sample text, then print the full list of groups.
for group in result:
    print(text.find(group))
print(result)