为达到高速的全规则匹配(一条数据要和当前所有规则进行匹配,而不是匹配即跳出),需要一棵二叉判断树,而二叉判断树的输入要求是一个有序的、互斥的判断阈值序列。但是,现实中的业务规则绝大多数都是交叉的,比如企业业务规则,同一个/组IP可能会和多个/组IP有交互,因此要对其建立多条规则,而且组与组之间的范围还非常可能是相互重叠的。因此,需要程序自动地对判断范围进行差分(拆分成互不重叠的区间),以供判断树的生成。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Author: Yang XU
E-mail: xuy1202@gmail.com
'''
class _Segment(object):
__slots__ = ('value', 'isHead', 'isTail')
def __init__(self, value, isHead=False, isTail=False):
self.value = value
self.isHead = isHead
self.isTail = isTail
def __str__(self):
h = 'H' if self.isHead else ''
t = 'T' if self.isTail else ''
return '<%s%s%s>'%(h, self.value, t)
__repr__ = __str__
class Span(object):
    """A ``start``..``end`` range together with ``srcs``, the values attached to it."""

    __slots__ = ('start', 'end', 'srcs')

    def __init__(self, start, end, srcs):
        self.start, self.end, self.srcs = start, end, srcs

    def __str__(self):
        fields = (self.start, self.end, str(self.srcs))
        return '<SPAN:%s-%s. srcs:%s>' % fields

    __repr__ = __str__
def _segList(sequence):
tmp_set = set()
for span in sequence:
start = span[0]
end = span[1]
tmp_set.add(start)
tmp_set.add(end)
tmp_set = sorted(list(tmp_set))
header = set([span[0] for span in sequence])
tailer = set([span[1] for span in sequence])
seg_list = []
for item in tmp_set:
isHead = item in header
isTail = item in tailer
segment = _Segment(item, isHead, isTail)
seg_list.append(segment)
return seg_list
def _spanList(seg_list):
span_list = []
for seg in seg_list:
# add value-1 before header
if seg.isHead:
if span_list:
span_list.append(seg.value-1)
# add value
span_list.append(seg.value)
# add value if isHead & isTail
if seg.isHead and seg.isTail:
span_list.append(seg.value)
# add value+1 after tailer
if seg.isTail:
span_list.append(seg.value+1)
return span_list
def _filter(span_list, sequence,
getStart=lambda item: item[0],
getEnd=lambda item: item[1],
getTag = lambda item: item
):
def belongs(span, sequence):
s = span[0]
t = span[1]
tag_list = []
for item in sequence:
_s = getStart(item)
_t = getEnd(item)
tag = getTag(item)
if _s<=s<=_t and _s<=t<=_t:
tag_list.append(tag)
return tag_list
tagLists = map(lambda span: belongs(span, sequence), span_list)
return_list = []
for index in xrange(len(tagLists)):
tags = tagLists[index]
span = span_list[index]
if tags:
return_list.append(Span(span[0], span[1], tags))
return return_list
def split(sequence,
          getStart=lambda item: item[0],
          getEnd=lambda item: item[1],
          getTag=lambda item: item
          ):
    """Cut possibly-overlapping ranges into disjoint pieces.

    Parameters:
        sequence: iterable of source items describing inclusive ranges.
        getStart, getEnd: callables returning an item's lower / upper bound
            (default: ``item[0]`` / ``item[1]``). Neighbouring pieces are
            derived with ``bound - 1`` / ``bound + 1``, so bounds are
            expected to support that arithmetic (e.g. integers).
        getTag: callable returning the value to report for an item
            (default: the item itself).

    Returns:
        A list of ``Span`` objects in ascending order. Each one is a piece
        that lies entirely inside at least one source range, with ``srcs``
        holding the tags of every source that covers it; gaps covered by no
        source are omitted.
    """
    # The items are walked here and again for every candidate piece in
    # _filter, so materialise them once; otherwise a one-shot iterator
    # would be exhausted after the first pass.
    sequence = list(sequence)

    bounds = [(getStart(obj), getEnd(obj)) for obj in sequence]
    cut_points = _spanList(_segList(bounds))

    # cut_points alternates start, end, start, end, ...; zip stops at the
    # shorter slice, so a trailing unpaired start is ignored.
    candidates = []
    for start, end in zip(cut_points[0::2], cut_points[1::2]):
        # Adjacent ranges (a tail directly followed by a head) produce an
        # inverted, empty pair between them -- skip it.
        if start > end:
            continue
        candidates.append((start, end))

    return _filter(
        candidates, sequence,
        getStart=getStart,
        getEnd=getEnd,
        getTag=getTag
    )
if __name__ == '__main__':
    # Demo 1: sources are plain [start, end] pairs, so the default getters
    # (item[0] / item[1]) apply and each source is reported as itself.
    ls = [
        [-1, 10],
        [3, 7],
        [10, 15],
        [12, 20],
        [22, 22],
        [24, 30]
    ]
    s = split(ls)
    # print('') / print(x) produce the same output on Python 2 and 3,
    # unlike the bare print statement, which is a SyntaxError on Python 3.
    print('')
    for i in s:
        print(i)

    # Demo 2: sources are objects, so start/end are fetched through
    # explicit getters.
    class o(object):
        """Small range object used only by this demo."""

        def __init__(self, tag, s, e):
            self.tag = tag
            self.start = s
            self.end = e

        def __str__(self):
            return '<%s-%s>' % (self.start, self.end)

        __repr__ = __str__

    # Tags are numbered from 1 in input order.
    seq = [o(count, span[0], span[1]) for count, span in enumerate(ls, 1)]
    # key= replaces the Python-2-only cmp= argument; the ordering is the same.
    seq.sort(key=lambda x: x.start)
    s = split(seq,
              getStart=lambda item: item.start,
              getEnd=lambda item: item.end,
              getTag=lambda item: item  # use item.tag to report ids instead
              )
    print('')
    for i in s:
        print(i)
# result:
# <SPAN:-1-2. srcs:[[-1, 10]]>
# <SPAN:3-7. srcs:[[-1, 10], [3, 7]]>
# <SPAN:8-9. srcs:[[-1, 10]]>
# <SPAN:10-10. srcs:[[-1, 10], [10, 15]]>
# <SPAN:11-11. srcs:[[10, 15]]>
# <SPAN:12-15. srcs:[[10, 15], [12, 20]]>
# <SPAN:16-20. srcs:[[12, 20]]>
# <SPAN:22-22. srcs:[[22, 22]]>
# <SPAN:24-30. srcs:[[24, 30]]>
#
# <SPAN:-1-2. srcs:[<-1-10>]>
# <SPAN:3-7. srcs:[<-1-10>, <3-7>]>
# <SPAN:8-9. srcs:[<-1-10>]>
# <SPAN:10-10. srcs:[<-1-10>, <10-15>]>
# <SPAN:11-11. srcs:[<10-15>]>
# <SPAN:12-15. srcs:[<10-15>, <12-20>]>
# <SPAN:16-20. srcs:[<12-20>]>
# <SPAN:22-22. srcs:[<22-22>]>
# <SPAN:24-30. srcs:[<24-30>]>
同时,输入序列的元素可以是简单的二元组,分别表示start和end;也可以是一个复杂对象,此时需要提供从对象属性中取得start和end的方法getStart和getEnd。
getTag参数用来决定返回所属原始序列时的值,默认是输入时对象本身。