正则表达式虽然好用但常常容易编写错误,因此产生了测试正则表达式是否正确的需求。
用tkinter写了个程序,内部实现对匹配结果的高亮显示,并能够选择一些常见的匹配修饰符。效果如下:
其实核心部分集中在高亮显示上:正则匹配得到的span是基于整段文本的字符偏移,要把它转换成tkinter的"行.列"式索引需要花费一些功夫。这部分代码写得比较绕,应该还有不少优化空间。
代码如下:
from re import finditer, error
from tkinter import (Tk, Label, Button, Checkbutton, IntVar,
messagebox, END, Text, Scrollbar)
def limit_length(command_element):
    """Normalise the widget's text and return cumulative per-row offsets.

    The widget content is read once, stripped of trailing whitespace,
    truncated to 15000 characters if longer (the user is told via a message
    box), and written back as plain text, which also drops any highlight
    tags left from a previous run.

    Args:
        command_element: a Tk ``Text``-like widget providing ``get``,
            ``delete`` and ``insert``.

    Returns:
        A list ``totals`` where ``totals[0] == 0`` and ``totals[k]`` is the
        number of characters (newline included) in the first ``k`` rows of
        the text that was written back.  Row ``r`` (1-based) therefore starts
        at character offset ``totals[r - 1]``.
    """
    text = command_element.get('1.0', END).rstrip()
    if len(text) > 15000:
        messagebox.showinfo('提示', '字数太多, 超出限制。\n已自动将文本截取至15000字!')
        text = text[:15000]

    # Re-inserting the plain text removes any old highlight tags.
    command_element.delete('1.0', END)
    command_element.insert('1.0', text)

    # Tk starts a new row only at '\n'.  str.splitlines() would also split on
    # '\r', form feed, U+2028, ... and shift every following row number, so
    # split on '\n' explicitly.  The offsets are computed from the text that
    # is actually in the widget (i.e. after truncation).
    every_line_char_total = [0]
    for line in text.split('\n'):
        every_line_char_total.append(every_line_char_total[-1] + len(line) + 1)
    return every_line_char_total
def binary_search(search_range, target):
    """Locate the row whose starting offset is the last one <= ``target``.

    Args:
        search_range: ascending list of cumulative character offsets; entry
            ``i`` is the offset at which row ``i + 1`` starts.
        target: character offset to locate.

    Returns:
        ``(row_start_offset, row)`` where ``row`` is the 1-based position of
        the chosen entry.  The result is clamped to the valid rows: a target
        below the first entry maps to row 1, and the final entry (the end
        sentinel) is never selected unless it is the only entry.

    Raises:
        IndexError: if ``search_range`` is empty.
    """
    # Local import keeps this function self-contained.
    from bisect import bisect_right

    if not search_range:
        raise IndexError('binary_search() requires a non-empty search_range')

    # The last entry is the end-of-text sentinel, so the highest selectable
    # start is the one before it.  A single-entry list (empty text) used to
    # loop forever; it now resolves to row 1.
    last_start = max(len(search_range) - 2, 0)
    position = bisect_right(search_range, target) - 1
    position = min(max(position, 0), last_start)
    return search_range[position], position + 1
def index_trans(arr: list, span: tuple, if_small_group=True, fixed_row=None, big_group_range=None):
    """Convert a regex span (character offsets) into Tk "row.col" indices.

    Args:
        arr: cumulative character offsets per row; ``arr[r - 1]`` is the
            offset at which row ``r`` starts.
        span: ``(start, end)`` character offsets of the match or sub-group.
        if_small_group: True when ``span`` belongs to a capture group inside
            an already-located whole match, False for the whole match.
        fixed_row: row number when the whole match is known to sit on a single
            row; the sub-group must then be on that row too and no search is
            needed.
        big_group_range: ``(first_index, last_index)`` slice bounds into
            ``arr`` covering the rows of the whole match; required when
            ``if_small_group`` is True and ``fixed_row`` is not given.

    Returns:
        For a sub-group (or whenever ``fixed_row`` is given): a pair of
        index strings ``(start_index, end_index)``.
        For the whole match: ``(start_index, end_index, fixed_row,
        big_group_range)`` so that the caller can pass the last two values
        back in when converting the sub-groups.
    """
    start_offset, end_offset = span

    # Fast path: the row is already known, only the columns are computed.
    if fixed_row:
        row_base = arr[fixed_row - 1]
        return (f'{fixed_row}.{start_offset - row_base}',
                f'{fixed_row}.{end_offset - row_base}')

    # A sub-group can only lie within the rows of the whole match, so the
    # search is restricted to that window of the offset table.
    if if_small_group:
        window_first, window_last = big_group_range
        arr = arr[window_first: window_last + 1]

    # Find the row of the end offset first; it bounds the search for the start.
    end_base, end_row = binary_search(arr, end_offset)
    if start_offset < end_base:
        # The start is on an earlier row: search only up to the end row.
        start_base, start_row = binary_search(arr[: end_row + 1], start_offset)
    else:
        # Common case: start and end share a row.
        start_base, start_row = end_base, end_row

    start_col = start_offset - start_base
    end_col = end_offset - end_base

    if not if_small_group:
        single_row = start_row if start_row == end_row else None
        row_window = (start_row - 1, end_row)
        return f'{start_row}.{start_col}', f'{end_row}.{end_col}', single_row, row_window

    # Rows found inside the window are relative; shift them back to absolute.
    row_shift = big_group_range[0]
    start_row += row_shift
    end_row += row_shift
    return f'{start_row}.{start_col}', f'{end_row}.{end_col}'
def choose_mode():
    """Run an extraction with the flags selected by the three checkboxes.

    The checkbox values are OR-ed into one flags integer, the data is loaded
    through ``load_data``, every match is highlighted through ``re_mode`` and
    the result label is updated with the returned count.

    ``load_data`` returns a falsy value instead of a pair when the expression
    is empty or has a syntax error; in that case nothing more is done, rather
    than failing while unpacking.
    """
    flags = CheckVar1.get() | CheckVar2.get() | CheckVar3.get()
    loaded = load_data(flags)
    if not loaded:
        return
    match_results_iter, every_line_char_total = loaded
    num = re_mode(match_results_iter, every_line_char_total)
    Label_out.config(text=f'提取结果: {num}个')
def cancel_highlight(event):
    """Remove all highlighting from the input text box.

    The current content (without trailing whitespace) is deleted and inserted
    again as plain text, which discards the tags attached to it.  ``event`` is
    the Tk event that triggered the handler and is not used.
    """
    plain_text = area_in.get('1.0', END).rstrip()
    area_in.delete('1.0', END)
    area_in.insert('1.0', plain_text)
def callback(event, command_element):
    """Push a separator onto the undo stack of ``command_element``.

    ``event`` is the Tk event that triggered the handler and is not used.
    """
    command_element.edit_separator()
def undo_command(event, command_element):
    """Undo the last edit of ``command_element``, if there is one.

    ``event`` is the Tk event that triggered the handler and is not used.
    An empty undo stack is not treated as an error.
    """
    # Local import: TclError is not among the names imported at file level.
    from tkinter import TclError

    try:
        command_element.edit_undo()
    except TclError:
        # Tk reports an error when there is nothing to undo; for the user
        # that simply means the key press has no effect.
        pass
def highlight_show(match_result, groups_count, every_line_char_total):
    """Highlight one match in the input box and append it to the output box.

    The whole match is re-inserted with tag ``'tag'``.  If the pattern has
    capture groups, each group that took part in the match is re-inserted
    with tag ``'tag2'`` on top of it, and the output line lists the groups
    joined by commas; otherwise the output line is the whole match.

    Args:
        match_result: a ``re.Match`` object.
        groups_count: running count of extracted results so far.
        every_line_char_total: cumulative per-row character offsets of the
            input text, as expected by ``index_trans``.

    Returns:
        The updated count: +1 for a match without capture groups, otherwise
        +1 for every capture group that participated in the match.
    """
    whole_text = match_result.group()
    whole_start, whole_end, fixed_row, whole_row_range = index_trans(
        every_line_char_total, match_result.span(), False)
    # Replacing the text with an identical, tagged copy applies the highlight.
    area_in.delete(whole_start, whole_end)
    area_in.insert(whole_start, whole_text, 'tag')

    groups = match_result.groups()
    if not groups:
        area_out.insert(END, whole_text)
        groups_count += 1
    else:
        # A group that did not participate (e.g. one side of "(a)|(b)") is
        # None; str.join() would raise TypeError on it, so show it as empty.
        area_out.insert(END, ','.join('' if group is None else group for group in groups))
    area_out.insert(END, '\n')

    for index, group_text in enumerate(groups, 1):
        if group_text is None:
            # Its span is (-1, -1), which has no position in the text.
            continue
        groups_count += 1
        group_start, group_end = index_trans(
            every_line_char_total, match_result.span(index), True, fixed_row, whole_row_range)
        area_in.delete(group_start, group_end)
        area_in.insert(group_start, group_text, 'tag2')
    return groups_count
def load_data(mode_decorate):
    """Read the expression and the input text and start the regex search.

    Args:
        mode_decorate: integer regex flags passed to ``finditer``.

    Returns:
        ``(match_iterator, every_line_char_total)`` on success, or ``0`` when
        the expression box is empty or the expression does not compile (an
        error dialog is shown in the latter case).
    """
    pattern = area_expression.get('1.0', END).rstrip()
    if not pattern:
        return 0

    # limit_length() rewrites the input box, so its content is read again
    # afterwards.
    line_offsets = limit_length(area_in)
    subject = area_in.get('1.0', END).rstrip()
    area_out.delete('1.0', END)

    try:
        matches = finditer(pattern, subject, mode_decorate)
    except error as exc:
        messagebox.showerror('错误', f'表达式语法错误:\n{exc}')
        return 0
    return matches, line_offsets
def re_mode(match_results_iter, every_line_char_total):
    """Highlight every match and return the total number of results.

    Each match from ``match_results_iter`` is handed to ``highlight_show``
    together with the running count, which starts at 0.
    """
    running_total = 0
    for match in match_results_iter:
        running_total = highlight_show(match, running_total, every_line_char_total)
    return running_total
# GUI section: main window, scrollbars and the input text box.
root = Tk()
root.title("正则表达式测试")
root.geometry('800x633+300+100')
# Hint label telling the user which mouse button clears the highlight
cancel_highlight_info = Label(root, text='鼠标中键取消高亮', font=('宋体', 16), bg='gold')
cancel_highlight_info.place(relx=0.5, rely=0)
# Create the Scrollbar widgets (they are connected to their text boxes below)
area_in_scrollbar_y = Scrollbar(root)
area_in_scrollbar_y.place(relx=0.98, rely=0.05, relheight=0.47)
expression_scrollbar = Scrollbar(root)
expression_scrollbar.place(relx=0.38, rely=0.76, relheight=0.24)
area_out_scrollbar_y = Scrollbar(root)
area_out_scrollbar_y.place(relx=0.98, rely=0.56, relheight=0.43)
# Text box for the input text that data is extracted from
Label_in = Label(root, text='请在以下的文本框输入待提取数据的文本', font=('宋体', 16))
Label_in.place(relx=0.01, rely=0)
area_in = Text(root, font=('宋体', 14), autoseparators=True, undo=True, maxundo=30)
area_in.config(yscrollcommand=area_in_scrollbar_y.set)
area_in.place(relx=0.01, rely=0.05, relwidth=0.97, relheight=0.47)
# Event bindings for the input text box
area_in_scrollbar_y.config(command=area_in.yview)
# Push an undo separator every time the space key is released.  (Author's
# note: binding <space> did not work as hoped - typing other letters and then
# space gave no reaction - so the key release is observed instead.)
area_in.bind('<KeyRelease-space>', lambda _: callback(_, area_in))
# Middle mouse button clears the highlight
area_in.bind('<Button-2>', cancel_highlight)
# NOTE(review): 'Control-z' has no angle brackets, so Tk presumably reads it as
# a sequence of literal key presses rather than Ctrl+Z - confirm whether
# '<Control-z>' was intended (the widget is created with undo=True).
area_in.bind('Control-z', lambda _: undo_command(_, area_in))
# Mode selection: each checkbox stores an integer flag value in its IntVar
# (0 when unchecked); choose_mode() ORs the three values together.
# 8, 2 and 16 are the values of re.MULTILINE, re.IGNORECASE and re.DOTALL.
CheckVar1 = IntVar()
CheckVar2 = IntVar()
CheckVar3 = IntVar()
check_btn1 = Checkbutton(root, text=' M模式>^&匹配多行 ', font=('宋体', 12), onvalue=8, offvalue=0,
                         background='silver', variable=CheckVar1, borderwidth=1, relief='groove')
check_btn2 = Checkbutton(root, text=' I模式>忽略大小写 ', font=('宋体', 12), onvalue=2, offvalue=0,
                         background='silver', variable=CheckVar2, borderwidth=1, relief='groove')
check_btn3 = Checkbutton(root, text=r' S模式>点.匹配"\n" ', font=('宋体', 12), onvalue=16, offvalue=0,
                         background='silver', variable=CheckVar3, borderwidth=1, relief='groove')
Label_mode = Label(root, text="添加模式(可选)", font=('宋体', 16), bg='gold')
Label_mode.place(relx=0.01, rely=0.52, relheight=0.05, relwidth=0.37)
check_btn1.place(relx=0.01, rely=0.57, relheight=0.05, relwidth=0.37)
# The ignore-case checkbox was created but never placed, so it was not shown;
# it goes into the free slot between the other two.
check_btn2.place(relx=0.01, rely=0.62, relheight=0.05, relwidth=0.37)
check_btn3.place(relx=0.01, rely=0.67, relheight=0.05, relwidth=0.37)
# Text box for entering the regular expression
Label_expression = Label(root, text=" 请编写表达式 ", font=('宋体', 16), bg='gold')
Label_expression.place(relx=0.01, rely=0.72)
area_expression = Text(root, width=26, height=7, font=('华文新魏', 14), autoseparators=True, undo=True, maxundo=30)
area_expression.config(yscrollcommand=expression_scrollbar.set)
area_expression.place(relx=0.01, rely=0.76, relheight=0.23, relwidth=0.37)
expression_scrollbar.config(command=area_expression.yview)
# Push an undo separator whenever the space key is released
area_expression.bind('<KeyRelease-space>', lambda _: callback(_, area_expression))
# NOTE(review): 'Control-z' has no angle brackets - confirm whether
# '<Control-z>' was intended.
area_expression.bind('Control-z', lambda _: undo_command(_, area_expression))
# Extract button: runs choose_mode() when clicked
extract_Button = Button(root, text="一键提取", font=('宋体', 13), bg='cyan', command=choose_mode)
extract_Button.place(relx=0.24, rely=0.72, relwidth=0.136, relheight=0.042)
# Text box showing the extraction results
area_out = Text(root, width=41, height=13, font=('华文新魏', 14), autoseparators=True, undo=True, maxundo=30)
area_out.config(yscrollcommand=area_out_scrollbar_y.set)
area_out.place(relx=0.4, rely=0.57, relwidth=0.58, relheight=0.42)
area_out_scrollbar_y.config(command=area_out.yview)
area_out.bind('<KeyRelease-space>', lambda _: callback(_, area_out))
# NOTE(review): same 'Control-z' question as for the expression box above.
area_out.bind('Control-z', lambda _: undo_command(_, area_out))
# Label showing the number of results; choose_mode() rewrites its text
Label_out = Label(root, text=f"提取结果: 0个", font=('宋体', 18), bg='gold')
Label_out.place(relx=0.58, rely=0.52)
# Configure the highlight tags of the input box: "tag" is used for a whole
# match and "tag2" for a capture group (see highlight_show)
area_in.tag_config(tagName="tag", background="aquamarine", foreground='crimson',
selectbackground='dodgerblue', selectforeground='white')
area_in.tag_config(tagName='tag2', background='khaki', foreground='crimson',
selectbackground='dodgerblue', selectforeground='white')
# Start the Tk event loop
root.mainloop()