便捷测试正则表达式

正则表达式虽然好用,但很容易写错,因此需要一个能够快速测试表达式是否正确的工具。

用tkinter写了个程序,内部实现对匹配结果的高亮显示,并能够选择一些常见的匹配修饰符。效果如下:

2441dce39f5344ae8eed5b66fe3159a3.png

其实核心部分就集中在高亮显示上:要把正则匹配得到的span索引(字符偏移量)转化成tkinter的"行.列"式索引,需要花费一些功夫。这部分代码写得比较粗糙,应该还有不少优化空间。

代码如下:

from bisect import bisect_right
from re import finditer, error
from tkinter import (Tk, Label, Button, Checkbutton, IntVar,
                     messagebox, END, Text, Scrollbar, TclError)


def limit_length(command_element):
    """Normalise the widget text and return cumulative per-line offsets.

    The text of *command_element* is read once, stripped of trailing
    whitespace, cut to 15000 characters if it is longer (the user is told
    via a message box) and written back to the widget.  Rewriting the text
    also discards any highlight tags left over from an earlier run.

    Returns:
        A list ``totals`` in which ``totals[i]`` is the number of characters
        (newlines included) that precede line ``i + 1`` of the final text.
        ``totals[0]`` is always 0 and the last entry is ``len(text) + 1``,
        so the list always has at least two entries, even for empty text.
    """
    max_chars = 15000
    text = command_element.get('1.0', END).rstrip()
    if len(text) > max_chars:
        messagebox.showinfo('提示', '字数太多, 超出限制。\n已自动将文本截取至15000字!')
        text = text[:max_chars]
    # Delete and re-insert so that old highlight tags disappear.
    command_element.delete('1.0', END)
    command_element.insert('1.0', text)
    # Split on '\n' only: the Text widget starts a new row at newlines and
    # nowhere else, whereas str.splitlines() would also split on characters
    # such as '\r', '\x0c' or '\u2028' and shift every later offset.
    every_line_char_total = [0]
    for line in text.split('\n'):
        # +1 accounts for the newline that terminates the line.
        every_line_char_total.append(every_line_char_total[-1] + len(line) + 1)
    return every_line_char_total


def binary_search(search_range, target):
    """Find the row whose character range contains *target*.

    Args:
        search_range: ascending list of cumulative character counts; entry
            ``i`` is the offset at which row ``i + 1`` starts (for example
            the list returned by ``limit_length``).
        target: character offset to locate.

    Returns:
        ``(chars_before_row, row)`` where ``row`` is 1-based and
        ``chars_before_row == search_range[row - 1]``.  Offsets below the
        first entry map to row 1; offsets at or beyond the last entry map to
        the last row.  A single-entry list always yields row 1.

    Raises:
        IndexError: if *search_range* is empty.
    """
    # bisect_right returns the index of the first entry greater than target,
    # which is exactly the 1-based row number.  Clamp it so that out-of-range
    # targets and single-entry lists still produce a valid row instead of
    # looping forever as the hand-written search did.
    row = bisect_right(search_range, target)
    row = max(1, min(row, len(search_range) - 1))
    return search_range[row - 1], row


def index_trans(arr: list, span: tuple, if_small_group=True, fixed_row=None, big_group_range=None):
    """Convert a regex ``(start, end)`` span into Tk ``"row.col"`` indices.

    Args:
        arr: cumulative character counts per row; ``arr[i]`` is the offset
            at which row ``i + 1`` starts.
        span: ``(start, end)`` character offsets of the match or group.
        if_small_group: True when *span* belongs to a capture group inside
            a whole match that has already been located.
        fixed_row: 1-based row that holds the entire whole match, when it
            sits on a single row; lets the row search be skipped.
        big_group_range: ``(first, last)`` indices into *arr* covering the
            rows of the whole match; used to narrow the search for a group.

    Returns:
        For a capture group (or whenever *fixed_row* is given): the pair
        ``(start_index, end_index)``.
        For the whole match: ``(start_index, end_index, fixed_row,
        big_group_range)`` where the last two values are meant to be passed
        back in when translating that match's groups.
    """
    start_offset, end_offset = span

    # Fast path: the row is already known, only the columns are needed.
    if fixed_row:
        row_base = arr[fixed_row - 1]
        return (f'{fixed_row}.{start_offset - row_base}',
                f'{fixed_row}.{end_offset - row_base}')

    # For a group, search only the rows spanned by the whole match and
    # remember how far the slice is shifted from the full list.
    row_shift = 0
    if if_small_group:
        first_index, last_index = big_group_range
        arr = arr[first_index: last_index + 1]
        row_shift = first_index

    # Locate the end first; it bounds the search for the start.
    end_base, end_row = binary_search(arr, end_offset)
    if start_offset >= end_base:
        # Start and end are on the same row (the common case).
        start_base, start_row = end_base, end_row
    else:
        start_base, start_row = binary_search(arr[: end_row + 1], start_offset)

    start_index = f'{start_row + row_shift}.{start_offset - start_base}'
    end_index = f'{end_row + row_shift}.{end_offset - end_base}'
    if if_small_group:
        return start_index, end_index

    same_row = start_row if start_row == end_row else None
    return start_index, end_index, same_row, (start_row - 1, end_row)


def choose_mode():
    """Run an extraction with the currently ticked regex flags.

    ORs the three checkbox values into one flag bitmask, asks ``load_data``
    for the match iterator and line offsets, lets ``re_mode`` highlight the
    matches, and shows the resulting count in ``Label_out``.  Returns
    without touching the label when ``load_data`` reports failure.
    """
    flags = CheckVar1.get() | CheckVar2.get() | CheckVar3.get()
    loaded = load_data(flags)
    # load_data returns 0 instead of a tuple when the expression is empty or
    # does not compile; unpacking that value would raise TypeError.
    if not loaded:
        return
    match_results_iter, every_line_char_total = loaded
    num = re_mode(match_results_iter, every_line_char_total)
    Label_out.config(text=f'提取结果: {num}个')


def cancel_highlight(event):
    """Remove the match highlighting from the input box.

    Event handler; *event* is accepted but not used.  Only the two highlight
    tags are removed, so the text itself, the cursor position, the scroll
    position and the undo history are left untouched.
    """
    for tag_name in ('tag', 'tag2'):
        area_in.tag_remove(tag_name, '1.0', END)


def callback(event, command_element):
    """Insert an undo separator into *command_element*.

    Written as an event handler: *event* is accepted but not used.
    """
    push_separator = command_element.edit_separator
    push_separator()


def undo_command(event, command_element):
    """Undo the most recent edit in *command_element*.

    Written as an event handler: *event* is accepted but not used.

    Returns:
        The string ``'break'``.  When this function's result is returned
        from a key binding, Tk stops processing the event, so the Text
        widget's own undo binding does not run a second undo for the same
        keystroke.
    """
    try:
        command_element.edit_undo()
    except TclError:
        # Tk raises when the undo stack is empty; there is nothing to undo.
        pass
    return 'break'


def highlight_show(match_result, groups_count, every_line_char_total):
    """Highlight one match in the input box and list it in the output box.

    The whole match is tagged ``'tag'``; every capture group that took part
    in the match is tagged ``'tag2'`` instead.  One line is appended to the
    output box: the whole match when the pattern has no groups, otherwise
    the groups joined by commas (a group that did not take part contributes
    an empty field).

    Args:
        match_result: a match object produced by the regex search.
        groups_count: running count of extracted results so far.
        every_line_char_total: cumulative per-row character counts used to
            translate regex offsets into Tk indices.

    Returns:
        The updated count: +1 for a pattern without groups, otherwise +1 per
        group in the pattern.
    """
    whole_start, whole_end, fixed_row, big_group_range = index_trans(
        every_line_char_total, match_result.span(), False)
    # Tag the existing characters instead of deleting and re-inserting them,
    # which would change the text and fill the undo stack.
    area_in.tag_add('tag', whole_start, whole_end)

    groups = match_result.groups()
    if not groups:
        area_out.insert(END, match_result.group())
        groups_count += 1
    else:
        # A group that did not participate is None; str.join would raise
        # TypeError on it, so show it as an empty field.
        area_out.insert(END, ','.join('' if text is None else text for text in groups))
    area_out.insert(END, '\n')

    for index, group_text in enumerate(groups, 1):
        groups_count += 1
        if group_text is None:
            # Span is (-1, -1) here, which has no valid text index.
            continue
        group_start, group_end = index_trans(
            every_line_char_total, match_result.span(index), True, fixed_row, big_group_range)
        # Replace the whole-match tag on this range so the group colour shows.
        area_in.tag_remove('tag', group_start, group_end)
        area_in.tag_add('tag2', group_start, group_end)
    return groups_count


def load_data(mode_decorate):
    """Read the expression and the input text, then start the regex search.

    Args:
        mode_decorate: regex flag bitmask passed on to ``finditer``.

    Returns:
        ``(match_iterator, every_line_char_total)`` on success, or ``0`` when
        the expression box is empty or the expression fails to compile (in
        the latter case an error dialog is shown first).
    """
    expression = area_expression.get('1.0', END).rstrip()
    if not expression:
        return 0

    # limit_length may shorten and rewrite the input text, so the text is
    # fetched again afterwards.
    line_offsets = limit_length(area_in)
    haystack = area_in.get('1.0', END).rstrip()
    area_out.delete('1.0', END)

    try:
        matches = finditer(expression, haystack, mode_decorate)
    except error as e:
        messagebox.showerror('错误', f'表达式语法错误:\n{e}')
        return 0
    return matches, line_offsets


def re_mode(match_results_iter, every_line_char_total):
    """Highlight every match and return the total number of results.

    Each match from *match_results_iter* is handed to ``highlight_show``
    together with the running total, which that function returns updated.
    """
    running_total = 0
    for match in match_results_iter:
        running_total = highlight_show(match, running_total, every_line_char_total)
    return running_total


# GUI section
root = Tk()
root.title("正则表达式测试")
root.geometry('800x633+300+100')


# Hint label: the middle mouse button clears the highlighting
cancel_highlight_info = Label(root, text='鼠标中键取消高亮', font=('宋体', 16), bg='gold')
cancel_highlight_info.place(relx=0.5, rely=0)

# Scrollbars for the three text boxes
area_in_scrollbar_y = Scrollbar(root)
area_in_scrollbar_y.place(relx=0.98, rely=0.05, relheight=0.47)
expression_scrollbar = Scrollbar(root)
expression_scrollbar.place(relx=0.38, rely=0.76, relheight=0.24)
area_out_scrollbar_y = Scrollbar(root)
area_out_scrollbar_y.place(relx=0.98, rely=0.56, relheight=0.43)

# Text box holding the text to extract from
Label_in = Label(root, text='请在以下的文本框输入待提取数据的文本', font=('宋体', 16))
Label_in.place(relx=0.01, rely=0)
area_in = Text(root, font=('宋体', 14), autoseparators=True, undo=True, maxundo=30)
area_in.config(yscrollcommand=area_in_scrollbar_y.set)
area_in.place(relx=0.01, rely=0.05, relwidth=0.97, relheight=0.47)

# Event bindings for the input text box
area_in_scrollbar_y.config(command=area_in.yview)

# Push an undo separator every time the space key is released.  (Author's
# note: binding <space> itself did not fire after typing other letters, so
# the key release is used instead.)
area_in.bind('<KeyRelease-space>', lambda event: callback(event, area_in))
area_in.bind('<Button-2>', cancel_highlight)
# The sequence needs angle brackets: a bare 'Control-z' means typing the
# characters C-o-n-t-r-o-l---z one after another.  Returning 'break' keeps the
# Text widget's built-in undo binding from undoing a second time.
area_in.bind('<Control-z>', lambda event: undo_command(event, area_in) or 'break')

# Regex mode selection.  Each onvalue is the numeric value of the matching
# re flag (re.M == 8, re.I == 2, re.S == 16); choose_mode ORs them together.
CheckVar1 = IntVar()
CheckVar2 = IntVar()
CheckVar3 = IntVar()
check_btn1 = Checkbutton(root, text='   M模式>^$匹配多行    ', font=('宋体', 12), onvalue=8, offvalue=0,
                         background='silver', variable=CheckVar1, borderwidth=1, relief='groove')
check_btn2 = Checkbutton(root, text='   I模式>忽略大小写    ', font=('宋体', 12), onvalue=2, offvalue=0,
                         background='silver', variable=CheckVar2, borderwidth=1, relief='groove')
check_btn3 = Checkbutton(root, text=r'   S模式>点.匹配"\n"   ', font=('宋体', 12), onvalue=16, offvalue=0,
                         background='silver', variable=CheckVar3, borderwidth=1, relief='groove')

Label_mode = Label(root, text="添加模式(可选)", font=('宋体', 16), bg='gold')
Label_mode.place(relx=0.01, rely=0.52, relheight=0.05, relwidth=0.37)
check_btn1.place(relx=0.01, rely=0.57, relheight=0.05, relwidth=0.37)
# The I-mode checkbox was created but never placed; it fills the gap that was
# left between the other two.
check_btn2.place(relx=0.01, rely=0.62, relheight=0.05, relwidth=0.37)
check_btn3.place(relx=0.01, rely=0.67, relheight=0.05, relwidth=0.37)

# Text box for the regular expression
Label_expression = Label(root, text="  请编写表达式 ", font=('宋体', 16), bg='gold')
Label_expression.place(relx=0.01, rely=0.72)
area_expression = Text(root, width=26, height=7, font=('华文新魏', 14), autoseparators=True, undo=True, maxundo=30)
area_expression.config(yscrollcommand=expression_scrollbar.set)
area_expression.place(relx=0.01, rely=0.76, relheight=0.23, relwidth=0.37)
expression_scrollbar.config(command=area_expression.yview)
area_expression.bind('<KeyRelease-space>', lambda event: callback(event, area_expression))
area_expression.bind('<Control-z>', lambda event: undo_command(event, area_expression) or 'break')

# Extract button
extract_Button = Button(root, text="一键提取", font=('宋体', 13), bg='cyan', command=choose_mode)
extract_Button.place(relx=0.24, rely=0.72, relwidth=0.136, relheight=0.042)

# Text box showing the extraction results
area_out = Text(root, width=41, height=13, font=('华文新魏', 14), autoseparators=True, undo=True, maxundo=30)
area_out.config(yscrollcommand=area_out_scrollbar_y.set)
area_out.place(relx=0.4, rely=0.57, relwidth=0.58, relheight=0.42)
area_out_scrollbar_y.config(command=area_out.yview)
area_out.bind('<KeyRelease-space>', lambda event: callback(event, area_out))
area_out.bind('<Control-z>', lambda event: undo_command(event, area_out) or 'break')
Label_out = Label(root, text="提取结果: 0个", font=('宋体', 18), bg='gold')
Label_out.place(relx=0.58, rely=0.52)
# Highlight tags: 'tag' marks a whole match, 'tag2' marks a capture group
area_in.tag_config(tagName="tag", background="aquamarine", foreground='crimson',
                   selectbackground='dodgerblue', selectforeground='white')
area_in.tag_config(tagName='tag2', background='khaki', foreground='crimson',
                   selectbackground='dodgerblue', selectforeground='white')

root.mainloop()

  • 12
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值