#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Regular Expression For Consecutive Duplicate Unicode Words
import re
import unittest
# Regex for a consecutively repeated word, written to be used with re.match
# (which only matches at the start of the string):
#   group 1: optional leading whitespace, or optional whitespace followed by
#            one non-whitespace token and whitespace
#   group 2: a run of characters from A-Z, a-z, U+00C0-U+1FFF or U+2800-U+FFFD
#   followed by whitespace and a back-reference (\2) to group 2.
# NOTE(review): nothing after \2 enforces a word boundary, so a longer word
# that merely starts with group 2 also satisfies the pattern - confirm that
# this is intended.
CONSECUTIVE_PATTERN = u"(\\s*|\\s*\\S+\\s+)([A-Za-z\u00C0-\u1FFF\u2800-\uFFFD]+)\\s+\\2"
# Alternative pattern restricted to the range U+4E00-U+9FA5 (kept for reference):
# pattern = u".*([\u4e00-\u9fa5]+)\\s+\\1"
def re_match(pattern, s, flags=0):
    """Report whether ``pattern`` matches at the beginning of ``s``.

    This is a thin boolean wrapper around ``re.match``: the match is
    attempted only at the start of the string, not at arbitrary positions.

    Args:
        pattern: Regular expression to apply.
        s: String to test.
        flags: Optional ``re`` flags forwarded to ``re.match``. Defaults to
            ``0`` (no flags), which is what callers passing two arguments get.

    Returns:
        ``True`` if ``re.match`` finds a match, ``False`` otherwise.
    """
    # re.match returns a match object or None; compare against None so the
    # result is a real bool rather than a truthy/falsy object.
    return re.match(pattern, s, flags) is not None
class TestPattern(unittest.TestCase):
def test_re_match(self):
self.assertTrue(re_match(CONSECUTIVE_PATTERN, u"你好 你好"))
self.assertTrue(re_match(CONSECUTIVE_PATTERN, u"世界 你好 你好 世界"))
self.assertTrue(re_match(CONSECUTIVE_PATTERN, u"hello hello"))
self.assertTrue(re_match(CONSECUTIVE_PATTERN, u" hello hello"))
self.assertTrue(re_match(CONSECUTIVE_PATTERN, u"world hello hello world"))
self.assertTrue(re_match(CONSECUTIVE_PA
正则表达式匹配连续重复单词 (Regular Expression For Consecutive Duplicate Unicode Words)
最新推荐文章于 2023-03-10 21:39:42 发布