正则表达式

正则表达式 | 规则 | 可以匹配
`A` | 指定字符 | A
`\u548c` | 指定Unicode字符 | 和
`.` | 任意字符 | a, b, &, 0, ...
`\d` | 0-9 | 0, 1, 2, ..., 9
`\w` | a-z, A-Z, 0-9, _ | a, A, 0, _, ...
`\s` | 空格、tab键 | “ ”
`\D` | 非数字 | a, A, &, _, ...
`\W` | 非\w | &, @, 中, ...
`\S` | 非\s | a, A, &, _, ...
`AB*` | 任意个数字符 | A, AB, ABB, ABBB
`AB+` | 至少1个字符 | AB, ABB, ABBB
`AB?` | 0个或1个字符 | A, AB
`AB{3}` | 指定个数字符 | ABBB
`AB{1,3}` | 指定范围个数字符 | AB, ABB, ABBB
`AB{2,}` | 至少n个字符 | ABB, ABBB, ...
`AB{0,3}` | 最多n个字符 | A, AB, ABB, ABBB

 

// java版本:
public class test {

    // Area code (0 followed by 2-3 digits), '-', subscriber number (6-8 digits).
    // Domestic area codes always start with 0.
    // Compiled once here because String.matches() recompiles the regex on every call.
    private static final java.util.regex.Pattern TEL_PATTERN =
            java.util.regex.Pattern.compile("0\\d{2,3}\\-\\d{6,8}");

    /**
     * Checks whether the whole string is a landline number of the form
     * "area code-number".
     *
     * @param s candidate string; may be null
     * @return true if s matches the pattern in full, false otherwise (including null)
     */
    public static boolean isValidTel(String s) {
        // Guard against null so the validator returns false instead of throwing.
        if (s == null) {
            return false;
        }
        return TEL_PATTERN.matcher(s).matches();
    }

    public static void main(String[] args) {
        // Valid and invalid area codes.
        System.out.println(isValidTel("010-123456"));      // true
        System.out.println(isValidTel("0123-12345678"));   // true
        System.out.println(isValidTel("123-0123456"));     // false: area code does not start with 0

        System.out.println("------");
        // Every input below is malformed and prints false.
        System.out.println(isValidTel("010#12345678"));
        System.out.println(isValidTel("010X12345678"));
        System.out.println(isValidTel("01-12345678"));
        System.out.println(isValidTel("01234-12345678"));
        System.out.println(isValidTel("01A-12345678"));
        System.out.println(isValidTel("012-1234567A"));
    }
}

true
true
false
------
false
false
false
false
false
false
# python版本:
import re
 
def isValidTel(str):
    """Return a match object if ``str`` is a valid landline number, else None.

    A valid number is an area code (``0`` followed by 2-3 digits), a hyphen,
    and a 6-8 digit subscriber number. Domestic area codes start with 0.
    """
    # re.fullmatch is used instead of re.match with '^...$' because '$' also
    # matches just before a trailing newline, which would let
    # "010-123456\n" through.
    return re.fullmatch(r'0\d{2,3}-\d{6,8}', str)
 
# Area-code checks: the first two are valid, the third does not start with 0.
for number in ("010-123456", "0123-12345678", "123-0123456"):
    print(isValidTel(number))

print("------")
# Malformed inputs: wrong separator, wrong lengths, or a non-digit character.
for number in (
    "010#12345678",
    "010X12345678",
    "01-12345678",
    "01234-12345678",
    "01A-12345678",
    "012-1234567A",
):
    print(isValidTel(number))

# re.match only tries to match at the beginning of the string and returns
# None if the pattern does not match there; re.search scans the string and
# returns the first location where the pattern matches.
print("------")
unanchored = r'0\d{2,3}\-\d{6,8}'
sample = "1012-1234567A"
print(re.match(unanchored, sample))   # None
print(re.search(unanchored, sample))  # <re.Match object; span=(1, 12), match='012-1234567'>

print("------")
pattern2 = re.compile(r'^0\d{2,3}\-\d{6,8}$')
match2 = pattern2.match("0123-12345678")
if match2 is not None:
    # Prints the matched text, then its start and end offsets: 0123-12345678, 0, 13
    for value in (match2.group(), match2.start(), match2.end()):
        print(value)


<re.Match object; span=(0, 10), match='010-123456'>
<re.Match object; span=(0, 13), match='0123-12345678'>
None
------
None
None
None
None
None
None
------
None
<re.Match object; span=(1, 12), match='012-1234567'>
------
0123-12345678
0
13

正则表达式 | 规则 | 可以匹配
`^` | 开头 | 字符串开头
`$` | 结尾 | 字符串结束
`[ABC]` | [...]内任意字符 | A, B, C
`[A-F0-9xy]` | 指定范围的字符 | A, ..., F, 0, ..., 9, x, y
`[^A-F]` | 指定范围外的任意字符 | 非A, ..., F
`AB|CD` | AB或CD | AB, CD
`AB|CD|EFG` | AB或CD或EFG | AB, CD, EFG
// java版本:
public class test {

    // Area code (0 followed by 2-3 digits), '-', subscriber number (6-8 digits).
    // Domestic area codes start with 0; the subscriber number must not start with 0.
    // Compiled once here because String.matches() recompiles the regex on every call.
    private static final java.util.regex.Pattern TEL_PATTERN =
            java.util.regex.Pattern.compile("0\\d{2,3}\\-[1-9]\\d{5,7}");

    // QQ number: 5-10 digits, the first of which is not 0.
    private static final java.util.regex.Pattern QQ_PATTERN =
            java.util.regex.Pattern.compile("^[1-9]\\d{4,9}$");

    /**
     * Checks whether the whole string is a landline number of the form
     * "area code-number".
     *
     * @param s candidate string; may be null
     * @return true if s matches the pattern in full, false otherwise (including null)
     */
    public static boolean isValidTel(String s) {
        if (s == null) {
            return false;
        }
        return TEL_PATTERN.matcher(s).matches();
    }

    /**
     * Checks whether the whole string is a QQ number.
     *
     * @param s candidate string; may be null
     * @return true if s matches the pattern in full, false otherwise (including null)
     */
    public static boolean isValidQQ(String s) {
        if (s == null) {
            return false;
        }
        return QQ_PATTERN.matcher(s).matches();
    }

    public static void main(String[] args) {
        System.out.println(isValidQQ("10000"));        // true
        System.out.println(isValidQQ("99999"));        // true
        System.out.println(isValidQQ("1234567890"));   // true

        System.out.println("-----");
        System.out.println(isValidQQ("00001"));        // false: leading 0

        System.out.println("-----");

        System.out.println(isValidTel("010-123455"));      // true
        System.out.println(isValidTel("0123-12345678"));   // true

        System.out.println("-----");
        System.out.println(isValidTel("123-12345678"));    // false: area code does not start with 0
        System.out.println(isValidTel("010-023456"));      // false: number starts with 0
    }
}

true
true
true
-----
false
-----
true
true
-----
false
false
# python版本:
import re
 
def isValidTel(str):
    """Return a match object if ``str`` is a valid landline number, else None.

    A valid number is an area code (``0`` followed by 2-3 digits), a hyphen,
    and a 6-8 digit subscriber number whose first digit is not 0.
    """
    # re.fullmatch is used instead of re.match with '^...$' because '$' also
    # matches just before a trailing newline.
    return re.fullmatch(r'0\d{2,3}-[1-9]\d{5,7}', str)
 
def isValidQQ(str):
    """Return a match object if ``str`` is a valid QQ number, else None.

    A valid QQ number has 5-10 digits and does not start with 0.
    """
    # re.fullmatch is used instead of re.match with '^...$' because '$' also
    # matches just before a trailing newline.
    return re.fullmatch(r'[1-9]\d{4,9}', str)

# Valid QQ numbers.
for qq in ("10000", "99999", "1234567890"):
    print(isValidQQ(qq))

print("-----")
# Leading 0 is rejected.
print(isValidQQ("00001"))

print("-----")

# Valid landline numbers.
for tel in ("010-123455", "0123-12345678"):
    print(isValidTel(tel))

print("-----")
# Rejected: area code without leading 0, then subscriber number with leading 0.
for tel in ("123-12345678", "010-023456"):
    print(isValidTel(tel))


<re.Match object; span=(0, 5), match='10000'>
<re.Match object; span=(0, 5), match='99999'>
<re.Match object; span=(0, 10), match='1234567890'>
-----
None
-----
<re.Match object; span=(0, 10), match='010-123455'>
<re.Match object; span=(0, 13), match='0123-12345678'>
-----
None
None
  • 示例:判断用户输入的年份是否是19xx年:
    • 规则:1 9 0-9 0-9
    • 正则:19\d\d
    • java字符串:"19\\d\\d"
    • java代码:
      # java版本:
      public class test {

          /**
           * Checks whether the whole string is a year of the form 19xx.
           *
           * @param s candidate string; may be null
           * @return true only for "19" followed by exactly two digits
           */
          public static boolean is19xx(String s) {
              return s != null && s.matches("19\\d\\d");
          }

          public static void main(String[] args) {
              // Years that match 19xx: each prints true.
              String[] accepted = {"1900", "1911", "1999"};
              for (String year : accepted) {
                  System.out.println(is19xx(year));
              }
              System.out.println("------");
              // null, wrong length, non-digit or wrong prefix: each prints false.
              String[] rejected = {null, "", "19", "190A", "19001", "1900s", "2900", "A900"};
              for (String year : rejected) {
                  System.out.println(is19xx(year));
              }
          }
      }
      
    • 运行结果:
      true
      true
      true
      ------
      false
      false
      false
      false
      false
      false
      false
      false
      
      # python版本:
      import re
       
      def is19xx(str):
      	# Matches "19" followed by two digits at the start of the string.
      	# re.match returns a match object on success and None otherwise.
      	# NOTE(review): the pattern has no end anchor, so longer inputs such as
      	# "19001" or "1900s" also produce a match on their first four characters.
      	return re.match(r'19\d\d', str)
       
      # Years of the form 19xx.
      for year in ("1900", "1911", "1999"):
          print(is19xx(year))
      print("------")
      # Empty, too short, non-digit, over-long and wrong-prefix inputs.
      for year in ("", "19", "190A", "19001", "1900s", "2900", "A900"):
          print(is19xx(year))

      print("------")
      # Separate demo on a phone-number pattern: matched text, start and end offsets.
      pattern2 = re.compile(r'^0\d{2,3}\-\d{6,8}$')
      match2 = pattern2.match("0123-12345678")
      if match2 is not None:
          for value in (match2.group(), match2.start(), match2.end()):
              print(value)  # 0123-12345678, then 0, then 13
      
      
      <re.Match object; span=(0, 4), match='1900'>
      <re.Match object; span=(0, 4), match='1911'>
      <re.Match object; span=(0, 4), match='1999'>
      ------
      None
      None
      None
      <re.Match object; span=(0, 4), match='1900'>
      <re.Match object; span=(0, 4), match='1900'>
      None
      None
      ------
      0123-12345678
      0
      13

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// java版本:
public class test {
    public static void main(String[] args) {
        final String separator = "------";
        final String telRegex = "^\\d{3,4}\\-\\d{6,8}$";

        // One-off check straight from the regex string.
        System.out.println("010-12345678".matches(telRegex));

        System.out.println(separator);
        // Matching repeatedly from a regex string is less efficient because the
        // string has to be compiled into a Pattern first. A Pattern that is
        // reused can be compiled once (and moved outside the method).
        Pattern telPattern = Pattern.compile(telRegex);
        String[] candidates = {"010-12345678", "021-123456", "021#123456"};
        for (String candidate : candidates) {
            System.out.println(telPattern.matcher(candidate).matches());
        }

        System.out.println(separator);
        // Matcher.group(n) extracts captured substrings:
        // group 0 is the whole match, group 1 the area code, group 2 the number.
        Matcher parts = Pattern.compile("^(\\d{3,4})\\-(\\d{6,8})$").matcher("010-12345678");
        if (parts.matches()) {
            for (int index = 0; index <= parts.groupCount(); index++) {
                System.out.println(parts.group(index)); // "010-12345678", "010", "12345678"
            }
        }
    }
}

true
------
true
true
false
------
010-12345678
010
12345678
# python版本:
import re
 
# re.match tries to match the pattern at the start of the string and returns
# None when the match does not succeed there.
# The pattern is a raw string so that \d and \- reach the regex engine
# unchanged instead of being treated as (invalid) string escape sequences.
regex = r"^\d{3,4}\-\d{6,8}$"
print(re.match(regex, "010-12345678"))

print("----")
print(re.match(regex, "021-123456"))
print(re.match(regex, "021#123456"))

print("----")
# Compiling does not change how matching works: the whole string has to match
# here only because the pattern itself is anchored with ^ and $.
pattern = re.compile(r'^\d{3,4}\-\d{6,8}$')
print(pattern.match('021-123456A'))
print(pattern.match("021-123456"))

print("----")
# Parentheses capture the area code (group 1) and the number (group 2);
# group() with no argument returns the whole match.
matchObj = re.match(r'^(\d{3,4})\-(\d{6,8})$', "010-12345678")
if matchObj:
    print("matchObj.group() : ", matchObj.group())
    print("matchObj.group(1) : ", matchObj.group(1))
    print("matchObj.group(2) : ", matchObj.group(2))
else:
    print("No match!!")


<re.Match object; span=(0, 12), match='010-12345678'>
----
<re.Match object; span=(0, 10), match='021-123456'>
None
----
None
<re.Match object; span=(0, 10), match='021-123456'>
----
matchObj.group() :  010-12345678
matchObj.group(1) :  010
matchObj.group(2) :  12345678

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// java版本:
public class test {
    public static void main(String[] args) {

        // "^(\d+)(0*)$"
        // Quantifiers are greedy by default: \d+ consumes as many digits as it
        // can, leaving nothing for (0*).
        Pattern pattern1 = Pattern.compile("^(\\d+)(0*)$");
        Matcher matcher1 = pattern1.matcher("12300");
        if (matcher1.matches()){
            System.out.println(matcher1.group(1)); // "12300"
            System.out.println(matcher1.group(2)); // ""
        }

        // "^(\d+?)(0*)$": the trailing ? makes \d+ reluctant (non-greedy),
        // so it takes as few digits as possible and (0*) gets the zeros.
        Pattern pattern2 = Pattern.compile("^(\\d+?)(0*)$");
        Matcher matcher2 = pattern2.matcher("12300");
        if (matcher2.matches()){
            System.out.println(matcher2.group(1)); // "123"
            System.out.println(matcher2.group(2)); // "00"
        }

        // Telling the two meanings of ? apart in "^(\d??)(0*)$":
        // the first ? means "zero or one", the second ? makes it reluctant.
        // The input must consist of zeros only: with "12300" this pattern cannot
        // match at all, because \d?? takes at most one digit and 0* only zeros.
        Pattern pattern3 = Pattern.compile("^(\\d??)(0*)$");
        Matcher matcher3 = pattern3.matcher("0000");
        if (matcher3.matches()){
            System.out.println(matcher3.group(1)); // ""
            System.out.println(matcher3.group(2)); // "0000"
        }
    }
}

 

# python版本:
import re

# Greedy by default: (\d+) takes every digit, leaving (0*) empty.
pattern1 = re.compile(r'^(\d+)(0*)$')
match1 = pattern1.match("12300")
if match1 is not None:
    digits, zeros = match1.groups()
    print(digits)  # "12300"
    print(zeros)   # ""

# "^(\d+?)(0*)$": the trailing ? makes \d+ non-greedy, so (0*) gets the zeros.
pattern2 = re.compile(r'^(\d+?)(0*)$')
match2 = pattern2.match("12300")
if match2 is not None:
    digits, zeros = match2.groups()
    print(digits)  # "123"
    print(zeros)   # "00"


12300

123
00
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// java版本:
public class test {

    public static void main(String[] args) {

        // Splitting a string with a regex: String[] String.split(String regex).
        // Each row is {input, delimiter regex}.
        String[][] splitCases = {
                {"a b c", "\\s"},               // [a, b, c]
                {"a b  c", "\\s"},              // [a, b, , c]
                {"a b  c", "\\s+"},             // [a, b, c]
                {"a, b ;;  c", "[\\,\\;\\s]+"}, // [a, b, c]
        };
        for (String[] splitCase : splitCases) {
            System.out.println(Arrays.toString(splitCase[0].split(splitCase[1])));
        }

        System.out.println("----");
        // Searching a string with a regex: Matcher.find().
        // Pattern.CASE_INSENSITIVE ignores case.
        // "\\w*o\\w*" picks out every word containing an 'o'
        // ("\\w+" would pick out every word instead).
        String sentence = "The quick brown fox jumps over the lazy dug.";
        Matcher hits = Pattern.compile("\\w*o\\w*", Pattern.CASE_INSENSITIVE).matcher(sentence);
        while (hits.find()) {
            System.out.println(hits.group() + ", start=" + hits.start() + ", end=" + hits.end());
        }

        System.out.println("-----");
        // Replacing with a regex: String.replaceAll().
        String spaced = "The    quick brown   fox jumps   over the lazy dug.";
        String collapsed = spaced.replaceAll("\\s+", " ");
        System.out.println(collapsed);
        // $1 in the replacement refers to what the group "(\\w+)" captured:
        // <b>The<b> <b>quick<b> <b>brown<b> <b>fox<b> <b>jumps<b> <b>over<b> <b>the<b> <b>lazy<b> <b>dug<b>.
        System.out.println(collapsed.replaceAll("(\\w+)", "<b>$1<b>"));
    }
}


[a, b, c]
[a, b, , c]
[a, b, c]
[a, b, c]
----
brown, start=10, end=15
fox, start=16, end=19
over, start=26, end=30
-----
The quick brown fox jumps over the lazy dug.
<b>The<b> <b>quick<b> <b>brown<b> <b>fox<b> <b>jumps<b> <b>over<b> <b>the<b> <b>lazy<b> <b>dug<b>.
# python版本:
import re

# Splitting a string with a regex.
print(re.split(r'\s', "a b c"))  # ['a', 'b', 'c']
print(re.split(r'\s', "a b  c"))  # ['a', 'b', '', 'c']
print(re.split(r'\s+', "a b  c"))  # ['a', 'b', 'c']
print(re.split(r'[\,\;\s]+', "a, b   c"))  # ['a', 'b', 'c']

print("----")
# Searching a string with a regex.
s = "The quick brown fox jumps over the lazy dug."
p = re.compile(r'\w*o\w*', re.I)  # every word that contains an 'o', ignoring case
m = p.findall(s)  # ['brown', 'fox', 'over']
print(m)

# finditer yields match objects instead of plain strings.
it = re.finditer(r'\w*o\w*', s, re.I)
for match in it:
    print(match.group())

print("-----")
# Replacing with a regex.
s1 = "The    quick brown   fox jumps   over the lazy dug."
s2 = re.sub(r'\s+', ' ', s1)
print(s2)
# \g<1> in the replacement refers to what the group (\w+) captured. The
# replacement is a raw string so that \g is not read as an (invalid) string
# escape sequence.
# <b>The<b> <b>quick<b> <b>brown<b> <b>fox<b> <b>jumps<b> <b>over<b> <b>the<b> <b>lazy<b> <b>dug<b>.
print(re.sub(r'(\w+)', r'<b>\g<1><b>', s2))


['a', 'b', 'c']
['a', 'b', '', 'c']
['a', 'b', 'c']
['a', 'b', 'c']
----
['brown', 'fox', 'over']
brown
fox
over
-----
The quick brown fox jumps over the lazy dug.
<b>The<b> <b>quick<b> <b>brown<b> <b>fox<b> <b>jumps<b> <b>over<b> <b>the<b> <b>lazy<b> <b>dug<b>.

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值