正则表达式 | 规则 | 可以匹配 |
A | 指定字符 | A |
\u548c | 指定Unicode字符 | 和 |
. | 任意字符(默认不包括换行符) | a, b, &, 0, ... |
\d | 0-9 | 0, 1, 2, ..., 9 |
\w | a-z, A-Z, 0-9, _ | a, A, 0, _, ... |
\s | 空白字符(空格、tab键、换行符等) | “ ” |
\D | 非数字 | a, A, &, _, ... |
\W | 非\w | &, @, 中, ... |
\S | 非\s | a, A, &, _, ... |
AB* | 任意个数字符 | A, AB, ABB, ABBB |
AB+ | 至少1个字符 | AB, ABB, ABBB |
AB? | 0个或1个字符 | A, AB |
AB{3} | 指定个数字符 | ABBB |
AB{1,3} | 指定范围个数字符 | AB, ABB, ABBB |
AB{2,} | 至少n个字符 | ABB, ABBB, ... |
AB{0,3} | 最多n个字符 | A, AB, ABB, ABBB |
// java版本:
public class test {
    /**
     * Landline format: an area code that starts with 0 (3-4 digits in total),
     * a hyphen, then a 6-8 digit number. Compiled once so repeated calls do
     * not re-parse the expression.
     */
    private static final java.util.regex.Pattern TEL_PATTERN =
            java.util.regex.Pattern.compile("0\\d{2,3}\\-\\d{6,8}");

    /**
     * Checks whether the whole string is a landline number such as "010-123456".
     *
     * @param s candidate string; may be null
     * @return true if the entire string matches the format, false otherwise
     *         (including when s is null)
     */
    public static boolean isValidTel(String s) {
        // Guard against null so callers get false instead of a NullPointerException.
        if (s == null) {
            return false;
        }
        return TEL_PATTERN.matcher(s).matches();
    }

    public static void main(String[] args) {
        System.out.println(isValidTel("010-123456"));
        System.out.println(isValidTel("0123-12345678"));
        // Area code does not start with 0.
        System.out.println(isValidTel("123-0123456"));
        System.out.println("------");
        // Wrong separator.
        System.out.println(isValidTel("010#12345678"));
        System.out.println(isValidTel("010X12345678"));
        // Area code too short / too long.
        System.out.println(isValidTel("01-12345678"));
        System.out.println(isValidTel("01234-12345678"));
        // Non-digit characters.
        System.out.println(isValidTel("01A-12345678"));
        System.out.println(isValidTel("012-1234567A"));
    }
}
true
true
false
------
false
false
false
false
false
false
# python版本:
import re
def isValidTel(str):
    """Return a match object if *str* is a valid landline number, else None.

    The accepted form is an area code that starts with 0 and is 3-4 digits
    long, a hyphen, and a 6-8 digit number, e.g. "010-123456".

    The parameter name shadows the builtin ``str``; it is kept unchanged so
    existing keyword callers keep working.
    """
    # re.fullmatch requires the whole string to match. re.match with '^...$'
    # would also accept a trailing newline, because '$' matches just before it.
    return re.fullmatch(r'0\d{2,3}-\d{6,8}', str)
# First batch of sample numbers.
for sample in ("010-123456", "0123-12345678", "123-0123456"):
    print(isValidTel(sample))
print("------")
# Second batch: wrong separator, wrong area-code length, non-digit characters.
for sample in (
    "010#12345678",
    "010X12345678",
    "01-12345678",
    "01234-12345678",
    "01A-12345678",
    "012-1234567A",
):
    print(isValidTel(sample))
# re.match only matches at the beginning of the string and returns None when
# the beginning does not fit the pattern; re.search scans the whole string
# until it finds a match.
print("------")
unanchored = r'0\d{2,3}\-\d{6,8}'
for finder in (re.match, re.search):
    # re.match  -> None
    # re.search -> <re.Match object; span=(1, 12), match='012-1234567'>
    print(finder(unanchored, "1012-1234567A"))
print("------")
pattern2 = re.compile(r'^0\d{2,3}\-\d{6,8}$')
match2 = pattern2.match("0123-12345678")
if match2 is not None:
    # Matched text "0123-12345678", start index 0, end index 13.
    for value in (match2.group(), match2.start(), match2.end()):
        print(value)
<re.Match object; span=(0, 10), match='010-123456'>
<re.Match object; span=(0, 13), match='0123-12345678'>
None
------
None
None
None
None
None
None
------
None
<re.Match object; span=(1, 12), match='012-1234567'>
------
0123-12345678
0
13
正则表达式 | 规则 | 可以匹配 |
^ | 开头 | 字符串开头 |
$ | 结尾 | 字符串结束 |
[ABC] | [...]内任意字符 | A, B, C |
[A-F0-9xy] | 指定范围的字符 | A, ..., F, 0, ..., 9, x, y |
[^A-F] | 指定范围外的任意字符 | 非A, ..., F |
AB|CD | AB或CD | AB, CD |
AB|CD|EFG | AB或CD或EFG | AB, CD, EFG |
// java版本:
public class test {
    /**
     * Landline format: area code starting with 0 (3-4 digits in total), a
     * hyphen, then a 6-8 digit number whose first digit is not 0.
     */
    private static final java.util.regex.Pattern TEL_PATTERN =
            java.util.regex.Pattern.compile("0\\d{2,3}\\-[1-9]\\d{5,7}");

    /** QQ number format: 5-10 digits, the first of which is not 0. */
    private static final java.util.regex.Pattern QQ_PATTERN =
            java.util.regex.Pattern.compile("^[1-9]\\d{4,9}$");

    /**
     * Checks whether the whole string is a landline number such as "010-123455".
     *
     * @param s candidate string; may be null
     * @return true if the entire string matches the format, false otherwise
     *         (including when s is null)
     */
    public static boolean isValidTel(String s) {
        // Guard against null so callers get false instead of a NullPointerException.
        if (s == null) {
            return false;
        }
        return TEL_PATTERN.matcher(s).matches();
    }

    /**
     * Checks whether the whole string is a QQ number.
     *
     * @param s candidate string; may be null
     * @return true if the entire string matches the format, false otherwise
     *         (including when s is null)
     */
    public static boolean isValidQQ(String s) {
        if (s == null) {
            return false;
        }
        return QQ_PATTERN.matcher(s).matches();
    }

    public static void main(String[] args) {
        System.out.println(isValidQQ("10000"));
        System.out.println(isValidQQ("99999"));
        System.out.println(isValidQQ("1234567890"));
        System.out.println("-----");
        // Leading zero is rejected.
        System.out.println(isValidQQ("00001"));
        System.out.println("-----");
        System.out.println(isValidTel("010-123455"));
        System.out.println(isValidTel("0123-12345678"));
        System.out.println("-----");
        // Area code without leading 0, then a number with a leading 0.
        System.out.println(isValidTel("123-12345678"));
        System.out.println(isValidTel("010-023456"));
    }
}
true
true
true
-----
false
-----
true
true
-----
false
false
# python版本:
import re
def isValidTel(str):
    """Return a match object if *str* is a valid landline number, else None.

    The accepted form is an area code that starts with 0 and is 3-4 digits
    long, a hyphen, and a 6-8 digit number whose first digit is not 0,
    e.g. "010-123455".

    The parameter name shadows the builtin ``str``; it is kept unchanged so
    existing keyword callers keep working.
    """
    # re.fullmatch requires the whole string to match. re.match with '^...$'
    # would also accept a trailing newline, because '$' matches just before it.
    return re.fullmatch(r'0\d{2,3}-[1-9]\d{5,7}', str)
def isValidQQ(str):
    """Return a match object if *str* is a valid QQ number, else None.

    A QQ number here is 5-10 digits long and does not start with 0.

    The parameter name shadows the builtin ``str``; it is kept unchanged so
    existing keyword callers keep working.
    """
    # re.fullmatch rejects a trailing newline that '^...$' with re.match
    # would let through.
    return re.fullmatch(r'[1-9]\d{4,9}', str)
# QQ numbers: three samples, then one with a leading zero.
for qq in ("10000", "99999", "1234567890"):
    print(isValidQQ(qq))
print("-----")
print(isValidQQ("00001"))
print("-----")
# Landline numbers: two samples, a separator, then two more.
for tel in ("010-123455", "0123-12345678"):
    print(isValidTel(tel))
print("-----")
for tel in ("123-12345678", "010-023456"):
    print(isValidTel(tel))
<re.Match object; span=(0, 5), match='10000'>
<re.Match object; span=(0, 5), match='99999'>
<re.Match object; span=(0, 10), match='1234567890'>
-----
None
-----
<re.Match object; span=(0, 10), match='010-123455'>
<re.Match object; span=(0, 13), match='0123-12345678'>
-----
None
None
- 示例:判断用户输入的年份是否是19xx年:
- 规则:1 9 0-9 0-9
- 正则:19\d\d
- java字符串:"19\\d\\d"
- java代码:
// Java version:
public class test {
    /**
     * Tells whether the whole string is a four-character year of the form 19xx.
     *
     * @param s candidate string; may be null
     * @return true if s is "19" followed by exactly two digits, false otherwise
     *         (including when s is null)
     */
    public static boolean is19xx(String s) {
        return s != null && s.matches("19\\d\\d");
    }

    public static void main(String[] args) {
        String[] firstBatch = {"1900", "1911", "1999"};
        for (String year : firstBatch) {
            System.out.println(is19xx(year));
        }
        System.out.println("------");
        String[] secondBatch = {null, "", "19", "190A", "19001", "1900s", "2900", "A900"};
        for (String year : secondBatch) {
            System.out.println(is19xx(year));
        }
    }
}
- 运行结果:
true true true ------ false false false false false false false false
# Python version:
import re


def is19xx(str):
    """Return a match object if *str* is a year of the form 19xx, else None.

    The whole string must be "19" followed by exactly two digits, so inputs
    such as "19001" or "1900s" are rejected.

    The parameter name shadows the builtin ``str``; it is kept unchanged so
    existing keyword callers keep working.
    """
    # re.fullmatch is required here: re.match only anchors at the start and
    # would accept any string that merely begins with a 19xx year.
    return re.fullmatch(r'19\d\d', str)


print(is19xx("1900"))
print(is19xx("1911"))
print(is19xx("1999"))
print("------")
print(is19xx(""))
print(is19xx("19"))
print(is19xx("190A"))
print(is19xx("19001"))
print(is19xx("1900s"))
print(is19xx("2900"))
print(is19xx("A900"))
print("------")
pattern2 = re.compile(r'^0\d{2,3}\-\d{6,8}$')
match2 = pattern2.match("0123-12345678")
if match2:
    print(match2.group())  # "0123-12345678"
    print(match2.start())  # 0
    print(match2.end())    # 13

# Output:
# <re.Match object; span=(0, 4), match='1900'>
# <re.Match object; span=(0, 4), match='1911'>
# <re.Match object; span=(0, 4), match='1999'>
# ------
# None
# None
# None
# None
# None
# None
# None
# ------
# 0123-12345678
# 0
# 13
import java.util.regex.Matcher;
import java.util.regex.Pattern;
// java版本:
public class test {
    /**
     * Demonstrates String.matches, a reusable compiled Pattern, and
     * extracting captured groups with Matcher.group(n).
     */
    public static void main(String[] args) {
        String regex = "^\\d{3,4}\\-\\d{6,8}$";
        System.out.println("010-12345678".matches(regex));
        System.out.println("------");

        // Matching repeatedly against a regex string is comparatively slow,
        // because the string is compiled into a Pattern each time. A Pattern
        // that is reused can be compiled once (and moved out of the method).
        Pattern pattern = Pattern.compile(regex);
        String[] candidates = {"010-12345678", "021-123456", "021#123456"};
        for (String candidate : candidates) {
            System.out.println(pattern.matcher(candidate).matches());
        }
        System.out.println("------");

        // Matcher.group(n) extracts substrings: group 0 is the whole match
        // ("010-12345678"), group 1 the first capture ("010"), group 2 the
        // second capture ("12345678").
        Matcher matcher1 = Pattern.compile("^(\\d{3,4})\\-(\\d{6,8})$").matcher("010-12345678");
        if (matcher1.matches()) {
            for (int i = 0; i <= matcher1.groupCount(); i++) {
                System.out.println(matcher1.group(i));
            }
        }
    }
}
true
------
true
true
false
------
010-12345678
010
12345678
# python版本:
import re
# re.match tries to match the pattern at the start of the string; if the
# match does not succeed there, it returns None.
# A raw string is used so that "\d" and "\-" reach the regex engine unchanged
# instead of being treated as (invalid) string escape sequences.
regex = r"^\d{3,4}\-\d{6,8}$"
print(re.match(regex, "010-12345678"))
print("----")
print(re.match(regex, "021-123456"))
print(re.match(regex, "021#123456"))
print("----")
# re.compile only pre-compiles the pattern for reuse. The whole-string
# behaviour below comes from the ^ and $ anchors in the pattern itself.
pattern = re.compile(regex)
print(pattern.match('021-123456A'))
print(pattern.match("021-123456"))
print("----")
# group() is the whole match; group(1) and group(2) are the captured parts.
matchObj = re.match(r'^(\d{3,4})\-(\d{6,8})$', "010-12345678")
if matchObj:
    print("matchObj.group() : ", matchObj.group())
    print("matchObj.group(1) : ", matchObj.group(1))
    print("matchObj.group(2) : ", matchObj.group(2))
else:
    print("No match!!")
<re.Match object; span=(0, 12), match='010-12345678'>
----
<re.Match object; span=(0, 10), match='021-123456'>
None
----
None
<re.Match object; span=(0, 10), match='021-123456'>
----
matchObj.group() : 010-12345678
matchObj.group(1) : 010
matchObj.group(2) : 12345678
import java.util.regex.Matcher;
import java.util.regex.Pattern;
// java版本:
public class test {
    /**
     * Demonstrates greedy versus reluctant (non-greedy) quantifiers.
     */
    public static void main(String[] args) {
        // "^(\d+)(0*)$"
        // Quantifiers are greedy by default: they match as much as possible,
        // so \d+ takes every digit and leaves nothing for 0*.
        Pattern pattern1 = Pattern.compile("^(\\d+)(0*)$");
        Matcher matcher1 = pattern1.matcher("12300");
        if (matcher1.matches()) {
            System.out.println(matcher1.group(1)); // "12300"
            System.out.println(matcher1.group(2)); // ""
        }

        // "^(\d+?)(0*)$" -- a trailing ? makes the quantifier reluctant,
        // so \d+? takes as few digits as still allow the whole match.
        Pattern pattern2 = Pattern.compile("^(\\d+?)(0*)$");
        Matcher matcher2 = pattern2.matcher("12300");
        if (matcher2.matches()) {
            System.out.println(matcher2.group(1)); // "123"
            System.out.println(matcher2.group(2)); // "00"
        }

        // Telling the two meanings of ? apart in "^(\d??)(0*)$":
        // the first ? means "zero or one", the second ? makes it reluctant.
        // The input is all zeros because \d?? consumes at most one digit;
        // "12300" could never match this pattern and nothing would be printed.
        Pattern pattern3 = Pattern.compile("^(\\d??)(0*)$");
        Matcher matcher3 = pattern3.matcher("0000");
        if (matcher3.matches()) {
            System.out.println(matcher3.group(1)); // ""
            System.out.println(matcher3.group(2)); // "0000"
        }
    }
}
# python版本:
import re
# Greedy by default: \d+ consumes every digit, leaving nothing for 0*.
pattern1 = re.compile(r'^(\d+)(0*)$')
match1 = pattern1.match("12300")
if match1 is not None:
    # Prints "12300" and then an empty string.
    for part in match1.groups():
        print(part)
# "^(\d+?)(0*)$" -- non-greedy matching.
pattern2 = re.compile(r'^(\d+?)(0*)$')
match2 = pattern2.match("12300")
if match2 is not None:
    # Prints "123" and then "00".
    for part in match2.groups():
        print(part)
12300
123
00
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
// java版本:
public class test {
    /**
     * Demonstrates splitting, searching and replacing with regular expressions.
     */
    public static void main(String[] args) {
        // Splitting a string with a regular expression:
        // String[] String.split(String regex)
        System.out.println(Arrays.toString("a b c".split("\\s"))); // ["a", "b", "c"]
        // Two consecutive spaces produce an empty element when splitting on a single \s.
        System.out.println(Arrays.toString("a b  c".split("\\s"))); // ["a", "b", "", "c"]
        System.out.println(Arrays.toString("a b c".split("\\s+"))); // ["a", "b", "c"]
        System.out.println(Arrays.toString("a, b ;; c".split("[\\,\\;\\s]+"))); // ["a", "b", "c"]
        System.out.println("----");

        // Searching a string with a regular expression:
        // Matcher.find()
        // Pattern.CASE_INSENSITIVE: ignore case
        String s = "The quick brown fox jumps over the lazy dug.";
        // Other patterns to try: "the" finds both "The" and "the";
        // "\\w+" extracts every word.
        Pattern p = Pattern.compile("\\w*o\\w*", Pattern.CASE_INSENSITIVE); // words containing an "o"
        Matcher m = p.matcher(s);
        while (m.find()) {
            // group() returns the text of the current match.
            System.out.println(m.group() + ", start=" + m.start() + ", end=" + m.end());
        }
        System.out.println("-----");

        // Replacing with a regular expression:
        // String.replaceAll()
        String s1 = "The quick brown fox jumps over the lazy dug.";
        System.out.println(s1.replaceAll("\\s+", " "));
        // $1 refers to the text captured by the group "(\\w+)".
        // <b>The<b> <b>quick<b> <b>brown<b> <b>fox<b> <b>jumps<b> <b>over<b> <b>the<b> <b>lazy<b> <b>dug<b>.
        System.out.println(s1.replaceAll("\\s+", " ").replaceAll("(\\w+)", "<b>$1<b>"));
    }
}
[a, b, c]
[a, b, , c]
[a, b, c]
[a, b, c]
----
brown, start=10, end=15
fox, start=16, end=19
over, start=26, end=30
-----
The quick brown fox jumps over the lazy dug.
<b>The<b> <b>quick<b> <b>brown<b> <b>fox<b> <b>jumps<b> <b>over<b> <b>the<b> <b>lazy<b> <b>dug<b>.
# python版本:
import re
# Splitting a string with a regular expression:
print(re.split(r'\s', "a b c"))  # ['a', 'b', 'c']
# Two consecutive spaces produce an empty element when splitting on a single \s.
print(re.split(r'\s', "a b  c"))  # ['a', 'b', '', 'c']
print(re.split(r'\s+', "a b c"))  # ['a', 'b', 'c']
print(re.split(r'[\,\;\s]+', "a, b c"))  # ['a', 'b', 'c']
print("----")
# Searching a string with a regular expression:
s = "The quick brown fox jumps over the lazy dug."
p = re.compile(r'\w*o\w*', re.I)  # words containing an "o"
m = p.findall(s)  # ['brown', 'fox', 'over']
print(m)
it = re.finditer(r'\w*o\w*', s, re.I)
for match in it:
    print(match.group())
print("-----")
# Replacing with a regular expression:
s1 = "The quick brown fox jumps over the lazy dug."
s2 = re.sub(r'\s+', ' ', s1)
print(s2)
# \g<1> refers to the text captured by the group (\w+). The replacement is a
# raw string so that "\g" is not treated as an (invalid) string escape.
# <b>The<b> <b>quick<b> <b>brown<b> <b>fox<b> <b>jumps<b> <b>over<b> <b>the<b> <b>lazy<b> <b>dug<b>.
print(re.sub(r'(\w+)', r'<b>\g<1><b>', s2))
['a', 'b', 'c']
['a', 'b', '', 'c']
['a', 'b', 'c']
['a', 'b', 'c']
----
['brown', 'fox', 'over']
brown
fox
over
-----
The quick brown fox jumps over the lazy dug.
<b>The<b> <b>quick<b> <b>brown<b> <b>fox<b> <b>jumps<b> <b>over<b> <b>the<b> <b>lazy<b> <b>dug<b>.