Regular Expression 字符串处理利器
用途:
字符串匹配(字符匹配)
字符串查找
字符串替换
例如:
IP地址是否正确
从网页中揪出email地址
从网页中揪出链接等
类:
java.lang.String
java.util.regex.Pattern
java.util.regex.Matcher
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A runnable tour of {@code java.util.regex} and the regex helpers on {@code String}.
 *
 * <p>Each private {@code demo...} method covers one topic and prints one result per line
 * through {@link #pt(Object)}. Every topic lives in its own method so that the conventional
 * short names ({@code p}, {@code m}, {@code s}) can be reused without clashing: declaring
 * them repeatedly inside a single method body is a compile-time error.
 */
public class Test {

    /**
     * Runs every demo section in a fixed order.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        demoBasics();
        demoQuantifiers();
        demoCharacterClasses();
        demoPredefinedClasses();
        demoPosixClasses();
        demoBoundaries();
        demoEmail();
        demoMatchesFindLookingAt();
        demoReplacement();
        demoGroups();
        demoGreedyQuantifier();
        demoNegativeLookahead();
        demoBackReferences();
        demoInlineFlags();
    }

    /** First contact: String.matches, String.replaceAll and an explicit Pattern/Matcher pair. */
    private static void demoBasics() {
        pt("abc".matches("..."));
        pt("a8729a".replaceAll("\\d", "-")); // every digit is replaced by '-'

        // Compiling once and reusing the Pattern avoids re-parsing the expression,
        // which is what String.matches does on every call.
        Pattern p = Pattern.compile("[a-z]{3}");
        Matcher m = p.matcher("dfg");
        pt(m.matches());
        pt("dfg".matches("[a-z]{3}"));
    }

    /** Quantifiers: . (one char), * (zero or more), + (one or more), ? (zero or one), {n,m}. */
    private static void demoQuantifiers() {
        pt("a".matches("."));
        pt("aa".matches("aa"));
        pt("aaaa".matches("a*"));
        pt("aaaa".matches("a+"));
        pt("".matches("a*"));
        pt("aaaa".matches("a?")); // false
        pt("".matches("a?"));
        pt("a".matches("a?"));
        pt("214523145234532".matches("\\d{3,100}"));
        pt("192.168.0.aaa".matches("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}")); // false
        pt("192".matches("[0-2][0-9][0-9]"));
    }

    /** Character classes: simple sets, negation, ranges, union and intersection. */
    private static void demoCharacterClasses() {
        pt("a".matches("[abc]"));
        pt("a".matches("[^abc]")); // false
        pt("A".matches("[a-zA-Z]"));
        pt("A".matches("[a-z]|[A-Z]"));
        pt("A".matches("[a-z[A-Z]]"));   // nested class = union
        pt("R".matches("[A-Z&&[RFG]]")); // && = intersection
    }

    /** Predefined classes \s, \S, \w, \d and escaping a literal backslash. */
    private static void demoPredefinedClasses() {
        pt(" \n\r\t".matches("\\s{4}"));
        pt(" ".matches("\\S")); // false
        pt("a_8".matches("\\w{3}"));
        pt("abc888&^%".matches("[a-z]{1,3}\\d+[&^#%]+"));
        // One backslash in the input needs two in the regex and four in the Java literal.
        pt("\\".matches("\\\\"));
    }

    /** POSIX-style character classes. */
    private static void demoPosixClasses() {
        pt("a".matches("\\p{Lower}"));
    }

    /** Boundary matchers: ^, $ and the word boundary \b, plus a blank-line pattern. */
    private static void demoBoundaries() {
        pt("hello sir".matches("^h.*"));
        pt("hello sir".matches(".*ir$"));
        pt("hello sir".matches("^h[a-z]{1,3}o\\b.*"));
        pt("hellosir".matches("^h[a-z]{1,3}o\\b.*")); // false

        // Blank line: any whitespace except newline, followed by the newline itself.
        pt(" \n".matches("^[\\s&&[^\\n]]*\\n$"));

        pt("aaa 8888c".matches(".*\\d{4}."));
        pt("aaa 8888c".matches(".*\\b\\d{4}."));
        pt("aaa8888c".matches(".*\\d{4}."));
        pt("aaa8888c".matches(".*\\b\\d{4}.")); // false
    }

    /** A deliberately simple e-mail address pattern. */
    private static void demoEmail() {
        pt("asdfasdfsafsf@dsdfsdf.com".matches("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+"));
    }

    /**
     * Contrasts the three matching modes of Matcher: matches() needs the whole input to
     * match, find() looks for the next matching subsequence, and lookingAt() only tries
     * at the beginning of the input.
     */
    private static void demoMatchesFindLookingAt() {
        Pattern p = Pattern.compile("\\d{3,5}");
        String s = "123-34345-234-00";
        Matcher m = p.matcher(s);

        pt(m.matches());
        // Discard whatever state the matches() attempt left behind so that
        // find() starts scanning from index 0.
        m.reset();

        pt(m.find());
        pt(m.start() + "-" + m.end());
        pt(m.find());
        pt(m.start() + "-" + m.end());
        pt(m.find());
        pt(m.start() + "-" + m.end());
        // The fourth find() fails; calling start()/end() after a failed find()
        // would throw IllegalStateException, so nothing more is printed for it.
        pt(m.find());

        // lookingAt() always starts at the beginning, so it gives the same answer each time.
        pt(m.lookingAt());
        pt(m.lookingAt());
        pt(m.lookingAt());
        pt(m.lookingAt());
    }

    /** Replacement: replaceAll, then per-match control with appendReplacement/appendTail. */
    private static void demoReplacement() {
        String text = "java Java JAVa JaVa IloveJAVA you hateJava afasdfasdf";
        Pattern p = Pattern.compile("java", Pattern.CASE_INSENSITIVE);

        // Replace every occurrence, whatever its case.
        pt(p.matcher(text).replaceAll("JAVA"));

        // Alternate the replacement: odd-numbered matches become "JAVA", even ones "java".
        // StringBuffer is kept because the StringBuilder overloads only exist since Java 9.
        Matcher m = p.matcher(text);
        StringBuffer buf = new StringBuffer();
        int count = 0;
        while (m.find()) {
            count++;
            m.appendReplacement(buf, (count % 2 == 0) ? "java" : "JAVA");
        }
        m.appendTail(buf); // copy the text after the last match
        pt(buf);
    }

    /** Capturing groups: group() is the whole match, group(n) the n-th parenthesised part. */
    private static void demoGroups() {
        Pattern p = Pattern.compile("(\\d{3,5})([a-z]{2})");
        String s = "123aa-34345bb-234cc-00";
        Matcher m = p.matcher(s);
        while (m.find()) {
            pt(m.group());
            pt(m.group(1));
            pt(m.group(2));
        }
    }

    /** Greedy quantifier: .{3,10} grabs as much as it can, then backs off to find a digit. */
    private static void demoGreedyQuantifier() {
        Pattern p = Pattern.compile(".{3,10}[0-9]");
        String s = "aaaa5bbbb6";
        Matcher m = p.matcher(s);
        if (m.find()) {
            pt(m.start() + "-" + m.end());
        } else {
            pt("not match!");
        }
    }

    /**
     * Negative lookahead: (?!a) asserts that the next character is not 'a' without
     * consuming it and without creating a capturing group.
     */
    private static void demoNegativeLookahead() {
        Pattern p = Pattern.compile(".{3}(?!a)");
        String s = "444a66b";
        Matcher m = p.matcher(s);
        while (m.find()) {
            pt(m.group());
        }
    }

    /** Back references: \2 must repeat exactly what group 2 (the inner digit) captured. */
    private static void demoBackReferences() {
        Pattern p = Pattern.compile("(\\d(\\d))\\2");
        String s = "122";
        Matcher m = p.matcher(s);
        pt(m.matches());
    }

    /**
     * Inline flags: the embedded (?i) switches on case-insensitive matching, the same
     * effect as passing Pattern.CASE_INSENSITIVE to Pattern.compile.
     */
    private static void demoInlineFlags() {
        pt("Java".matches("(?i)(java)"));
    }

    /**
     * Prints a value followed by a line break on standard output.
     *
     * @param o the value to print; {@code null} is printed as "null"
     */
    public static void pt(Object o) {
        System.out.println(o);
    }
}
下面总结一下基本的正则表达式的meta character以及它们含义:
. 匹配任意一个字符 $ 匹配一行的结尾 ^ 匹配一行的开头(在[]里面表示否定)
{} 定义了重复次数的范围(如 {3,5} 表示3到5次) [] 定义了一个字符类 () 定义了一个组
* 前面的元素出现0次或多次 + 前面的元素出现1次或多次 ? 前面的元素出现0次或1次
\ 后面的字符不会看作meta character \w 字母数字下划线 \W 非字母数字下划线
\d 单个数字 \D 单个非数字 | 或,二者之一 && 字符类的交集(只在[]里面使用) \b 单词边界
下面看看几个简单的例子:
[abc] a、b 或 c(简单类)
[^abc] 任何字符,除了a、b 或 c(否定)
[a-zA-Z] a 到 z 或 A 到 Z,两头的字母包括在内(范围)
[a-d[m-p]] a 到 d 或 m 到 p:[a-dm-p](并集)
[a-z&&[def]] d、e 或 f(交集)
[a-z&&[^bc]] a 到 z,除了 b 和 c:[ad-z](减去)
[a-z&&[^m-p]] a 到 z,而非 m 到 p:[a-lq-z](减去)