正则表达式高级应用（java语言版）_java正则表达式高阶用法-CSDN博客

本文链接：https://blog.csdn.net/Abit_Go/article/details/87084307

1.边界

package 正则;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts how often the text "cat" occurs in a sample sentence.
 *
 * <p>The pattern carries no word-boundary anchors, so the "cat" embedded in
 * "scattered" is counted in addition to the standalone word.</p>
 */
public class Exp001 {

	/**
	 * Prints the number of matches of {@code (cat)} in the sample sentence.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String string = "the cat scattered his food all over the room";
		String regex = "(cat)";
		Matcher p = Pattern.compile(regex).matcher(string);

		// Only the count is reported, so the matched text itself is not stored.
		int num = 0;
		while (p.find()) {
			num++;
		}
		System.out.println(num);
	}

}
结果：2

package 正则;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts occurrences of "cat" as a whole word in a sample sentence.
 *
 * <p>The {@code \b} anchors on both sides exclude the "cat" inside
 * "scattered".</p>
 */
public class Exp001 {

	/**
	 * Collects every whole-word match and prints how many were found.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String sentence = "the cat scattered his food all over the room";
		final Matcher wordMatcher = Pattern.compile("(\\bcat\\b)").matcher(sentence);

		final ArrayList<String> hits = new ArrayList<String>();
		for (boolean found = wordMatcher.find(); found; found = wordMatcher.find()) {
			hits.add(wordMatcher.group());
		}

		System.out.println(hits.size());
	}

}
结果：1

对比上面两个结果，发现第一个也将scattered这个单词包含进去了，不符合我们的需求，所以我们一般使用第二种方法（位置匹配）。

单向区间匹配：

package 正则;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Lists the sample words in which "cap" appears at the start of a word.
 */
public class Exp001 {

	/** "cap" anchored by a word boundary on its left side only. */
	private static final Pattern CAP_AT_WORD_START = Pattern.compile("(\\bcap)");

	/**
	 * Prints each sample word that contains a word-initial "cap".
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String[] candidates = {"caption", "cap", "cape", "capsize", "recap"};
		for (int i = 0; i < candidates.length; i++) {
			String word = candidates[i];
			// In "recap" the letters "cap" follow another word character,
			// so there is no boundary in front of them and the word is skipped.
			if (!CAP_AT_WORD_START.matcher(word).find()) {
				continue;
			}
			System.out.println(word);
		}
	}

}
结果：
caption
cap
cape
capsize

package 正则;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Prints the sample words that contain "cap" directly after a word boundary.
 */
public class Exp001 {

	/**
	 * Tells whether {@code candidate} contains a match of {@code boundaryPattern}.
	 *
	 * @param boundaryPattern compiled pattern to search for
	 * @param candidate       text to search in
	 * @return {@code true} if at least one match exists
	 */
	private static boolean containsMatch(Pattern boundaryPattern, String candidate) {
		Matcher probe = boundaryPattern.matcher(candidate);
		return probe.find();
	}

	/**
	 * Runs the left-anchored "cap" search over the sample words.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String[] samples = {"caption", "cap", "cape", "capsize", "recap"};
		Pattern leftAnchoredCap = Pattern.compile("(\\bcap)");

		for (String sample : samples) {
			if (containsMatch(leftAnchoredCap, sample)) {
				System.out.println(sample);
			}
		}
	}

}
结果：
caption
cap
cape
capsize

下表列出了正则表达式中的边界匹配器

package 正则;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates {@code \B}, the "not a word boundary" anchor, around a hyphen.
 */
public class Exp001 {

	/**
	 * Prints the sample strings containing a hyphen that has no word boundary
	 * on either side.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final Pattern isolatedHyphen = Pattern.compile("(\\B-\\B)");
		final String[] samples = {"caption-s", "cap - s", "a- b", "a -b"};

		int index = 0;
		while (index < samples.length) {
			String sample = samples[index++];
			// A hyphen touching a letter creates a word boundary on that side,
			// so only a hyphen with non-word characters on both sides qualifies.
			boolean hasIsolatedHyphen = isolatedHyphen.matcher(sample).find();
			if (hasIsolatedHyphen) {
				System.out.println(sample);
			}
		}
	}

}
结果：
cap - s

知识点：如果要表明不匹配一个单词边界（即位于字母数字下划线之间，或者非字母数字下划线之间），请使用\B。

2.字符串边界

package 正则;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Checks whether a text contains an XML declaration ({@code <?xml ... ?>}).
 */
public class Exp001 {

	/**
	 * Prints the whole sample text when an XML declaration is found in it.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		StringBuilder document = new StringBuilder();
		document.append("<?xml version=\"1.0\" encoding=\"utf-8\" ?>");
		document.append("wsdl:definitions targetNamespace=\"http://tips.cf\"");
		String text = document.toString();

		// No anchors are used: the declaration may sit anywhere in the text.
		Pattern xmlDeclaration = Pattern.compile("(<\\?xml.*\\?>)");
		boolean declared = xmlDeclaration.matcher(text).find();
		if (!declared) {
			return;
		}
		System.out.println(text);
	}

}
结果：
<?xml version="1.0" encoding="utf-8" ?>wsdl:definitions targetNamespace="http://tips.cf"

package 正则;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Shows that an unanchored XML-declaration pattern also accepts text in which
 * the declaration is preceded by other content.
 */
public class Exp001 {

	/**
	 * Prints the whole sample text when an XML declaration occurs anywhere in it.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String leadingNoise = "it is not a xml";
		final String declaration = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>";
		final String remainder = "wsdl:definitions targetNamespace=\"http://tips.cf\"";
		final String text = leadingNoise + declaration + remainder;

		final Matcher declarationMatcher = Pattern.compile("(<\\?xml.*\\?>)").matcher(text);
		// The search succeeds even though the declaration is not at the start.
		if (declarationMatcher.find()) {
			System.out.println(text);
		}
	}

}
结果：
it is not a xml<?xml version="1.0" encoding="utf-8" ?>wsdl:definitions targetNamespace="http://tips.cf"

在上面的例子里，虽然匹配到了xml的开头部分，但位置完全不对，它匹配到的是第二行而不是第一行。

package 正则;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Verifies that an XML declaration sits at the very start of a text,
 * allowing only whitespace in front of it.
 */
public class Exp001 {

	/**
	 * Prints a notice when the sample text does not begin with an XML declaration.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String text = "it is not a xml"
				+ "<?xml version=\"1.0\" encoding=\"utf-8\" ?>"
				+ "wsdl:definitions targetNamespace=\"http://tips.cf\"";

		// ^ pins the match to the start of the input; \s* tolerates leading whitespace.
		Pattern leadingDeclaration = Pattern.compile("(^\\s*<\\?xml.*\\?>)");
		boolean startsWithDeclaration = leadingDeclaration.matcher(text).find();
		if (startsWithDeclaration) {
			return;
		}
		System.out.println("这不是xml文件");
	}

}
结果：
这不是xml文件

知识点：(^\\s*<\\?xml.*\\?>)分析：^\s*将匹配一个字符串的开头和随后的零个或多个空白字符（这解决了<?xml>标签前允许有空格、制表符、换行符等空白字符的问题）。作为一个整体，此正则表达式还能对合法的空白字符做出妥善处理。

package 正则;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Verifies that a closing {@code </html>} tag (in any letter case) is the last
 * non-whitespace content of a text.
 */
public class Exp001 {

	/**
	 * Prints a notice when the sample text has content after its closing html tag.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String text = "it is a html file </htMl>as";
		// $ pins the match to the end of the input; \s* tolerates trailing whitespace.
		final Pattern closingTagAtEnd = Pattern.compile("(</[hH][tT][Mm][lL]>\\s*$)");

		final Matcher tail = closingTagAtEnd.matcher(text);
		final boolean endsWithClosingTag = tail.find();
		if (endsWithClosingTag) {
			return;
		}
		System.out.println("这不是html文件，html文件标签下面不应该有任何实际内容");
	}

}
结果：
这不是html文件，html文件标签下面不应该有任何实际内容

元字符	作用
^	判断开头
$	判断结尾

知识点：1.我们判断”\\”注释可以使用((?m)^\s\\.*$)。2.(^.$)该表达式几乎能匹配到任何字符，但是在string = “”;的时候不成立。

3.子表达式

分辨文本”[12.159.46.200]”

package 正则;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Recognises a bracketed dotted quad such as {@code [12.159.46.200]} by
 * repeating the sub-expression {@code (\d{1,3}\.)} three times.
 */
public class Exp001 {

	/**
	 * Prints the sample text when it contains a bracketed dotted quad.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String string = "[12.159.46.200]";
		// The square brackets are escaped so they match literally. Left
		// unescaped they would turn the whole expression into a character
		// class that matches any single digit, dot, brace, comma or parenthesis.
		String regex = "\\[(\\d{1,3}\\.){3}\\d{1,3}\\]";
		Pattern pattern = Pattern.compile(regex);
		Matcher p = pattern.matcher(string);
		if (p.find()) {
			System.out.println(string);
		}
	}

}
结果：
[12.159.46.200]

package 正则;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Picks out the record fields that contain a four-digit year beginning with
 * 19 or 20.
 */
public class Exp001 {

	/**
	 * Prints every sample field in which a 19xx or 20xx year is found.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String string[] = {"ID:042", "sex:M", "DOB:1967-08-17", "Status:Active"};
		// The alternation is grouped so that \d{2} applies to both prefixes.
		// Without the parentheses the expression means "19" OR "20\d{2}",
		// which accepts a bare "19" with no digits after it.
		String regex = "(19|20)\\d{2}";
		Pattern pattern = Pattern.compile(regex);
		for (String xString : string) {
			Matcher p = pattern.matcher(xString);
			if (p.find()) {
				System.out.println(xString);
			}
		}
	}

}
结果：
DOB:1967-08-17

知识点：我们限定年份的前两位数字是19或20，用(19|20)二选一——注意必须加括号，否则19|20\\d{2}会被解析成“19”或“20\\d{2}”两个分支；后面的两位数字用\\d{2}限定。

小问题：如果我们匹配一个ipv4地址也可以类似于这样的式子[(\\d{1,3}\\.){3}\\d{1,3}]编写正则表达式吗？

package 正则;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Applies a naive dotted-quad pattern to several candidate IPv4 addresses.
 *
 * <p>The pattern only checks the digit counts, not the numeric range of each
 * part, so a candidate such as "789.1.1.1" is accepted as well.</p>
 */
public class Exp001 {

	/**
	 * Prints every candidate in which the naive dotted-quad pattern is found.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final Pattern naiveDottedQuad = Pattern.compile("(\\d{1,3}\\.){3}\\d{1,3}");
		final String[] candidates = {"172.168.1.1", "254.254.254.254", "789.1.1.1", "12.12.12.12"};

		for (int i = 0; i < candidates.length; i++) {
			boolean looksLikeAddress = naiveDottedQuad.matcher(candidates[i]).find();
			if (looksLikeAddress) {
				System.out.println(candidates[i]);
			}
		}
	}

}
结果：
172.168.1.1
254.254.254.254
789.1.1.1
12.12.12.12

以上的结果明显大错特错！Ipv4某一位怎么可能大于255！

package 正则;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Validates candidate IPv4 addresses with a pattern that restricts every part
 * to the range 0-255.
 */
public class Exp001 {

	/**
	 * Prints each candidate that matches the range-checked pattern in full.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Three "part plus dot" groups followed by a final part. Each part is
		// one of: 1-2 digits, 1xx, 200-249 or 250-255.
		final String regex = "(((\\d{1,2})|(1\\d{2})|(2[0-4]\\d)|(25[0-5]))\\.){3}"
				+ "(((\\d{1,2})|(1\\d{2})|(2[0-4]\\d)|(25[0-5])))";
		final Pattern strictAddress = Pattern.compile(regex);
		final String[] candidates = {"172.168.1.1", "254.254.254.254", "489.1.1.1", "12.12.12.12"};

		for (String candidate : candidates) {
			// matches() requires the entire candidate to fit the pattern.
			Matcher whole = strictAddress.matcher(candidate);
			if (!whole.matches()) {
				continue;
			}
			System.out.println(candidate);
		}
	}

}
结果：
172.168.1.1
254.254.254.254
12.12.12.12

//由于用上面的方法有bug所以我换了一种方法。

4.1回溯引用有什么用

package 正则;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts first-level headings ({@code <H1>...</H1>}) from an HTML fragment.
 */
public class Exp001 {

	/**
	 * Prints every first-level heading found in the sample fragment, one per line.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String string = "<H1>Welcome</H1>"
				+ "<H2> my </H3>"
				+ "<H3> Homepage</H3>";
		String regex = "<[Hh]1>.*</[Hh]1>";
		Pattern p = Pattern.compile(regex);

		ArrayList<String> temp = new ArrayList<String>();
		Matcher fMatcher = p.matcher(string);
		while (fMatcher.find()) {
			temp.add(fMatcher.group());
		}
		// Print the collected matches. The previous statement referred to an
		// undeclared variable, which prevented the class from compiling.
		for (String heading : temp) {
			System.out.println(heading);
		}
	}

}
结果：
<H1>Welcome</H1>

知识点：<[Hh]1>.*</[Hh]1>只能匹配一级标题，所以我们可以改进，在此基础上添加一个字符集合进去。

package 正则;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Tries to extract headings of level 1-3 with a character set for the level.
 *
 * <p>Nothing ties the closing tag to the opening tag and {@code .*} is greedy,
 * so the whole fragment, including the mismatched {@code <H2> ... </H3>} pair,
 * comes back as a single match.</p>
 */
public class Exp001 {

	/**
	 * Prints the list of matches found in the sample fragment.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		StringBuilder html = new StringBuilder();
		html.append("<H1>Welcome</H1>");
		html.append("<H2> my </H3>");
		html.append("<H3> Homepage</H3>");

		Pattern anyHeading = Pattern.compile("<[Hh][1-3]>.*</[Hh][1-3]>");
		Matcher scanner = anyHeading.matcher(html.toString());

		ArrayList<String> headings = new ArrayList<String>();
		for (; scanner.find(); ) {
			headings.add(scanner.group());
		}
		System.out.println(headings);
	}

}
结果：
[<H1>Welcome</H1><H2> my </H3><H3> Homepage</H3>]

4.2回溯引用匹配

package 正则;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Finds words that are immediately repeated, using the backreference
 * {@code \1} to demand the same text that group 1 captured.
 */
public class Exp001 {

	/**
	 * Prints the list of repeated-word matches found in the sample sentence.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String sentence = "this is of of are here here they are are";
		// Spaces, a captured word, spaces, then the captured word again.
		final Pattern repeatedWord = Pattern.compile("[ ]+(\\w+)[ ]+\\1");

		final ArrayList<String> repeats = new ArrayList<String>();
		final Matcher scanner = repeatedWord.matcher(sentence);
		boolean more = scanner.find();
		while (more) {
			// group() is the whole match, including the leading space(s).
			repeats.add(scanner.group());
			more = scanner.find();
		}
		System.out.println(repeats);
	}

}
结果：
[ of of,  here here,  are are]

知识点：[ ]+匹配一个或者多个空格，\w+匹配一个或多个字母数字字符，[ ]+匹配随后的空格。注意\\w+是在括号里的，它是一个子表达式。最后一个是\\1，这是一个回溯引用，而它引用的正是前面划分出来的那个子表达式：当(\\w+)匹配到单词of的时候，\\1也匹配单词of。

4.3回溯引用在替换操作中的应用

package 正则;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Wraps every e-mail address in a sentence in a {@code mailto:} link by
 * referring to capture group 1 ({@code $1}) in the replacement text.
 */
public class Exp001 {

	/**
	 * Prints the sample sentence with its e-mail address turned into a link.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String sentence = "hello,ben@163.com is my e-mail address";
		final String addressRegex = "(\\w+[\\w\\.]*@[\\w\\.]+\\.\\w+)";
		// $1 is replaced by the text captured by group 1, i.e. the address.
		final String linkTemplate = "(<A HREF=\"mailto:$1\">$1</A>)";

		final Matcher addresses = Pattern.compile(addressRegex).matcher(sentence);
		// replaceAll substitutes every match and keeps the text in between.
		final String linked = addresses.replaceAll(linkTemplate);
		System.out.println(linked);
	}

}

结果：
hello,(<A HREF="mailto:ben@163.com">ben@163.com</A>) is my e-mail address

知识点：建议看一下这个博客http://blog.csdn.net/jiafu1115/article/details/6663812

package 正则;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reformats phone numbers from {@code ddd-ddd-dddd} to {@code (ddd)-ddd-dddd}
 * by rearranging capture groups in the replacement text.
 */
public class Exp001 {

	/**
	 * Prints each sample phone number in the reformatted layout.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String string[] = {"313-555-1253", "343-555-3455", "789-567-4565"};
		String regex1 = "(\\d{3})(-)(\\d{3})(-)(\\d{4})";
		// Groups 1, 3 and 5 are the digit runs; groups 2 and 4 are the hyphens.
		String regex2 = "($1)-$3-$5";
		Pattern p = Pattern.compile(regex1);
		for (String str : string) {
			Matcher fMatcher = p.matcher(str);
			StringBuffer sb = new StringBuffer();
			while (fMatcher.find()) {
				fMatcher.appendReplacement(sb, regex2);
			}
			// Copy whatever follows the last match; without this call any
			// trailing text of the input would be silently dropped.
			fMatcher.appendTail(sb);
			System.out.println(sb.toString());
		}
	}

}
结果：
(313)-555-1253
(343)-555-3455
(789)-567-4565

4.4大小写转换

package 正则;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Applies a replacement text written with {@code \U ... \E} markers to the
 * content between two heading tags.
 *
 * <p>In a Java replacement string a backslash only escapes the character that
 * follows it, so {@code \U} and {@code \E} are emitted as the literal letters
 * "U" and "E"; no case conversion takes place.</p>
 */
public class Exp001 {

	/**
	 * Prints each sample with the replacement applied.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String string[] = {"<H1>abc<H1>", "<h3>bcd<h2>", "<h4>fgh<H4>"};
		String regex1 = "(<[hH]\\d>)(.*?)(<[hH]\\d>)";
		// Yields group 1, a literal "U", group 2, a literal "E", then group 3.
		String regex2 = "$1\\U$2\\E$3";
		Pattern p = Pattern.compile(regex1);
		for (String str : string) {
			Matcher fMatcher = p.matcher(str);
			StringBuffer sb = new StringBuffer();
			while (fMatcher.find()) {
				fMatcher.appendReplacement(sb, regex2);
			}
			// Copy whatever follows the last match; without this call any
			// trailing text of the input would be silently dropped.
			fMatcher.appendTail(sb);
			System.out.println(sb.toString());
		}
	}

}
结果（注意：Java的替换字符串不支持\U…\E大小写转换语法，\U和\E只会被当作普通字符U和E输出）：
<H1>UabcE<H1>
<h3>UbcdE<h2>
<h4>UfghE<H4>

5. 向前查找

package 正则;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates a positive lookahead: {@code .+(?=:)} matches the text in front
 * of a colon without consuming the colon itself.
 */
public class Exp001 {

	/**
	 * Prints each sample URL once for every lookahead match found in it.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final Pattern beforeColon = Pattern.compile(".+(?=:)");
		final String[] urls = {"http://www.a.com", "http://wap.b.com", "ftp://ftp.c.cn"};

		for (int i = 0; i < urls.length; i++) {
			final String url = urls[i];
			final Matcher scanner = beforeColon.matcher(url);
			// The full URL is printed per match, not the matched prefix.
			for (boolean hit = scanner.find(); hit; hit = scanner.find()) {
				System.out.println(url);
			}
		}
	}

}
结果：
http://www.a.com
http://wap.b.com
ftp://ftp.c.cn

知识点：任何一个表达式都可以转换为一个向前查找表达式，只要给它加上一个”?=”前缀即可。

package 正则;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Searches price entries for an amount introduced by a dollar sign.
 */
public class Exp001 {

	/**
	 * Prints each sample entry once for every dollar amount found in it.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String[] entries = {"a:$32.3", "b:$332.3", "c:$3452.3"};
		// A literal dollar sign followed by digits and dots.
		// Variant without the dollar sign: "[0-9.]+"
		Pattern dollarAmount = Pattern.compile("\\$[0-9.]+");

		for (String entry : entries) {
			Matcher amounts = dollarAmount.matcher(entry);
			boolean hit = amounts.find();
			while (hit) {
				System.out.println(entry);
				hit = amounts.find();
			}
		}
	}

}
结果：
a:$32.3
b:$332.3
c:$3452.3

操作符	作用
(?=)	正向前查找
(?!)	负向前查找
(?<=)	正向后查找
(?<!)	负向后查找

package 正则;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates a positive lookbehind: {@code (?<=\$)\d+} matches digits that
 * directly follow a dollar sign, without including the sign in the match.
 */
public class Exp001 {

	/**
	 * Prints the list of dollar amounts found in the sample sentence.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String sentence = "i have $20 and i pay $10 to shop,and buy 3pears";
		final Matcher scanner = Pattern.compile("(?<=\\$)\\d+").matcher(sentence);

		final ArrayList<String> amounts = new ArrayList<String>();
		for (; scanner.find(); ) {
			amounts.add(scanner.group());
		}
		System.out.println(amounts);
	}

}
结果：
[20, 10]

package 正则;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates a negative lookbehind combined with word boundaries:
 * {@code \b(?<!\$)\d+\b} matches whole numbers not preceded by a dollar sign.
 */
public class Exp001 {

	/**
	 * Prints the list of non-price numbers found in the sample sentence.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String sentence = "i have $20 and i pay $10 to shop,and buy 3 pears for 3 day";
		// The \b anchors stop a match from starting in the middle of a number,
		// so the trailing digits of "$20" and "$10" are not picked up.
		final Pattern plainNumber = Pattern.compile("\\b(?<!\\$)\\d+\\b");

		final Matcher scanner = plainNumber.matcher(sentence);
		final ArrayList<String> quantities = new ArrayList<String>();
		boolean more = scanner.find();
		while (more) {
			quantities.add(scanner.group());
			more = scanner.find();
		}
		System.out.println(quantities);
	}

}
结果：
[3, 3]

package 正则;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Shows what a negative lookbehind does without word boundaries:
 * {@code (?<!\$)\d+} also matches the trailing digits of a price, because the
 * second digit of "$20" is preceded by "2" rather than by a dollar sign.
 */
public class Exp001 {

	/**
	 * Prints the list of digit runs not directly preceded by a dollar sign.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String sentence = "i have $20 and i pay $10 to shop,and buy 3 pears for 3 day";
		Pattern notAfterDollar = Pattern.compile("(?<!\\$)\\d+");
		Matcher scanner = notAfterDollar.matcher(sentence);

		ArrayList<String> digitRuns = new ArrayList<String>();
		for (boolean hit = scanner.find(); hit; hit = scanner.find()) {
			digitRuns.add(scanner.group());
		}
		System.out.println(digitRuns);
	}

}
结果：
[0, 0, 3, 3]