Java 中的正则表达式

最新推荐文章于 2024-07-31 17:28:36 发布

candyguy242

最新推荐文章于 2024-07-31 17:28:36 发布

阅读量858

点赞数

分类专栏：开发相关　文章标签：java regex 正则表达式 string javascript java7

本文链接：https://blog.csdn.net/candyguy242/article/details/7920013

版权

开发相关专栏收录该内容

26 篇文章 0 订阅

订阅专栏

最近项目中有一些地方用到了正则表达式，之前对这个东西了解不多，这次正好多看了一些，也发现了正则表达式的一些需要注意的地方。

正则总结：
正则表达式的优势是进行样式匹配，而不是具体的逻辑处理;
元字符注意使用英文字符，使用中文符号不会报错，但意义不同了；
零长度匹配的情况；
逆向引用（反向引用）：在 Java 正则表达式内部使用“\数字”（如 \1），在代码的替换字符串中使用“$数字”（如 $1）；
Java 7 之前的正则引擎不支持组命名（命名捕获组），从 Java 7 开始支持该功能；
前向断言和后向断言中的表达式应当是具体明确或是长度确定的，原因是正则引擎不会对断言的内容进行回溯操作（在 Java 中，后向断言的表达式必须有明确的最大长度，否则编译正则时会抛出 PatternSyntaxException）。

比如下面的demo里展示的一些点：

package demo.regex;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Console demo of several {@link java.util.regex} behaviours: zero-length
 * matches, greedy / lazy / possessive quantifiers, the {@code MULTILINE} flag,
 * capturing groups with back-references, and look-around assertions.
 *
 * <p>Every demo prints the matches it finds to {@code System.out} through
 * {@link #printMatch(String, Pattern)}.
 */
public class RegexDemo {

    /**
     * Runs all demos in a fixed order.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        testZerolength();
        testMatchMode();
        testLineMode();
        testGroup();
        testAssertion();
    }

    /**
     * Applies the greedy, lazy and possessive forms of {@code .*foo} to the same
     * input so the resulting matches can be compared side by side.
     */
    private static void testMatchMode() {
        printHeading("test match mode");

        final String content = "xfooxxxxxxfoo";
        System.out.println(content);

        // Each row: label printed before the pattern, then the pattern itself.
        final String[][] modes = {
            {"greedy mode :", ".*foo"},
            {"lazy mode :", ".*?foo"},
            // The possessive ".*+" keeps everything it consumed, so the trailing
            // "foo" can never be matched afterwards and nothing is printed.
            {"possessive mode :", ".*+foo"},
        };
        for (String[] mode : modes) {
            final String label = mode[0];
            final String regex = mode[1];
            System.out.println(label + regex);
            printMatch(content, Pattern.compile(regex));
        }
    }

    /**
     * Shows zero-length matches: {@code a?} may match the empty string, so
     * positions where no {@code a} is present still produce (empty) matches.
     */
    private static void testZerolength() {
        printHeading("test Zero-length");
        printMatch("aaaaabb", Pattern.compile("a?"));
    }

    /**
     * Demonstrates look-behind / look-ahead assertions, first on a Chinese
     * sentence and then for inserting thousands separators into digit strings.
     */
    private static void testAssertion() {
        printHeading("test assertion");

        final String content = "前赴后继,前无古人后无来者,前事不忘后事之师";
        // Lazy "[^\s]+?" stops at the first following look-ahead character; the
        // greedy form "[^\s]+" would run on to the last one. Using "\w+" here does
        // not work because \w only covers [a-zA-Z_0-9] by default.
        final Pattern between = Pattern.compile("(?<=[前])[^\\s]+?(?=[后])");
        printMatch(content, between);

        System.out.println("test spilt num");
        final String[] samples = {"1234567890", "123456789"}; // 1,234,567,890 and 123,456,789
        for (String digits : samples) {
            System.out.println(groupThousands(digits));
        }
    }

    /**
     * Inserts commas into a digit string using two regex replacements.
     *
     * @param digits the digit string to format
     * @return the string after both replacements have been applied
     */
    private static String groupThousands(String digits) {
        // Pass 1: split off the leading 1-3 digits when the remainder is a whole
        // number of 3-digit groups.
        final String headSeparated = digits.replaceAll("^(\\d{1,3})((\\d{3})+)$", "$1,$2");
        // Pass 2: put a comma in front of every 3-digit group that directly follows
        // three digits.
        return headSeparated.replaceAll("(?<=\\d{3})(\\d{3})", ",$1");
    }

    /**
     * Demonstrates capturing groups: back-references inside a pattern
     * ({@code \1}), group references in a replacement string ({@code $1}), and
     * greedy versus lazy matching of tag-like text.
     */
    private static void testGroup() {
        printHeading("test group");

        // "\1" must repeat exactly what group 1 captured.
        final String repeated = "1212";
        final String pairTwice = "(\\d\\d)\\1";
        System.out.println(pairTwice + " for " + repeated);
        printMatch(repeated, Pattern.compile(pairTwice));

        // Swap the two sides of the hyphen. In the replacement string groups are
        // written "$n"; the "\n" form is only valid inside the pattern. The first
        // group uses "[^\-]+" rather than "\w+" because \w does not cover the
        // Chinese characters by default.
        final String label = "中国-CN";
        final String swapped = label.replaceAll("([^\\-]+)\\-(\\w+)", "$2-$1");
        System.out.println(label);
        System.out.println(swapped);

        final String titleTag = "<title>This is a demo</title>";
        printMatch(titleTag, Pattern.compile("<.+>"));  // greedy
        printMatch(titleTag, Pattern.compile("<.+?>")); // lazy

        // The closing tag name is tied to the opening one through "\1".
        final String scriptTag = "<script language=\"JavaScript\" type=\"text/javascript\"></script>";
        final Pattern element = Pattern.compile("<([^>]+?)\\s*?.*?>.*?</\\1>");
        printMatch(titleTag, element);
        printMatch(scriptTag, element);
    }

    /**
     * Compares the default mode with {@link Pattern#MULTILINE} for one pattern.
     *
     * <p>Without the flag, {@code ^} matches only at the start of the input and
     * {@code $} only at its end. With the flag they also match just after and
     * just before a line terminator, so every line is tried on its own. The flag
     * only has a visible effect on patterns that use {@code ^} or {@code $}.
     */
    protected static void testLineMode() {
        System.out.println();

        final String content = "山清水秀\r\n 山穷水尽 \r\n山舞银蛇，原驰蜡象，欲与天公试比高。\r\n高山仰止";
        final String regex = "^山.+"; // greedy; the lazy variant would be "^山.+?"

        System.out.println("单行模式:");
        printMatch(content, Pattern.compile(regex));

        System.out.println();
        System.out.println("多行模式:");
        printMatch(content, Pattern.compile(regex, Pattern.MULTILINE));
    }

    /**
     * Prints every match of a pattern in the given text, one line per match,
     * with a 1-based counter, the matched text, and its start and end offsets.
     *
     * @param content the text to search
     * @param p1 the compiled pattern to apply
     */
    protected static void printMatch(String content, Pattern p1) {
        final Matcher m = p1.matcher(content);
        for (int ordinal = 1; m.find(); ordinal++) {
            final StringBuilder line = new StringBuilder("match found ");
            line.append(ordinal)
                .append(":\"").append(m.group())
                .append("\",start:").append(m.start())
                .append(",end:").append(m.end());
            System.out.println(line.toString());
        }
    }

    /**
     * Prints an empty line followed by the given section title.
     *
     * @param title the text printed on the second line
     */
    private static void printHeading(String title) {
        System.out.println();
        System.out.println(title);
    }

}

运行结果如下：

test Zero-length
match found 1:"a",start:0,end:1
match found 2:"a",start:1,end:2
match found 3:"a",start:2,end:3
match found 4:"a",start:3,end:4
match found 5:"a",start:4,end:5
match found 6:"",start:5,end:5
match found 7:"",start:6,end:6
match found 8:"",start:7,end:7

test match mode
xfooxxxxxxfoo
greedy mode :.*foo
match found 1:"xfooxxxxxxfoo",start:0,end:13
lazy mode :.*?foo
match found 1:"xfoo",start:0,end:4
match found 2:"xxxxxxfoo",start:4,end:13
possessive mode :.*+foo

单行模式:
match found 1:"山清水秀",start:0,end:4

多行模式:
match found 1:"山清水秀",start:0,end:4
match found 2:"山舞银蛇，原驰蜡象，欲与天公试比高。",start:14,end:32

test group
(\d\d)\1 for 1212
match found 1:"1212",start:0,end:4
中国-CN
CN-中国
match found 1:"<title>This is a demo</title>",start:0,end:29
match found 1:"<title>",start:0,end:7
match found 2:"</title>",start:21,end:29
match found 1:"<title>This is a demo</title>",start:0,end:29
match found 1:"<script language="JavaScript" type="text/javascript"></script>",start:0,end:62

test assertion
match found 1:"赴",start:1,end:2
match found 2:"无古人",start:6,end:9
match found 3:"事不忘",start:15,end:18
test spilt num
1,234,567,890
123,456,789

网址：
http://www.java3z.com/cwbwebhome/article/article8/Regex/Java.Regex.Tutorial.html#reg6_2
http://blog.csdn.net/rcom10002/article/category/332382
http://www.blogjava.net/songfei/articles/23686.html

demo下载：http://download.csdn.net/download/candyguy242/4534068