正则表达式(Java)
底层实现
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Walk-through demo for {@code Matcher.find()} / {@code Matcher.group(int)}:
 * scans a sample text for runs of four digits and, for every hit, prints the
 * whole match followed by its two two-digit capture groups.
 */
public class RegExp00 {

    /** Sample text that is scanned for four-digit sequences. */
    private static final String CONTENT =
            "1998年12月8日,第二代Java平台的企业版J2EE发布。"
            + "1999年6月,Sun公司发布了第二代Java平台(简称为Java2)的3个版本:"
            + "J2ME(Java2 Micro Edition,Java2平台的微型版),应用于移动、无线及"
            + "有限资源的环境;J2SE(Java 2 Standard Edition,Java 2平台的标"
            + "准版),应用于桌面环境;J2EE(Java 2Enterprise Edition,Java 2平台"
            + "的企业版),应用于基于Java的应用服务器。Java 2平台的发布,是Java发展"
            + "过程中最重要的一个里程碑,标志着Java的应用开始普及。";

    /** Four consecutive digits, captured as two groups of two digits each. */
    private static final String REGEX = "(\\d\\d)(\\d\\d)";

    /** Index of the last group printed (0 = whole match, 1 and 2 = the halves). */
    private static final int LAST_GROUP = 2;

    public static void main(String[] args) {
        Matcher scanner = Pattern.compile(REGEX).matcher(CONTENT);
        for (boolean hit = scanner.find(); hit; hit = scanner.find()) {
            // Print group 0 first, then each parenthesised sub-match in order.
            for (int g = 0; g <= LAST_GROUP; g++) {
                System.out.println("找到:" + scanner.group(g));
            }
        }
    }
}
matcher.find
/**
 * Attempts to locate the next match, choosing the start index from the
 * matcher's {@code last}, {@code first}, {@code from} and {@code to} fields.
 *
 * @return {@code false} when the computed start index lies beyond {@code to}
 *         (every slot of {@code groups} is reset to -1 first); otherwise the
 *         result of {@code search(nextSearchIndex)}
 */
public boolean find ( ) {
// Start from the position stored in `last`
// (presumably the end of the previous match -- confirm against the full Matcher source).
int nextSearchIndex = last;
// When that position equals `first`, advance by one
// (presumably so an empty match is not found again at the same spot -- confirm).
if ( nextSearchIndex == first)
nextSearchIndex++ ;
// Never start before the lower bound `from`.
if ( nextSearchIndex < from)
nextSearchIndex = from;
// Start index is past the upper bound `to`: clear all group slots and report failure.
if ( nextSearchIndex > to ) {
for ( int i = 0 ; i < groups. length; i++ )
groups[ i] = - 1 ;
return false ;
}
// Otherwise delegate the actual search, beginning at the computed index.
return search ( nextSearchIndex) ;
}
matcher.group
/**
 * Returns the text captured by the given group, read from the {@code groups} array.
 *
 * @param group group index; must be between 0 and {@code groupCount()} inclusive
 * @return the captured text, or {@code null} when either recorded offset of the
 *         group is -1
 * @throws IllegalStateException if {@code first < 0}
 * @throws IndexOutOfBoundsException if {@code group} is negative or greater
 *         than {@code groupCount()}
 */
public String group ( int group) {
// A negative `first` is reported as "No match found".
if ( first < 0 )
throw new IllegalStateException ( "No match found" ) ;
if ( group < 0 || group > groupCount ( ) )
throw new IndexOutOfBoundsException ( "No group " + group) ;
// Each group occupies two consecutive slots: groups[group*2] and groups[group*2 + 1].
// A -1 in either slot yields null instead of a string.
if ( ( groups[ group* 2 ] == - 1 ) || ( groups[ group* 2 + 1 ] == - 1 ) )
return null ;
// The two slots are passed to getSubSequence as the bounds of the captured text.
return getSubSequence ( groups[ group * 2 ] , groups[ group * 2 + 1 ] ) . toString ( ) ;
}
基础用法
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Basic alternation demo: searches a sample string, ignoring case, for any of
 * three alternatives and prints every occurrence.
 */
public class RegExp01 {

    /** Text to scan; contains an embedded line break. */
    private static final String CONTENT =
            "a_bchHKKay66 66sABc_jdj*@fDH \n Y298HU 寒冷 韩顺平 han";

    /** The three literals to look for, joined with {@code |}. */
    private static final String REGEX = "寒|韩|han";

    public static void main(String[] args) {
        Matcher hits = Pattern.compile(REGEX, Pattern.CASE_INSENSITIVE).matcher(CONTENT);
        while (true) {
            if (!hits.find()) {
                break;
            }
            System.out.println("找到 " + hits.group(0));
        }
    }
}
正则限定符
符号 含义 示例 说明 匹配输入 * 指定字符重复0次或n次(无要求) (abc)* 仅由任意个(含0个)abc重复组成的字符串(注意:并不等价于\w*,\w*匹配任意个单词字符) abc abcabcabc + 指定字符重复1次或n次(至少1次) m+(abc)* 以至少1个m开头,后接任意个abc的字符串 m mabc mabcabc ? 指定字符重复0次或1次(最多1次) m+abc? 以至少1个m开头,后接ab或abc的字符串 mab mabc mmmab mmabc {n} 只能输入n个字符 [abcd]{3} 由abcd中字母组成的长度为3的字符串 abc dbc adc {n,} 指定至少n个匹配 [abcd]{3,} 由abcd中字母组成的任意长度不小于3的字符串 aab dbc aaabdc {n,m} 指定至少n个但不多于m个匹配 [abcd]{3,5} 由abcd中字母组成的任意长度不小于3,不大于5的字符串 abc abcd aaaaa bcdab
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Quantifier demo: applies the pattern {@code a1?} (an "a" optionally followed
 * by one "1"), case-insensitively, and prints each match.
 */
public class RegExp02 {

    public static void main(String[] args) {
        final String content = "11111111aaaaaaahello";
        final Pattern optionalOne = Pattern.compile("a1?", Pattern.CASE_INSENSITIVE);

        Matcher m = optionalOne.matcher(content);
        if (m.find()) {
            do {
                System.out.println("找到 " + m.group(0));
            } while (m.find());
        }
    }
}
正则定位符
符号 含义 示例 说明 匹配输入 ^ 指定起始字符 ^[0-9]+[a-z]* 以至少一个数字开头,后接任意个小写字母的字符串 123 6aa 555edf $ 指定结束字符 ^[0-9]\\-[a-z]+$ 以一个数字开头后接连字符“-”,并以至少1个小写字母结尾的字符串 1-a \\b 匹配目标字符串的边界 han\\b 这里说的字符串边界指的是子串间有空格,或者是目标字符串的结束位置 hanshunping sphan nnhan \\B 匹配目标字符串的非边界 han\\B 和\\b的含义相反 hanshunping sphan nnhan
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Anchor demo: finds every "han" that is immediately followed by a
 * non-word-boundary ({@code \B}) and prints it.
 */
public class RegExp03 {

    /** "han" followed by a position that is not a word boundary. */
    private static final Pattern HAN_NOT_AT_BOUNDARY = Pattern.compile("han\\B");

    public static void main(String[] args) {
        String content = "hanshunping sphan nnhan";
        Matcher m = HAN_NOT_AT_BOUNDARY.matcher(content);
        for (boolean hit = m.find(); hit; hit = m.find()) {
            String matched = m.group(0);
            System.out.println("找到 " + matched);
        }
    }
}
捕获分组
常用分组构造形式 说明 (pattern) 非命名捕获。捕获匹配的子字符串。编号为0的第一个捕获是由整个正则表达式模式匹配的文本,其他捕获结果则根据左括号的顺序从1开始自动编号。 (?<name>pattern) 命名捕获。将匹配的子字符串捕获到一个组名称或编号名称中。用于name的字符串不能包含任何标点符号,并且不能以数字开头。注意:Java只支持尖括号形式(?<name>pattern);单引号形式(?'name'pattern)是其他正则引擎(如.NET)的写法,在Java中不可用。
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Named-capture demo: matches runs of four digits, naming the first two
 * digits {@code g1} and the last two {@code g2}, then prints the whole match
 * and both named groups.
 */
public class RegExp04 {

    private static final String FIRST_PAIR = "g1";
    private static final String SECOND_PAIR = "g2";

    public static void main(String[] args) {
        final String content = "hanshunping s7789 nn1189han";
        final String regStr = "(?<g1>\\d\\d)(?<g2>\\d\\d)";

        Matcher m = Pattern.compile(regStr).matcher(content);
        while (true) {
            if (!m.find()) {
                return;
            }
            String whole = m.group(0);
            String firstPair = m.group(FIRST_PAIR);
            String secondPair = m.group(SECOND_PAIR);
            System.out.println("找到 " + whole);
            System.out.println("第一个分组[编号] " + firstPair);
            System.out.println("第二个分组[编号] " + secondPair);
        }
    }
}
非捕获分组
常用分组构造形式 说明 (?:pattern) 匹配pattern但不捕获该匹配的子表达式,即它是一个非捕获匹配,不存储供以后使用的匹配。这对于用“or”字符(|)组合模式部件的情况很有用。 (?=pattern) 正向预查,它是一个非捕获匹配:只在后面紧跟着能匹配pattern的内容的位置上匹配,预查的内容不计入匹配结果。 (?!pattern) 负向预查,它是一个非捕获匹配:只在后面紧跟的内容不能匹配pattern的位置上匹配,与(?=pattern)的含义相反,预查的内容同样不计入匹配结果。
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Negative-lookahead demo: prints every "韩顺平" that is not directly
 * followed by "教育" or "老师".
 */
public class RegExp05 {

    /** The name, provided neither excluded suffix comes right after it. */
    private static final Pattern NAME_WITHOUT_SUFFIX = Pattern.compile("韩顺平(?!教育|老师)");

    public static void main(String[] args) {
        String content = "hello韩顺平教育 Jack韩顺平老师 韩顺平同学hello";
        Matcher m = NAME_WITHOUT_SUFFIX.matcher(content);
        if (m.find()) {
            do {
                System.out.println("找到 " + m.group(0));
            } while (m.find());
        }
    }
}
非贪婪匹配
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Reluctant-quantifier demo: with {@code \d+?} each match takes as few digits
 * as possible, so the digits of the sample are reported one at a time.
 */
public class RegExp06 {

    public static void main(String[] args) {
        final String content = "hello1111111";
        final String reluctantDigits = "\\d+?";

        Matcher m = Pattern.compile(reluctantDigits).matcher(content);
        for (boolean hit = m.find(); hit; hit = m.find()) {
            System.out.println("找到 " + m.group(0));
        }
    }
}
应用实例
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Application example: validates a mobile phone number.
 *
 * <p>A valid number has exactly 11 digits, starts with {@code 1}, and has
 * {@code 1}, {@code 3} or {@code 8} as its second digit.
 */
public class RegExp07 {

    /**
     * Inside a character class {@code |} is a literal character, not an
     * alternation operator, so the class is written {@code [138]}. Writing it
     * as {@code [1|3|8]} would also accept a literal '|' as the second character.
     */
    private static final Pattern PHONE_NUMBER = Pattern.compile("^1[138]\\d{9}$");

    /**
     * Checks whether the whole input is a valid phone number.
     *
     * @param candidate text to validate; {@code null} is treated as invalid
     * @return {@code true} if {@code candidate} is a valid phone number
     */
    static boolean isValidPhoneNumber(String candidate) {
        // matches() requires the entire input to match, so a trailing line
        // terminator (which '$' alone would tolerate with find()) is rejected.
        return candidate != null && PHONE_NUMBER.matcher(candidate).matches();
    }

    public static void main(String[] args) {
        String content = "11588889999";
        System.out.println(isValidPhoneNumber(content) ? "true" : "false");
    }
}
验证复杂URL
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Application example: validates a URL that may carry a path and a query string.
 *
 * <p>Pattern structure: a mandatory {@code http://} or {@code https://}
 * scheme, one or more dot-terminated host labels, a final host label, and an
 * optional part starting with {@code /} made of word characters and
 * {@code - ? = & / % . #}.
 */
public class RegExp09 {

    private static final Pattern URL = Pattern.compile(
            "^((http|https)://)([\\w-]+\\.)+[\\w-]+(\\/[\\w-?=&/%.#]*)?$");

    /**
     * Checks whether the input is accepted by the URL pattern.
     *
     * @param candidate text to validate; {@code null} is treated as invalid
     * @return {@code true} if the pattern is found in {@code candidate}
     */
    static boolean isValidUrl(String candidate) {
        return candidate != null && URL.matcher(candidate).find();
    }

    public static void main(String[] args) {
        // The sample literal was previously unterminated and contained stray
        // spaces, so the class did not compile. It is restored as a well-formed
        // URL; the query part after '?' is a reconstruction.
        String content = "https://www.bilibili.com/video/BV1fh411y7R8?p=894&vd_source=";
        System.out.println(isValidUrl(content) ? "true" : "false");
    }
}
Pattern类matches方法
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Contrasts a partial search with a whole-input match: the same pattern is
 * first looked up with {@code Matcher.find()} and then checked with the static
 * {@code Pattern.matches(regex, input)}.
 */
public class RegExp10 {

    public static void main(String[] args) {
        final String content = "10https://www.bilibili.com/video/BV1fh411y7R8?p=894&vd_source=";
        final String regStr = "((http|https)://)([\\w-]+\\.)+[\\w-]+(\\/[\\w-?=&/%.#]*)?$";

        // Search: succeeds if the pattern occurs anywhere in the content.
        boolean foundSomewhere = Pattern.compile(regStr).matcher(content).find();
        System.out.println(foundSomewhere ? "true" : "false");

        // Whole-input check via the static convenience method.
        boolean wholeInputMatches = Pattern.matches(regStr, content);
        System.out.println(wholeInputMatches);
    }
}
Pattern类中的源码:
/**
 * Convenience method: compiles {@code regex}, creates a matcher for
 * {@code input}, and returns the result of that matcher's {@code matches()}.
 * A new {@code Pattern} is compiled on every call.
 *
 * @param regex the regular expression to compile
 * @param input the character sequence to test
 * @return the value returned by {@code Matcher.matches()} for this regex and input
 */
public static boolean matches ( String regex, CharSequence input) {
Pattern p = Pattern . compile ( regex) ;
Matcher m = p. matcher ( input) ;
return m. matches ( ) ;
}
matcher方法
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Tour of common {@code Matcher} methods: {@code start()}/{@code end()} for
 * each hit, {@code matches()} for a whole-input check, and
 * {@code replaceAll()} for substitution.
 */
public class RegExp11 {

    public static void main(String[] args) {
        final String content = "hello edu jack hspedutom hello smith hello";

        Matcher matcher = Pattern.compile("hello edu jack tom hello smith hello").matcher(content);
        // Note: this pattern does not occur in the content (it has "hspedutom"
        // where the pattern expects " tom"), so the loop body is never entered.
        for (boolean hit = matcher.find(); hit; hit = matcher.find()) {
            int begin = matcher.start();
            int finish = matcher.end();
            System.out.println("================");
            System.out.println(begin);
            System.out.println(finish);
            System.out.println(content.substring(begin, finish));
        }

        // Whole-input check with the same matcher.
        System.out.println("整体匹配 " + matcher.matches());

        // Substitute every "hspedu"; the original string itself is left untouched.
        String replaced = Pattern.compile("hspedu").matcher(content).replaceAll("韩顺平教育");
        System.out.println("content = " + content);
        System.out.println("new = " + replaced);
    }
}
反向引用
分组 可以使用()组成一个比较复杂的匹配模式,一个圆括号的部分我们可以看作一个子表达式/一个分组捕获 把正则表达式中子表达式/分组匹配内容,保存到一个组里,方便后面引用 0代表整个表达式
反向引用 圆括号的内容被捕获后,可以在这个括号后被使用,从而写出一个比较实用的匹配模式
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Back-reference demo: finds a five-digit number, a hyphen, and then three
 * consecutive triples of identical digits (e.g. {@code 12345-111222333}).
 */
public class RegExp12 {

    /** Each {@code (\d)\N{2}} captures a digit and requires it twice more. */
    private static final String SERIAL_REGEX = "\\d{5}-(\\d)\\1{2}(\\d)\\2{2}(\\d)\\3{2}";

    public static void main(String[] args) {
        String content = "hello hspedu11111 hello22 12345-111222333";
        Matcher m = Pattern.compile(SERIAL_REGEX).matcher(content);
        while (true) {
            if (!m.find()) {
                break;
            }
            System.out.println("找到 " + m.group(0));
        }
    }
}
替换分割匹配
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Clean-up demo for a stuttered sentence, done in two passes: first every dot
 * is removed, then each run of a repeated character is collapsed to a single
 * occurrence using a back-reference.
 */
public class RegExp13 {

    /** A literal dot. */
    private static final Pattern DOT = Pattern.compile("\\.");

    /** Any character followed by one or more copies of itself. */
    private static final Pattern REPEATED_CHAR = Pattern.compile("(.)\\1+");

    public static void main(String[] args) {
        String content = "我....我要....学学学学....编程java!";

        // Pass 1: strip the dots.
        Matcher dotMatcher = DOT.matcher(content);
        String withoutDots = dotMatcher.replaceAll("");
        System.out.println("content=" + withoutDots);

        // Pass 2: replace each run with its first character ($1).
        Matcher runMatcher = REPEATED_CHAR.matcher(withoutDots);
        String collapsed = runMatcher.replaceAll("$1");
        System.out.println("content=" + collapsed);
    }
}
练习题
package com. hspedu. RegExp ;
import java. util. regex. Matcher ;
import java. util. regex. Pattern ;
/**
 * Exercises: validate an e-mail address, validate an integer/decimal number,
 * and split a URL into protocol, host, port and file name.
 */
public class RegExpHomework {

    /**
     * E-mail: word characters or '-' before the '@', then one or more
     * dot-terminated alphabetic labels and a final alphabetic label.
     * The letter class is {@code [a-zA-Z]}; a range written {@code A-z} would
     * also admit the characters between 'Z' and 'a' ({@code [ \ ] ^ _ `}).
     */
    private static final Pattern EMAIL =
            Pattern.compile("^[\\w-]+@([a-zA-Z]+\\.)+[a-zA-Z]+$");

    /** Optional sign, an integer part without leading zeros, optional fraction. */
    private static final Pattern NUMBER =
            Pattern.compile("^[-+]?([1-9]\\d*|0)(\\.\\d+)?$");

    /** Groups: 1 = protocol, 2 = host, 3 = port, 4 = file name. */
    private static final Pattern URL_PARTS =
            Pattern.compile("^([a-zA-Z]+)://([a-zA-Z.]+):(\\d+)[\\w-/]*/([\\w.]+)$");

    /**
     * @param candidate text to validate; {@code null} is treated as invalid
     * @return {@code true} if the whole input is a valid e-mail address
     */
    static boolean isValidEmail(String candidate) {
        return candidate != null && EMAIL.matcher(candidate).matches();
    }

    /**
     * @param candidate text to validate; {@code null} is treated as invalid
     * @return {@code true} if the whole input is an integer or decimal number
     */
    static boolean isNumber(String candidate) {
        return candidate != null && NUMBER.matcher(candidate).matches();
    }

    public static void main(String[] args) {
        String content01 = "shu@sougo.org.cn";
        System.out.println(isValidEmail(content01) ? "true" : "false");

        String content02 = "-0.56";
        System.out.println(isNumber(content02) ? "true" : "false");

        String content03 = "http://www.sohu.com:8080/abc/index.html";
        Matcher matcher = URL_PARTS.matcher(content03);
        if (matcher.matches()) {
            System.out.println("true");
            System.out.println("整体匹配=" + matcher.group(0));
            System.out.println("协议=" + matcher.group(1));
            System.out.println("域名=" + matcher.group(2));
            System.out.println("端口=" + matcher.group(3));
            System.out.println("文件=" + matcher.group(4));
        } else {
            System.out.println("false");
        }
    }
}