正则表达式学习

正则表达式:

基本概念

  • java.util.regex.Pattern
方法摘要
static Pattern compile(String regex)
static Pattern compile(String regex, int flags)
Matcher matcher(CharSequence input)

  • java.util.regex.Matcher
方法摘要
boolean matches() 匹配整个字符串
boolean find() 找子串
Matcher reset()
boolean lookingAt()
String group()
int start()
int end()
Matcher appendReplacement(StringBuffer sb, String replacement)
StringBuffer appendTail(StringBuffer sb)

使用方法:

/**
 * Shows the two equivalent ways of testing a whole string against a regex:
 * an explicit Pattern/Matcher pair and the String.matches shorthand.
 * Both lines print false because the lower-case 'a' is outside [A-Z].
 */
private static void demo1() {
    final String regex = "[A-Z]{3}";
    final String input = "BaD";

    // Long form: compile the pattern, then ask the matcher for a full-string match.
    boolean viaMatcher = Pattern.compile(regex).matcher(input).matches();
    System.out.println(viaMatcher);

    // Short form: String.matches performs the same full-string test.
    System.out.println(input.matches(regex));
}

数量词: {} * + ?

/**
 * Demonstrates the quantifiers + ? {n,m} * and a plain literal, each applied
 * as a full-string match. Prints one boolean per case, in table order.
 */
private static void demo2() {
    // Each row is { input, regex }; the expected result is noted alongside.
    String[][] cases = {
        { "aaa", "a+" },      // true:  one or more 'a'
        { "aaa", "a?" },      // false: at most one 'a' allowed
        { "aaa", "a{3,5}" },  // true:  three to five 'a'
        { "", "a*" },         // true:  zero occurrences is fine
        { "aaa", "aa" },      // false: the whole input must match, not a prefix
    };
    for (String[] c : cases) {
        System.out.println(c[0].matches(c[1]));
    }
}
数量词
a+:a 出现 1 次或以上
a*:a 出现 0 次或以上
a?:a 出现 0 次或 1 次
a{n}:a 恰好出现 n 次
a{n,}:a 出现 n 次或以上
a{n,m}:a 出现 n 到 m 次(含两端)

字符类: [] && |

    /**
     * Demonstrates character classes: simple sets, negation, ranges,
     * alternation, union and intersection. Prints one boolean per case.
     */
    private static void demo3() {
        boolean inSet = "a".matches("[abc]");              // any one of a, b, c
        boolean notInSet = "a".matches("[^abc]");          // any character except a, b, c
        boolean inRanges = "A".matches("[a-zA-Z]");        // two ranges in one class
        boolean byAlternation = "A".matches("[a-z]|[A-Z]"); // "or" between two classes
        boolean byUnion = "A".matches("[a-z[A-Z]]");       // nested class = union
        boolean byIntersection = "R".matches("[A-Z&&[RFG]]"); // && = intersection

        System.out.println(inSet);
        System.out.println(notInSet);
        System.out.println(inRanges);
        System.out.println(byAlternation);
        System.out.println(byUnion);
        System.out.println(byIntersection);
    }
字符类
[a-zA-Z] 等价于 [a-z[A-Z]](并集)
[A-Z&&[RFG]]:R、F、G(交集)
[a-z&&[^m-p]]:a 到 z,而非 m 到 p,即 [a-lq-z](减去)

边界匹配: \b ^ $

/**
 * Demonstrates the boundary matchers ^, $ and \b against "hello sir".
 * String.matches always tests the whole input, so the first four cases print false.
 */
private static void demo4() {
    final String input = "hello sir";
    String[] regexes = {
        "^hr$",             // false
        "^h",               // false
        "r$",               // false
        "\\w+",             // false: the space is not a word character
        "^h.*r$",
        "^h.*o\\b.*r$",
    };
    for (String regex : regexes) {
        System.out.println(input.matches(regex));
    }
}
边界匹配器
\b:单词边界
^:行的开头
$:行的结尾

预定义字符: . \d \s \w

/**
 * Demonstrates predefined character classes (\d, \s), matching a literal
 * backslash, and a whitespace-only line. Prints one boolean per case.
 */
private static void demo5() {
    // Letters, then digits, then characters from a punctuation set.
    boolean mixed = "abc888&^%".matches("[a-z]{1,3}\\d+[&^#%]+");
    System.out.println(mixed);

    // One literal backslash needs four backslashes in a Java regex literal.
    boolean backslash = "\\".matches("\\\\");
    System.out.println(backslash);

    // Whitespace other than newline, followed by the terminating newline.
    boolean blankLine = "    \n".matches("^[\\s&&[^\\n]]*\\n$");
    System.out.println(blankLine);
}
预定义字符
.:任何字符(与行结束符可能匹配也可能不匹配);要匹配包括行结束符在内的任意字符可用 [\s\S]
\d:数字,即 [0-9]
\s:空白字符,即 [ \t\n\x0B\f\r]
\w:单词字符,即 [a-zA-Z_0-9]
\D:非数字,即 [^0-9]
\S:非空白字符,即 [^\s]
\W:非单词字符,即 [^\w]

字符串查找:matches find lookingAt

  • lookingAt():从头找;
  • matches():匹配整个字符串;
  • find():找子串;
  • matches()和find()相互影响,记得reset();
/**
 * Contrasts matches(), find() and lookingAt() on a single Matcher.
 * The matcher keeps state between calls, so the call order matters.
 */
private static void demo6() {
    String text = "123-34345-234-00";
    Matcher m = Pattern.compile("\\d{3,5}").matcher(text);

    // Whole-string match fails: false.
    System.out.println(m.matches());
    // Per the original note: without this reset the first find() reports 4-9.
    m.reset();

    // Three successive finds, each followed by its [start,end) range; the first is 0-3.
    for (int hit = 0; hit < 3; hit++) {
        System.out.println(m.find());
        System.out.println(m.start() + "-" + m.end());
    }

    // The fourth find fails, so start()/end() are deliberately not called
    // (they throw IllegalStateException after a failed match).
    System.out.println(m.find());

    // lookingAt() always anchors at the beginning of the input.
    for (int attempt = 0; attempt < 4; attempt++) {
        System.out.println(m.lookingAt());
    }
}

字符串替换

/**
 * Replaces every case-insensitive occurrence of "java", alternating between
 * "JAVA" (1st, 3rd, ... hit) and "java" (2nd, 4th, ... hit), then prints the result.
 */
private static void demo7() {
    Matcher m = Pattern.compile("java", Pattern.CASE_INSENSITIVE)
            .matcher("java Java JAVa JaVa IloveJAVA you hateJava afasdfasdf");
    StringBuffer out = new StringBuffer();

    boolean oddHit = true; // the first hit counts as odd
    while (m.find()) {
        m.appendReplacement(out, oddHit ? "JAVA" : "java");
        oddHit = !oddHit;
    }
    // Copy the text after the last hit; otherwise the trailing " afasdfasdf" is lost.
    m.appendTail(out);

    System.out.println(out);
}

标志位简写

/**
 * Shows that the embedded flag (?i) is shorthand for Pattern.CASE_INSENSITIVE.
 * Prints the result of the flag-based match, then the embedded-flag match.
 */
private static void demo8() {
    // Long form: pass the flag to Pattern.compile.
    Pattern p = Pattern.compile("java", Pattern.CASE_INSENSITIVE);
    System.out.println(p.matcher("Java").matches());

    // Short form: embed the flag in the regex itself.
    System.out.println("Java".matches("(?i)(java)"));
}

正则表达式分组:group()

/**
 * Demonstrates group(): first prints every whole match, then prints the
 * digit part (group 1) and the letter part (group 2) of every match.
 */
private static void demo9() {
    final String s = "123aa-34345bb-234cc-00";

    // No capturing groups: group() is the entire match.
    Matcher whole = Pattern.compile("\\d{3,5}[a-z]{2}").matcher(s);
    while (whole.find()) {
        String hit = whole.group();
        System.out.println(hit);
    }

    // Groups are numbered by the position of their opening parenthesis.
    Matcher parts = Pattern.compile("(\\d{3,5})([a-z]{2})").matcher(s);
    while (parts.find()) {
        String digits = parts.group(1);
        String letters = parts.group(2);
        System.out.println(digits);
        System.out.println(letters);
    }
}

反向引用

/**
 * Demonstrates back-references: \2 and \1 must repeat exactly what
 * group 2 / group 1 captured. Prints one boolean per example.
 */
private static void demo10() {
    // \2 repeats the inner group: "1" + "2" captured, then "2" again.
    boolean innerRepeated = Pattern.compile("(\\d(\\d))\\2").matcher("122").matches();
    System.out.println(innerRepeated);

    // \1 repeats the two-digit group: "12" captured, then "12" again.
    boolean pairRepeated = Pattern.compile("(\\d\\d)\\1").matcher("1212").matches();
    System.out.println(pairRepeated);
}

零宽断言

/**
 * Demonstrates the four zero-width assertions (lookahead / lookbehind,
 * positive / negative) against "444a66b".
 *
 * The original declared the same local eight times and therefore did not
 * compile; here every pattern is run in turn and its matches are printed on
 * one line as "regex -> match match ...".
 */
private static void demo11() {
    String s = "444a66b";

    String[] regexes = {
        ".{3}(?=a)",   // 444      three chars followed by 'a'
        "(?=a).{3}",   // a66      three chars starting at an 'a'
        "(?!a).{3}",   // 444 66b  three chars not starting at an 'a'
        ".{3}(?!a)",   // 44a 66b  three chars not followed by 'a'
        ".{3}(?<=a)",  // 44a      three chars ending in 'a'
        "(?<=a).{3}",  // 66b      three chars preceded by 'a'
        "(?<!a).{3}",  // 444 a66  three chars not preceded by 'a'
        ".{3}(?<!a)",  // 444 a66  three chars not ending in 'a'
    };

    for (String regex : regexes) {
        Matcher m = Pattern.compile(regex).matcher(s);
        StringBuilder hits = new StringBuilder();
        while (m.find()) {
            hits.append(' ').append(m.group());
        }
        System.out.println(regex + " ->" + hits);
    }
}
零宽断言
(?=X):lookahead 正断言(当前位置右侧必须能匹配 X)
(?<=X):lookbehind 正断言(当前位置左侧必须能匹配 X)
(?!X):lookahead 负断言(当前位置右侧不能匹配 X)
(?<!X):lookbehind 负断言(当前位置左侧不能匹配 X)

解释(?=):   Pattern p = Pattern.compile("^(?=<)<[^>]+>\\w+");
            String s = "<div>antzone";
            Matcher m = p.matcher(s);
            while(m.find()) {
                System.out.println(m.group());
            }   

匹配过程如下:
首先由正则表达式中的“^”获取控制权,从位置0开始进行匹配,它匹配开始位置0,匹配成功;然后控制权转交给“(?=<)”,由于“^”是零宽的,所以“(?=<)”也是从位置0处开始匹配,它要求所在位置的右侧必须是字符“<”,位置0的右侧恰好是字符“<”,匹配成功;然后控制权转交给“<”,由于“(?=<)”也是零宽的,所以它也是从位置0处开始匹配,于是匹配成功。之后“[^>]+”、“>”、“\w+”依次匹配,最终匹配结果为“<div>antzone”。

解释(?!):   Pattern p = Pattern.compile("ab(?![A-Z])");
            String s = "abZW863ab88";
            Matcher m = p.matcher(s);
            while(m.find()) {
                System.out.println(m.group());
            }

匹配过程如下:
首先由正则表达式的字符“a”获取控制权,从位置0处开始匹配,匹配字符“a”成功;然后控制权转交给“b”,从位置1处开始匹配,匹配字符“b”成功;然后控制权转交给“(?![A-Z])”,它从位置2处开始匹配,它要求所在位置的右边不能是任意一个大写字母,而该位置的右边是大写字母“Z”,匹配失败。于是控制权又重新交给字符“a”,并从位置1处开始尝试,匹配失败;然后控制权再次交给字符“a”,从位置2处开始尝试匹配,依然失败;如此往复尝试,直到从位置7处开始尝试匹配成功,然后将控制权转交给“b”,从位置8处开始尝试匹配,匹配成功;然后再将控制权转交给“(?![A-Z])”,它从位置9处开始尝试匹配,它规定所在位置的右边不能是大写字母,匹配成功,但是它并不会真正匹配字符,所以最终匹配结果是“ab”。

解释(?<=):`     Pattern p = Pattern.compile("(?<=\\bre)\\w+\\b");       //ading
                String s = "reading a book";
                Matcher m = p.matcher(s);
                while(m.find()) {
                    System.out.println(m.group());
                }`

匹配模式

/**
 * Contrasts the three quantifier modes on "aaaa5bbbb8": greedy (default),
 * reluctant (trailing ?) and possessive (trailing +). For each pattern it
 * prints the first match as "start-end", or "not match!" when there is none.
 */
private static void demo12() {
    final String s = "aaaa5bbbb8";
    String[] regexes = {
        ".{3,10}[0-9]",   // greedy:     0-10
        ".{3,10}?[0-9]",  // reluctant:  0-5
        ".{3,10}+[0-9]",  // possessive: not match!
    };
    for (String regex : regexes) {
        Matcher m = Pattern.compile(regex).matcher(s);
        System.out.println(m.find() ? m.start() + "-" + m.end() : "not match!");
    }
}
匹配模式
Greedy(贪婪):长匹配,不满足再往外吐;默认模式
Reluctant(勉强):短匹配;在数量词之后加 ?
Possessive(独占):长匹配,不满足也不吐,直接返回找不到;在数量词之后加 +

常用匹配

  • 匹配空白行:Pattern.compile("^[\\s&&[^\\n]]*\\n$");
  • 匹配邮件地址:Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");
  • 匹配标签中内容:Pattern.compile("(?<=<(\\w+)>).*(?=<\\/\\1>)"); //error:Look-behind group does not have an obvious maximum length near index 10(lookbehind 其中所指定的字符必须是指定数量的,不能用 * + 之类的,但可以用 {0, 100} 这种确定型的。
  • 解决办法:Pattern p=Pattern.compile("(?<=<(\\w{0,"+(Integer.MAX_VALUE-1)+"})>).*(?=<\\/\\1>)");

lookahead 没有这种限制。)

应用:代码统计小程序

package learn;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small line-counting tool: classifies every line of the {@code .java} files
 * directly inside one directory as blank, comment or code, and prints the totals.
 *
 * <p>The totals are kept in static counters that accumulate across calls.
 * A blank line is always counted as blank, even inside a block comment.
 */
public class MyLearn {
    /** Directory scanned when no command-line argument is given. */
    private static final String DEFAULT_PATH =
            "F:\\eclipseworkspace\\mobilesafe\\src\\com\\example\\mobilesafe\\activity";

    /** Number of blank ("white") lines; the field name is kept for compatibility. */
    static long whileline = 0;
    /** Number of code lines. */
    static long codeline = 0;
    /** Number of comment lines. */
    static long commentline = 0;

    /**
     * Counts the lines of all {@code .java} files in a directory and prints the totals.
     *
     * @param args optional; {@code args[0]} is the directory to scan, otherwise the
     *             built-in default path is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        // listFiles() returns null when the path is missing or not a directory.
        File[] listFiles = new File(path).listFiles();
        if (listFiles == null) {
            System.err.println("Not a readable directory: " + path);
            return;
        }
        for (File file : listFiles) {
            if (file.isFile() && file.getName().endsWith(".java")) {
                parse(file);
            }
        }
        System.out.println("codeLines:" + codeline);
        System.out.println("whiteLines:" + whileline);
        System.out.println("commentLines:" + commentline);
    }

    /**
     * Reads one file line by line and adds each line to the matching counter.
     * I/O failures are reported on stderr and end the processing of this file only.
     *
     * @param file the source file to classify
     */
    private static void parse(File file) {
        BufferedReader br = null;
        boolean inBlockComment = false;
        try {
            br = new BufferedReader(new FileReader(file));
            String line;
            while ((line = br.readLine()) != null) {
                // readLine() already strips the line terminator; trim() removes the
                // remaining whitespace, so a blank line is simply the empty string.
                line = line.trim();
                if (line.isEmpty()) {
                    whileline++;
                } else if (inBlockComment || line.startsWith("/*")) {
                    // Being inside a comment is checked before anything else, so a
                    // "/* ... */" line within an open comment still closes it.
                    commentline++;
                    inBlockComment = endsInsideBlockComment(line, inBlockComment);
                } else if (line.startsWith("//")) {
                    commentline++;
                } else {
                    codeline++;
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Tells whether a block comment is still open at the end of {@code line}, by
     * walking its comment delimiters left to right. String literals and line
     * comments are not interpreted.
     *
     * @param line   the trimmed line to scan
     * @param inside whether a block comment is already open at the start of the line
     * @return {@code true} if a block comment is open after the last character
     */
    private static boolean endsInsideBlockComment(String line, boolean inside) {
        int from = 0;
        while (from < line.length()) {
            int next = line.indexOf(inside ? "*/" : "/*", from);
            if (next < 0) {
                break;
            }
            inside = !inside;
            // Skip both delimiter characters so that "/*/" is not read as open + close.
            from = next + 2;
        }
        return inside;
    }

}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值