Java基础：正则表达式regex

最新推荐文章于 2024-05-29 16:29:48 发布

zhoujing_0424

最新推荐文章于 2024-05-29 16:29:48 发布

阅读量617

点赞数

分类专栏：Java基础 文章标签：java基础 正则表达式 regex 字符串

本文链接：https://blog.csdn.net/zhoujing_0424/article/details/49310295

版权

Java基础专栏收录该内容

19 篇文章 0 订阅

订阅专栏

正则表达式是对字符串操作的一种逻辑公式，就是用事先定义好的一些特定字符、及这些特定字符的组合，组成一个“规则字符串”，这个“规则字符串”用来表达对字符串的一种过滤逻辑。
给定一个正则表达式和另一个字符串，我们可以达到如下的目的：
1. 给定的字符串是否符合正则表达式的过滤逻辑（称作“匹配”）；
2. 可以通过正则表达式，从字符串中获取我们想要的特定部分。
正则表达式的特点是：
1. 灵活性、逻辑性和功能性非常的强；
2. 可以迅速地用极简单的方式达到字符串的复杂控制；
3. 对于刚接触的人来说，比较晦涩难懂。

我们通过以下几个例子来学习正则表达式。

package com.zj.regex;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A collection of small, self-contained demonstrations of Java regular
 * expressions: matching, splitting, replacing and extracting, followed by a
 * few exercises. Every demo works on hard-coded sample data and prints its
 * result to standard output.
 */
public class RegexDemo {

    /**
     * Validates a hard-coded sample QQ number and prints {@code qq:result}.
     * Rule: 5 to 15 characters long, digits only, and the first digit is not 0.
     */
    public static void checkQQ() {
        String qq = "1234567s";
        // One leading digit 1-9 followed by 4 to 14 further digits.
        String regex = "[1-9][0-9]{4,14}";
        boolean b = qq.matches(regex);
        System.out.println(qq + ":" + b);
    }

    /**
     * Common operation 1: matching.
     * Uses {@link String#matches(String)} to check a sample phone number and
     * prints {@code tel:result}.
     */
    public static void function1() {
        String tel = "15200000000";
        // The Java string "\\d" yields the regex token \d (one backslash is the string escape).
        String regex = "1[358]\\d{9}";
        boolean b = tel.matches(regex);
        System.out.println(tel + ":" + b);
    }

    /**
     * Common operation 2: splitting.
     * Uses {@link String#split(String)} with three different delimiters and
     * prints every resulting name on its own line.
     */
    public static void function2() {
        String nameStr1 = "zhangsan    lisi   wangwu";
        String nameStr2 = "zhangsan.lisi.wangwu";
        String nameStr3 = "zhangsan#######lisi@@@@@@wangwu"; // separated by runs of a repeated character

        // One or more spaces.
        String[] names1 = nameStr1.split(" +");
        // A bare . means "any character" in a regex, so it is escaped for the regex
        // and the backslash is escaped again for the Java string literal.
        String[] names2 = nameStr2.split("\\.");
        // Group 1 captures any character and \1+ requires that same character to
        // repeat at least once more. Groups are numbered by their opening
        // parenthesis, left to right, e.g. ((A)(B(C))).
        String[] names3 = nameStr3.split("(.)\\1+");

        for (String name : names1) {
            System.out.println(name);
        }

        for (String name : names2) {
            System.out.println(name);
        }

        for (String name : names3) {
            System.out.println(name);
        }
    }

    /**
     * Common operation 3: replacing.
     * Uses {@link String#replaceAll(String, String)} to collapse runs of a
     * repeated character and to mask the middle digits of a phone number,
     * then prints the three results.
     */
    public static void function3() {
        String str = "zhangsan！！！！！！lisi@@@@@@wangwu";
        // Replace every run of a repeated character with a single #.
        String str1 = str.replaceAll("(.)\\1+", "#");
        // Replace every run with one copy of the repeated character; $1 refers
        // to group 1 of the pattern given as the first argument.
        String str2 = str.replaceAll("(.)\\1+", "$1");

        String tel = "15200006825";
        // Keep groups 1 and 2 (first three and last four digits), mask the rest.
        tel = tel.replaceAll("(\\d{3})\\d{4}(\\d{4})", "$1****$2");

        System.out.println(str1);
        System.out.println(str2);
        System.out.println(tel);
    }

    /**
     * Common operation 4: extracting.
     * <ol>
     *   <li>{@code Pattern p = Pattern.compile("a*b");} wraps the regex in an object.</li>
     *   <li>{@code Matcher m = p.matcher("aaaaaab");} binds the pattern to an input
     *       string and yields a matcher for it.</li>
     *   <li>{@code boolean b = m.matches();} operates on the input through the matcher.</li>
     * </ol>
     * This demo prints the input, then every three-letter word in it together
     * with its start and end offsets.
     */
    public static void function4() {
        String str = "da jia hao, wo jiao zhou jing";
        // Exactly three lower-case letters delimited by word boundaries.
        String regex = "\\b[a-z]{3}\\b";

        // 1. Wrap the regex in a Pattern object.
        Pattern p = Pattern.compile(regex);
        // 2. Obtain a matcher for the input string.
        Matcher m = p.matcher(str);
        // 3. Drive the matcher over the input.
        System.out.println(str);
        while (m.find()) {
            System.out.print(m.start() + ":" + m.end() + "  ");
            System.out.println(m.group()); // the matched subsequence
        }
    }

    /**
     * Exercise 1: "curing a stutter".
     * Removes the dots from a stuttered sentence, prints it, then collapses
     * every run of a repeated character to a single one and prints it again.
     */
    public static void exercise1() {
        String str = "我我我我...要要要...学学....编编编编编程程程程程程程程";
        str = str.replaceAll("\\.", ""); // drop the dots
        System.out.println(str);
        // Collapse each run to one copy of the repeated character ($1 is group 1).
        str = str.replaceAll("(.)\\1+", "$1");
        System.out.println(str);
    }

    /**
     * Exercise 2: sorting IP addresses.
     * Pads every segment to three digits so that plain string ordering agrees
     * with numeric ordering, sorts the addresses, then strips the padding and
     * prints them one per line (after printing the padded intermediate string).
     */
    public static void exercise2() {
        String IPStr = "192.168.10.23  127.0.0.1   105.70.11.55  3.3.3.3";

        // 1. To compare addresses as strings every segment needs the same width,
        //    so prefix each segment with the maximum padding it could need: two zeros.
        IPStr = IPStr.replaceAll("(\\d+)", "00$1");
        // 2. Keep only the last three digits of every segment.
        IPStr = IPStr.replaceAll("0*(\\d{3})", "$1");
        System.out.println(IPStr);

        // 3. Split on runs of spaces.
        String[] IPS = IPStr.split(" +");

        // 4. Sort via the natural String ordering of a TreeSet.
        TreeSet<String> ts = new TreeSet<String>();
        for (String IP : IPS) {
            ts.add(IP);
        }

        // 5. Strip the leading zeros of every segment again and print.
        for (String IP : ts) {
            System.out.println(IP.replaceAll("0*(\\d+)", "$1"));
        }
    }

    /**
     * Exercise 3: e-mail address validation.
     * Checks two sample addresses against the same pattern and prints
     * {@code address:result} for each.
     */
    public static void exercise3() {
        String mail1 = "abc1@sina.com";
        String mail2 = "abc1@sina.com.cn";
        // Local part, @, host name, then one to three dot-separated suffixes of
        // one to three letters each. A looser alternative would be
        // \w+@\w+(\.\w+)+, which also accepts inputs such as 1@1.1.
        String regex = "[a-zA-Z0-9_]+@[a-zA-Z0-9]+(\\.[a-zA-Z]{1,3}){1,3}";

        boolean b1 = mail1.matches(regex);
        // Validate the second address itself; previously mail1 was tested twice.
        boolean b2 = mail2.matches(regex);
        System.out.println(mail1 + ":" + b1);
        System.out.println(mail2 + ":" + b2);
    }

    /**
     * Exercise 4: a tiny crawler, i.e. a program that extracts data matching a
     * given rule from a document.
     * Reads the local file d:\mail.html line by line, collects every substring
     * that matches the web-address pattern and prints the matches one per line.
     *
     * @throws IOException if the file cannot be opened or read
     */
    public static void exercise4() throws IOException {
        List<String> list = new ArrayList<String>();

        // 2. The rule to look for. To harvest e-mail addresses instead, compile
        //    the pattern \w+@\w+(\.\w+)+ here.
        String webAdress_regex = "http://www(\\.\\w+)+";
        Pattern p = Pattern.compile(webAdress_regex);

        // 1. Open the source document. A page fetched over the network could be
        //    read the same way by wrapping URL.openStream() in an InputStreamReader.
        BufferedReader bufr = new BufferedReader(new FileReader("d:\\mail.html"));
        try {
            // 3. Store every match of every line in the list.
            String line = null;
            while ((line = bufr.readLine()) != null) {
                Matcher m = p.matcher(line);
                while (m.find()) {
                    list.add(m.group());
                }
            }
        } finally {
            // Always release the file handle, even when reading fails.
            bufr.close();
        }

        for (String l : list) {
            System.out.println(l);
        }
    }

    /**
     * Runs one of the demos; uncomment the call you want to try.
     *
     * @param args unused
     * @throws IOException if {@link #exercise4()} cannot read its input file
     */
    public static void main(String[] args) throws IOException {

//      checkQQ();

//      function1();
//      function2();
//      function3();
//      function4();

//      exercise1();
//      exercise2();
//      exercise3();
        exercise4();

    }


}

zhoujing_0424

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Java基础：正则表达式regex

正则表达式是对字符串操作的一种逻辑公式，就是用事先定义好的一些特定字符、及这些特定字符的组合，组成一个“规则字符串”，这个“规则字符串”用来表达对字符串的一种过滤逻辑。给定一个正则表达式和另一个字符串，我们可以达到如下的目的： 1. 给定的字符串是否符合正则表达式的过滤逻辑（称作“匹配”）； 2. 可以通过正则表达式，从字符串中获取我们想要的特定部分。正则表达式的特点是： 1. 灵活性
复制链接

扫一扫

专栏目录