黑马程序员——Java基础--正则表达式

最新推荐文章于 2024-09-20 15:32:13 发布

very爱伱

最新推荐文章于 2024-09-20 15:32:13 发布

阅读量469

点赞数

文章标签：黑马程序员 java 正则表达式 regex

本文链接：https://blog.csdn.net/qq_29211749/article/details/46782449

版权

------ Java培训、Android培训、iOS培训、.Net培训、期待与您交流！ -------

正则表达式

一、了解正则表达式
    正则表达式用于操作字符串数据。通过一些特定的符号来实现的。所以我们为了掌握正则表达式，必须要学习一些符号。
    但是正则表达式的弊端是虽然简化了，但是阅读性差。
    初次引用的是String类下有个方法：
     	public boolean matches(String regex)告知此字符串是否匹配给定的正则表达式。

演示：

/*
public boolean matches(String regex)告知此字符串是否匹配给定的正则表达式。
*/

/**
 * Demonstrates validating a QQ number in two ways: with one regular
 * expression (see {@link #main}) and with hand-written checks
 * (see {@link #checkQQ}).
 */
class RegexDemo
{
	/**
	 * Checks a sample QQ number against a regular expression and prints
	 * the number followed by the boolean result.
	 */
	public static void main(String[] args)
	{
		String qq = "123213132141432";

		// Hand-written alternative, kept for comparison:
		// checkQQ(qq);

		// [1-9]        the first character must be a digit from 1 to 9
		// [0-9]{4,14}  followed by 4 to 14 more digits (5 to 15 digits overall)
		String regex = "[1-9][0-9]{4,14}";

		// String.matches succeeds only if the whole string matches the pattern.
		boolean b = qq.matches(regex);

		System.out.println(qq+":"+b);
	}

	/**
	 * Validates a QQ number without regular expressions and prints the verdict.
	 *
	 * Requirement: length 5 to 15, digits only, must not start with 0.
	 *
	 * The digit check scans the characters explicitly instead of relying on
	 * Long.parseLong, because parseLong also accepts a leading '+' or '-'
	 * sign and non-ASCII digits, which would let inputs such as "-12345"
	 * pass as valid.
	 *
	 * @param qq the candidate QQ number; must not be null
	 */
	public static void checkQQ(String qq)
	{
		int len = qq.length();

		if(len<5 || len>15)
		{
			System.out.println(qq+":错误");
			return;
		}

		if(qq.startsWith("0"))
		{
			System.out.println(qq+":不能0开头");
			return;
		}

		for(int i=0; i<len; i++)
		{
			char c = qq.charAt(i);

			// Only the ASCII digits 0-9 are legal; signs and letters are not.
			if(c<'0' || c>'9')
			{
				System.out.println(qq+":含有非法字符");
				return;
			}
		}

		// All characters are digits and there is no leading zero, so the
		// text is already in the canonical form the number would print as.
		System.out.println(qq+":正确");
	}
}

二、正则表达式常用符号
    1、字符
	x 字符 x 
	\\ 反斜线字符 
	\t 制表符 ('\u0009') 
	\n 新行（换行）符 ('\u000A') 
	\r 回车符 ('\u000D') 
	\e 转义符 ('\u001B') 
 
    2、字符类 
	[abc] a、b 或 c（简单类） 判断某一位只能是这三种的一个
	[^abc] 任何字符，除了 a、b 或 c（否定） 
	[a-zA-Z] a 到 z 或 A 到 Z，两头的字母包括在内（范围） 
	[a-d[m-p]] a 到 d 或 m 到 p：[a-dm-p]（并集） 
	[a-z&&[def]] d、e 或 f（交集） 
	[a-z&&[^bc]] a 到 z，除了 b 和 c：[ad-z]（减去） 
	[a-z&&[^m-p]] a 到 z，而非 m 到 p：[a-lq-z]（减去） 
    3、预定义字符类 
	. 任何字符（与行结束符可能匹配也可能不匹配） 
	\d 数字：[0-9] 
	\D 非数字： [^0-9] 
	\s 空白字符：[ \t\n\x0B\f\r] 
	\S 非空白字符：[^\s] 
	\w 单词字符：[a-zA-Z_0-9] 《**》
	\W 非单词字符：[^\w] 
    4、边界匹配器 
	^ 行的开头 
	$ 行的结尾 
	\b 单词边界 ——
	\B 非单词边界 
    5、Greedy 数量词 
	X? X，一次或一次也没有 
	X* X，零次或多次 
	X+ X，一次或多次 
	X{n} X，恰好 n 次 
	X{n,} X，至少 n 次 
	X{n,m} X，至少 n 次，但是不超过 m 次

三、正则表达式对字符串的常见操作：
    1.匹配。
	其实使用的就是String类中的matches方法。
    2.切割。
	其实使用的就是String类中的split方法。
    3.替换
	其实使用的就是String类中的replaceAll方法。
    4.获取。

	通过Pattern类将正则规则封装成对象，再通过正则对象的matcher方法与字符串相关联，获取要对字符串操作的匹配器对象Matcher，然后通过Matcher匹配器对象的方法对字符串进行操作。

演示：

import java.util.regex.*;
/**
 * Small demonstrations of the four common regex operations on strings:
 * matching, splitting, replacing and extracting.
 */
class RegexDemo2 {

	/** Runs the extraction demonstration. */
	public static void main(String[] args) {
		functionDemo_4();
	}

	/**
	 * Extraction: prints the sample sentence, then every three-letter
	 * lowercase word in it together with its start:end offsets.
	 *
	 * The general recipe is: compile the rule into a Pattern, bind it to
	 * the input with Pattern.matcher to obtain a Matcher, then drive the
	 * Matcher (here with find/group/start/end).
	 */
	public static void functionDemo_4() {
		final String sentence = "da jia hao ,ming tian bu fang jia!";

		// \b is a word boundary, so only whole three-letter words match.
		final Matcher words = Pattern.compile("\\b[a-z]{3}\\b").matcher(sentence);

		System.out.println(sentence);

		// find() advances to the next match; group() is the matched text.
		for (boolean found = words.find(); found; found = words.find()) {
			System.out.println(words.group());
			System.out.println(words.start() + ":" + words.end());
		}
	}

	/**
	 * Replacement: collapses each run of a repeated character into "#",
	 * then masks the middle four digits of a phone number.
	 */
	public static void functionDemo_3() {
		final String names = "zhangsanttttxianqiangmmmmmmmmzhouliu";

		// (.)\1+ is any character followed by one or more copies of itself.
		final String collapsed = names.replaceAll("(.)\\1+", "#");
		sop(collapsed);

		// In the replacement, $1 and $2 refer to the captured groups,
		// turning 15800001111 into 158****1111.
		final String masked = "15800001111".replaceAll("(\\d{3})\\d{4}(\\d{4})", "$1****$2");
		sop(masked);
	}

	/**
	 * Splitting: cuts the sample string at every run of a repeated
	 * character and prints the pieces.
	 *
	 * Other inputs this demo was written for:
	 *   "zhangsan     xianqiang  zhouliu"  split with " +" (or " {1,}")
	 *   "zhangsan.xianqiang.zhouliu"       split with "\\."
	 */
	public static void functionDemo_2() {
		final String joined = "zhangsanttttxianqiangmmmmmmmmzhouliu";

		// Parentheses form a group (numbered automatically); \1 refers back to it.
		final String[] pieces = joined.split("(.)\\1+");

		for (int i = 0; i < pieces.length; i++) {
			sop(pieces[i]);
		}
	}

	/**
	 * Matching: prints whether the sample phone number has the expected form.
	 */
	public static void functionDemo_1() {
		final String phone = "15800001111";

		// \d is a digit 0-9, so this is equivalent to "1[358][0-9]{9}".
		final boolean valid = Pattern.matches("1[358]\\d{9}", phone);

		System.out.println(valid);
	}

	/** Prints the given object on its own line. */
	public static void sop(Object obj) {
		System.out.println(obj);
	}

}

练习：
    1.治疗口吃：我我....我我我我..要要要要要..学学学..编编编.编编...编程
    2.对IP地址排序
    3.对邮件地址校验

import java.util.*;
/**
 * Regex exercises: removing stutter from a sentence, sorting IP
 * addresses numerically, and validating a mail address.
 */
class RegexTest
{
	/** Runs the mail validation exercise. */
	public static void main(String[] args)
	{
		test_3();
	}

	/**
	 * Exercise 1: cures a "stuttering" sentence by removing the dots and
	 * then collapsing repeated characters. Prints the text after each step.
	 */
	public static void test_1()
	{
		String str = "我我....我我我我..要要要要要..学学学..编编编.编编...编程";

		// 1. Remove every run of dots.
		str = str.replaceAll("\\.+","");

		sop(str);

		// 2. Replace each run of a repeated character with a single copy
		//    ($1 is the character captured by the first group).
		str = str.replaceAll("(.)\\1+","$1");

		sop(str);
	}

	/**
	 * Exercise 2: sorts IP addresses so that numerically smaller ones
	 * come first, e.g. 192.168.10.34  127.0.0.1  3.3.3.3  105.70.11.55.
	 *
	 * Plain string ordering would put "3.3.3.3" last, so every segment is
	 * first normalized to exactly three digits; after that, string order
	 * and numeric order agree. Prints the two intermediate strings and
	 * then the sorted addresses, one per line, in their original form.
	 */
	public static void test_2()
	{
		String ip_str = "192.168.10.34   127.0.0.1   3.3.3.3  105.70.11.55";

		// Pad every segment with two zeros: a segment has at least one
		// digit and needs at most two more to reach three.
		ip_str = ip_str.replaceAll("(\\d+)","00$1");

		sop(ip_str);
		// 00192.00168.0010.0034   00127.000.000.001   003.003.003.003  00105.0070.0011.0055

		// Keep only the last three digits of each segment.
		ip_str = ip_str.replaceAll("0*(\\d{3})","$1");

		sop(ip_str);
		// 192.168.010.034   127.000.000.001   003.003.003.003  105.070.011.055

		// Split on runs of spaces and sort. Sorting the array (rather than
		// collecting into a TreeSet) keeps duplicate addresses.
		String[] ips = ip_str.split(" +");

		Arrays.sort(ips);

		for(String ip : ips)
		{
			// Strip the leading zeros again to restore the original form.
			sop(ip.replaceAll("0*(\\d+)","$1"));
		}
		/*
		Plain string order:
			105.70.11.55
			127.0.0.1
			192.168.10.34
			3.3.3.3
		Order after normalizing:
			3.3.3.3
			105.70.11.55
			127.0.0.1
			192.168.10.34
		*/
	}

	/**
	 * Exercise 3: validates a sample mail address and prints the address
	 * followed by the boolean result.
	 */
	public static void test_3()
	{
		String mail = "abc@sina.com";

		String regex = "[a-zA-Z0-9_]+@[a-zA-Z0-9]+\\.[a-zA-Z]{2,3}";

		// A looser alternative: regex = "\\w+@\\w+(\\.\\w+)+";

		boolean b = mail.matches(regex);

		sop(mail+":"+b);
	}

	/** Prints the given object on its own line. */
	public static void sop(Object obj)
	{
		System.out.println(obj);
	}

}

/*

网页爬虫
其实就是一个程序用于在互联网中获取符合指定规则的数据

爬取邮箱地址。

*/
import java.util.*;
import java.util.regex.*;
import java.io.*;
import java.net.*;
/**
 * A minimal "web crawler": reads text line by line and collects every
 * substring that looks like a mail address.
 */
class RegexTest2
{
	/**
	 * Loose mail address rule: word characters, '@', word characters,
	 * then one or more ".label" parts. Compiled once and shared.
	 */
	private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

	/** Prints every mail address found on the crawled web page. */
	public static void main(String[] args)throws IOException
	{
		List<String> list = getMailByWeb();

		for(String mail : list)
		{
			System.out.println(mail);
		}
	}

	/**
	 * Downloads http://www.baidu.com and returns the mail addresses found in it.
	 *
	 * @return the addresses in the order they appear; empty if there are none
	 * @throws IOException if the page cannot be opened or read
	 */
	public static List<String> getMailByWeb() throws IOException
	{
		URL url = new URL("http://www.baidu.com");

		// No charset is given, so the platform default is used for decoding.
		BufferedReader bufIn = new BufferedReader(new InputStreamReader(url.openStream()));

		try
		{
			return collectMails(bufIn);
		}
		finally
		{
			// Always release the connection, even when reading fails.
			bufIn.close();
		}
	}

	/**
	 * Reads the local file c:\mail.html and returns the mail addresses found in it.
	 *
	 * @return the addresses in the order they appear; empty if there are none
	 * @throws IOException if the file cannot be opened or read
	 */
	public static List<String> getMail()throws IOException
	{
		BufferedReader bufr = new BufferedReader(new FileReader("c:\\mail.html"));

		try
		{
			return collectMails(bufr);
		}
		finally
		{
			// Always release the file handle, even when reading fails.
			bufr.close();
		}
	}

	/** Scans the reader line by line and collects every match of the mail rule. */
	private static List<String> collectMails(BufferedReader in) throws IOException
	{
		List<String> list = new ArrayList<String>();

		String line = null;

		while((line=in.readLine())!=null)
		{
			Matcher m = MAIL_PATTERN.matcher(line);

			// A line may contain several addresses; take them all.
			while(m.find())
			{
				list.add(m.group());
			}
		}
		return list;
	}

}