正则表达式入门知识

牛奶味的辣椒水

于 2020-07-23 19:59:58 发布

阅读量172

点赞数

文章标签： java

本文链接：https://blog.csdn.net/qq_41469636/article/details/107545654

版权

什么叫正则表达式？

正则表达式是一种用来描述字符串匹配模式的语法，通过特定的模式来判断文本内容是否符合规则，也可以用来查找、提取和替换文本。

常用字符及作用

.：任意单个字符

[abc]:a或b或c

[^abc]：除a、b、c之外的任意字符

^:开始

$:结束

[a-z]:a到z

a|b:a或b

\s,\b,\d,\w:空白字符（空格、制表符、换行等），单词边界，数字，单词字符（字母、数字、下划线）

\S,\B,\D,\W:上面取反

?、+、*：0到1次、1到n次、0到n次

{n}、{n,}、{n,m}：n次、n次以上、n到m次

在线生成工具：http://tools.jb51.net/regex/create_reg

正则表达式有三种匹配方式：

1.全匹配

2.部分匹配(开头匹配)

3.子串匹配(在字符串中查找符合正则表达式的子串)

使用方式：

1.创建Pattern对象（通过静态方法Pattern.compile创建）
2.调用Pattern的matcher方法与待匹配的字符串关联，得到Matcher对象
3.选择使用的匹配方式（matches、lookingAt、find三种）

// matches(): succeeds only when the ENTIRE input string matches the regular expression.

		// ".*\\s" means: any characters, followed by one whitespace character at the very end.
		Pattern p=Pattern.compile(".*\\s");

		Matcher m=p.matcher("String");

		// "String" does not end with whitespace, so the result here is false.
		Boolean s=m.matches();

// lookingAt(): succeeds when a prefix of the input (starting at index 0) matches the regular expression;
// unlike matches(), the rest of the input does not have to match.

		// The string literal below is a placeholder for the regular expression to use.
		Pattern p=Pattern.compile("正则表达式");

		// The string literal below is a placeholder for the text to be checked.
		Matcher m=p.matcher("待判断的字符串");

		Boolean s=m.lookingAt();

// find(): scans the input for the next substring that matches the regular expression.
// Calling it in a loop visits every match in order.
		Pattern p=Pattern.compile("正则表达式");
		Matcher m=p.matcher("待判断的字符串");
		while(m.find())
		{
			// print (not println) is used, so the three values are written back to back with no separator.
			System.out.print(m.start());// start index of the current match
			System.out.print(m.end());// offset just past the last matched character (exclusive end)
			System.out.print(m.group());// the whole matched substring (group 0), not a numbered capture group
		}

$1和$2

在replaceAll等方法的替换字符串中，$1、$2分别表示正则表达式中第1个、第2个捕获组（用圆括号括起来的部分）匹配到的内容，作用与Matcher的group(1)、group(2)相同，因此可以在替换结果中保留或重新组合原字符串的部分内容。

package com.pattern;

/**
 * Demonstrates group references ({@code $1}, {@code $3}) inside a replacement string.
 */
public class Demo_$1_$2 {

	/**
	 * Masks the middle four digits of a fixed 11-digit phone number and prints the result.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String phoneNumber = "15629193043";

		// The pattern splits the number into three groups of 3 + 4 + 4 digits.
		// The replacement keeps group 1 and group 3 and writes "****" in place of group 2.
		final String masked = phoneNumber.replaceAll("(\\d{3})(\\d{4})(\\d{4})", "$1****$3");

		System.out.println(masked);
	}
}

\0\1\2用法（反向引用）：在正则表达式内部，\1、\2表示再次匹配第1个、第2个捕获组已经匹配到的内容；注意Java中\0不是反向引用，整个匹配结果在替换字符串中用$0表示。

package com.pattern;

/**
 * Demonstrates a back-reference ({@code \1}) inside a pattern combined with a group
 * reference ({@code $1}) inside the replacement string.
 */
public class Demo_$1_$2 {

	/**
	 * Collapses every run of a repeated character in a fixed sentence down to a single
	 * occurrence and prints the result.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String stuttered = "我我我我喜喜喜喜喜欢欢欢欢欢你你你你你";

		// "(.)" captures one character; "\\1+" requires that same character to repeat
		// one or more times. The whole run is replaced by the captured character.
		final String collapsed = stuttered.replaceAll("(.)\\1+", "$1");

		System.out.println(collapsed);
	}
}

正则表达式应用：

1.填写表格的时候输入内容的格式的限制

2.爬虫

3.聊天内容的时候敏感词汇的屏蔽

4.聊天记录的提取

正则表达式小案例

这个案例可以用来计算文件（使用了上节课用的文件类内容以及目录遍历）中的代码、空行、注释的行数（流程配了相应的注释）

package com.work;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Pattern;

//计算文件中有多少行注释
//多少行代码
//多少行空格




/**
 * Counts code lines, comment lines and blank lines in every {@code .java} file found
 * under a directory tree.
 *
 * <p>The totals are kept in static counters, so they accumulate across all calls to
 * {@link #fun(File)} and {@link #readFile(File)} made during one run of the program.
 */
public class W3 {

	/** Running totals of blank lines, code lines and comment lines. */
	private static long space,code,commons;

	// The patterns are compiled once instead of on every line (String.matches recompiles each call).

	/** A line that is empty or contains only whitespace. */
	private static final Pattern BLANK_LINE = Pattern.compile("^[\\s]*$");
	/** A line that starts with {@code //}. */
	private static final Pattern LINE_COMMENT = Pattern.compile("^//.*");
	/** A block comment that opens and closes on the same line. */
	private static final Pattern ONE_LINE_BLOCK_COMMENT = Pattern.compile("^/\\*.*\\*/$");
	/** A line that starts with the block-comment opener. */
	private static final Pattern BLOCK_COMMENT_START = Pattern.compile("^/\\*.*");
	/** A line that ends with the block-comment terminator. */
	private static final Pattern BLOCK_COMMENT_END = Pattern.compile(".*\\*/$");

	/**
	 * Recursively walks {@code dir} and feeds every {@code .java} file to {@link #readFile(File)}.
	 *
	 * <p>Does nothing when {@code dir} is {@code null}, does not exist, is not a directory,
	 * or cannot be listed.
	 *
	 * @param dir the directory to scan
	 */
	public static void fun(File dir) {
		if(dir == null)
		{
			return;
		}
		// Keep only sub-directories (to recurse into) and files whose name ends with ".java".
		File[] fs=dir.listFiles(new FileFilter() {
			@Override
			public boolean accept(File pathname) {
				return pathname.isDirectory()||pathname.getName().endsWith(".java");
			}
		});
		// listFiles returns null (not an empty array) when dir is not a directory or an I/O error occurs.
		if(fs == null)
		{
			return;
		}
		for(File f:fs)
		{
			if(f.isFile())
			{
				readFile(f);
			}else
			{
				fun(f);
			}
		}
	}

	/**
	 * Reads {@code file} line by line and adds each line to one of the three counters.
	 *
	 * <p>Each line is trimmed first and then classified, in this order: blank line,
	 * {@code //} comment, single-line block comment, start of a multi-line block comment,
	 * continuation/end of a multi-line block comment, and finally code.
	 * A {@code null} or non-existent file is ignored. An I/O failure is reported on
	 * {@code System.err} and the remaining lines of that file are skipped.
	 *
	 * @param file the source file to analyse
	 */
	public static void readFile(File file) {
		if(file == null || !file.exists())
		{
			return;
		}
		boolean isCommon = false;// true while inside a multi-line block comment
		// FileReader is a character stream; BufferedReader adds buffering and readLine().
		// try-with-resources closes both even when reading fails.
		try (BufferedReader br = new BufferedReader(new FileReader(file))) {
			String line;
			while((line=br.readLine())!=null)
			{
				line = line.trim();
				if(BLANK_LINE.matcher(line).matches())
				{
					space++;
				}else if(LINE_COMMENT.matcher(line).matches())
				{
					commons++;
				}else if(ONE_LINE_BLOCK_COMMENT.matcher(line).matches())
				{
					commons++;
				}else if(BLOCK_COMMENT_START.matcher(line).matches() && !BLOCK_COMMENT_END.matcher(line).matches())
				{
					// Block comment opened but not closed on this line.
					isCommon=true;
					commons++;
				}else if(isCommon)
				{
					// Inside a block comment: every line counts, and the terminator ends the block.
					if(BLOCK_COMMENT_END.matcher(line).matches())
					{
						isCommon = false;
					}
					commons++;
				}else
				{
					code++;
				}
			}
		} catch (IOException e) {
			System.err.println("Failed to read " + file + ": " + e);
		}
	}

	/**
	 * Scans the hard-coded directory and prints the three totals.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		File file =new File("E://pro");
		fun(file);
		System.out.println("有效代码行："+code);
		System.out.println("注释："+commons);
		System.out.println("空行："+space);
	}

}

爬虫（用于爬取网页图片，正则表达式用于截取网址，涉及到后面才学的IO流，暂且就这样）

package com.work;

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal image crawler: downloads one web page, extracts every {@code .jpg} URL with a
 * regular expression and saves each image under {@code src/imgs/}.
 */
public class W2 {

	/**
	 * Matches a single absolute image URL ending in {@code .jpg}.
	 *
	 * <p>The URL body excludes whitespace, quotes and angle brackets and is matched
	 * reluctantly, so two URLs on the same line are reported separately instead of being
	 * merged into one match. The dot before {@code jpg} is escaped so that it only matches
	 * a literal dot.
	 */
	private static final Pattern IMAGE_URL = Pattern.compile("https?://[^\\s\"'<>]+?\\.jpg");

	/**
	 * Fetches the start page, prints every image URL found in it and downloads each image.
	 *
	 * @param args unused
	 * @throws Exception if the page or any image cannot be fetched or written; the first
	 *                   failure aborts the run
	 */
	public static void main(String[] args) throws Exception {
		// 1. The site to crawl.
		String path = "http://www.goupuzi.com/";
		URL url = new URL(path);

		// 2. Open the connection.
		URLConnection conn = url.openConnection();

		// 3. Read the whole page as UTF-8 text; the reader is closed even if reading fails.
		StringBuilder buffer = new StringBuilder();
		try (BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream(), "utf-8"))) {
			String line;
			while ((line = br.readLine()) != null) {
				buffer.append(line).append('\n');
			}
		}

		// 4. Find every image URL in the page and download it.
		Matcher m = IMAGE_URL.matcher(buffer);
		while (m.find()) {
			String imageUrl = m.group();
			System.out.println(imageUrl);
			downImg(imageUrl);
		}
	}

	/**
	 * Downloads the resource at {@code path} into {@code src/imgs/<random-uuid>.jpg}.
	 *
	 * <p>The directory {@code src/imgs/} must already exist. Both streams are closed
	 * whether or not the copy succeeds.
	 *
	 * @param path absolute URL of the image
	 * @throws Exception if the URL is malformed, the download fails or the file cannot be written
	 */
	public static void downImg(String path) throws Exception {
		URL url = new URL(path);
		URLConnection conn = url.openConnection();
		// The crawler only collects .jpg URLs, so the saved file uses the matching extension.
		String target = "src/imgs/" + UUID.randomUUID() + ".jpg";
		try (InputStream is = conn.getInputStream();
				FileOutputStream fos = new FileOutputStream(target)) {
			byte[] arr = new byte[8192];
			int len;
			while ((len = is.read(arr)) != -1) {
				fos.write(arr, 0, len);
			}
		}
	}
}