Java之正则表达式常用操作

最新推荐文章于 2022-04-02 08:47:12 发布
find_czk
最新推荐文章于 2022-04-02 08:47:12 发布
阅读量388
点赞数 1
分类专栏： Java工具类　文章标签： java
本文链接：https://blog.csdn.net/findzk/article/details/97974748
版权
Java工具类专栏收录该内容
5 篇文章 0 订阅
订阅专栏
                        
                     
 
工具类意义描述 
 正则表达式操作是最典型、最常用的技术之一，在绝大多数项目开发当中均会有所涉及。
 
代码实现
package com.czk.regex_utils;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex helper that extracts the text found between a "begin" delimiter
 * pattern and an "end" delimiter pattern.
 *
 * <p>
 * The constructors compile the pattern {@code beginRegex + textRegex + endRegex}
 * and keep a single {@link Matcher} for it. The input text is supplied either
 * through a constructor or through {@link #reset(String)}. Matches are then
 * consumed one by one, either with the {@code getXxxText()} convenience methods
 * (each of which advances the matcher itself) or iterator-style with
 * {@link #hasNext()} followed by one of the {@code getNextXxx()} accessors.
 *
 * <p>
 * An instance created with the no-argument constructor has no matcher; only
 * {@link #isAllChineseChar(String)}, {@link #isContainFreqChineseChar(String)}
 * and {@link #addFilterRegex(String)} may be used on it. Every other method
 * dereferences the matcher and throws {@link NullPointerException}.
 *
 * @author czk
 */
public class RegexPaserUtil {

	/** Regex matched at the start of every hit; removed again when extracting text. */
	private final String beginRegex;

	/** Regex matched at the end of every hit; removed again when extracting text. */
	private final String endRegex;

	/** Stateful matcher for {@code beginRegex + textRegex + endRegex}. */
	private final Matcher matcher;

	/** Reluctant "any characters" body; does not cross line terminators. */
	public final static String TEXTTEGEX = ".*?";

	/** Reluctant run of non-word characters. */
	public final static String W = "\\W*?";

	/** Empty string: used as the replacement text and as the initial (empty) input. */
	public final static String N = "";

	/** Reluctant "any characters" body that also crosses line terminators. */
	public final static String TEXTEGEXANDNRT = "[\\s\\S]*?";

	/** Greedy "any characters" body that also crosses line terminators. */
	public final static String zel_all_chars = "[\\s\\S]*";

	/** Regexes whose matches are deleted from extracted text, applied in insertion order. */
	private final List<String> filterRegexList = new ArrayList<String>();

	// Character class accepted by isAllChineseChar: whitespace, ASCII letters,
	// digits, CJK ideographs U+4E00-U+9FA5, Unicode punctuation / separators /
	// marks / numbers, the characters U+3040-U+30FF, and a handful of
	// operator-like symbols.
	public static String All_Chinese_Char = "[\\sA-Za-z\\d\\u4E00-\\u9FA5\\pP\\pZ\\pM\\pN\u3040-\u309F\u30A0-\u30FF+\\-*/\\\\$●=><|\\[\\]]";

	public Pattern All_Chinese_Char_Pattern = Pattern.compile(All_Chinese_Char);

	// A small, hand-picked set of very frequent Chinese characters.
	public static String Frequency_Chinese_Char = "[的与是中网]";
	public Pattern Frequency_Chinese_Char_Pattern = Pattern
			.compile(Frequency_Chinese_Char);

	/**
	 * Creates an instance without delimiters or matcher. Only the character
	 * checks and {@link #addFilterRegex(String)} are usable on it.
	 */
	public RegexPaserUtil() {
		this.beginRegex = null;
		this.endRegex = null;
		this.matcher = null;
	}

	/**
	 * Creates a parser over {@code content}.
	 *
	 * @param beginRegex regex that opens a hit
	 * @param endRegex   regex that closes a hit
	 * @param content    text to search
	 * @param textRegex  regex for the body between the two delimiters
	 */
	public RegexPaserUtil(String beginRegex, String endRegex, String content,
			String textRegex) {
		this.beginRegex = beginRegex;
		this.endRegex = endRegex;
		this.matcher = Pattern.compile(beginRegex + textRegex + endRegex)
				.matcher(content);
	}

	/**
	 * Same as the four-argument constructor but with a different parameter
	 * order.
	 *
	 * @param beginRegex regex that opens a hit
	 * @param textRegex  regex for the body between the two delimiters
	 * @param endRegex   regex that closes a hit
	 * @param content    text to search
	 * @param flag       not used; only distinguishes this overload
	 */
	public RegexPaserUtil(String beginRegex, String textRegex, String endRegex,
			String content, String flag) {
		this(beginRegex, endRegex, content, textRegex);
	}

	/**
	 * Creates a parser with a custom body regex and empty input; call
	 * {@link #reset(String)} to supply the text to search.
	 *
	 * @param beginRegex regex that opens a hit
	 * @param endRegex   regex that closes a hit
	 * @param textRegex  regex for the body between the two delimiters
	 */
	public RegexPaserUtil(String beginRegex, String endRegex, String textRegex) {
		this(beginRegex, endRegex, N, textRegex);
	}

	/**
	 * Creates a parser with the default reluctant body {@link #TEXTTEGEX} and
	 * empty input; call {@link #reset(String)} to supply the text to search.
	 *
	 * @param beginRegex regex that opens a hit
	 * @param endRegex   regex that closes a hit
	 */
	public RegexPaserUtil(String beginRegex, String endRegex) {
		this(beginRegex, endRegex, N, TEXTTEGEX);
	}

	/**
	 * Checks that every UTF-16 char of {@code source} belongs to
	 * {@link #All_Chinese_Char}, i.e. Chinese, English letters, digits and
	 * Chinese/English punctuation.
	 *
	 * @param source text to check
	 * @return {@code true} if {@code source} is {@code null}, blank, or made up
	 *         only of accepted characters; {@code false} otherwise
	 */
	public boolean isAllChineseChar(String source) {
		if (source == null || source.trim().length() == 0) {
			return true;
		}
		for (char c : source.toCharArray()) {
			if (!All_Chinese_Char_Pattern.matcher(String.valueOf(c)).find()) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Tells whether {@code source} contains at least one character from
	 * {@link #Frequency_Chinese_Char}.
	 *
	 * @param source text to check, may be {@code null}
	 * @return {@code true} if such a character occurs; {@code false} for
	 *         {@code null} or when none occurs
	 */
	public boolean isContainFreqChineseChar(String source) {
		return source != null
				&& Frequency_Chinese_Char_Pattern.matcher(source).find();
	}

	/**
	 * Advances to the next hit and returns it whole (delimiters included),
	 * trimmed.
	 *
	 * @return the trimmed hit, or {@code null} if there is no further hit
	 */
	public String getSimpleText() {
		return matcher.find() ? matcher.group().trim() : null;
	}

	/**
	 * Advances to the next hit and returns its body: the hit is trimmed, the
	 * delimiters are removed and the registered filter regexes are applied.
	 *
	 * @return the extracted body, or {@code null} if there is no further hit
	 */
	public String getText() {
		if (!matcher.find()) {
			return null;
		}
		return applyFilters(stripDelimiters(matcher.group().trim()));
	}

	/**
	 * Consumes all remaining hits and returns the body of the last one
	 * (trimmed, delimiters removed, filters not applied).
	 *
	 * @return the body of the last hit, or {@code null} if there was none
	 */
	public String getLastText() {
		String last = null;
		while (matcher.find()) {
			last = stripDelimiters(matcher.group().trim());
		}
		return last;
	}

	/**
	 * Returns the current hit unchanged. Requires a preceding successful
	 * {@link #hasNext()}; otherwise {@link Matcher#group()} throws
	 * {@link IllegalStateException}.
	 *
	 * @return the whole current hit, delimiters included
	 */
	public String getNext() {
		return matcher.group();
	}

	/**
	 * Returns the body of the current hit: trimmed, delimiters removed and
	 * filters applied. Requires a preceding successful {@link #hasNext()}.
	 *
	 * @return the extracted body of the current hit
	 */
	public String getNextTxt() {
		return applyFilters(stripDelimiters(matcher.group().trim()));
	}

	/**
	 * Returns the current hit with the registered filter regexes applied but
	 * the delimiters kept. Requires a preceding successful {@link #hasNext()}.
	 *
	 * @return the filtered current hit
	 */
	public String getNextAddFilter() {
		return applyFilters(matcher.group());
	}

	/**
	 * Returns the body of the current hit while iterating: delimiters removed,
	 * no trimming and no filters. Requires a preceding successful
	 * {@link #hasNext()}.
	 *
	 * @return the body of the current hit
	 */
	public String getNextText() {
		return stripDelimiters(matcher.group());
	}

	/**
	 * Advances the matcher to the next hit.
	 *
	 * @return {@code true} if another hit was found
	 */
	public boolean hasNext() {
		return matcher.find();
	}

	/**
	 * Replaces the text being searched and restarts matching from its
	 * beginning.
	 *
	 * @param content new text to search
	 * @return this instance, for chaining
	 */
	public RegexPaserUtil reset(String content) {
		this.matcher.reset(content);
		return this;
	}

	/**
	 * Registers a regex whose matches are deleted from text returned by
	 * {@link #getText()}, {@link #getNextTxt()} and
	 * {@link #getNextAddFilter()}.
	 *
	 * @param filterRegex regex to delete
	 * @return this instance, for chaining
	 */
	public RegexPaserUtil addFilterRegex(String filterRegex) {
		filterRegexList.add(filterRegex);
		return this;
	}

	/**
	 * Consumes all remaining hits and joins their bodies (trimmed, delimiters
	 * removed, filters not applied) with {@code '#'}.
	 *
	 * @return the joined bodies, or an empty string if there was no hit
	 */
	public String getTextList() {
		StringBuilder joined = new StringBuilder();
		boolean first = true;
		while (matcher.find()) {
			if (!first) {
				joined.append('#');
			}
			joined.append(stripDelimiters(matcher.group().trim()));
			first = false;
		}
		return joined.toString();
	}

	/** Removes the first begin-delimiter match and every end-delimiter match from a hit. */
	private String stripDelimiters(String hit) {
		return hit.replaceFirst(beginRegex, N).replaceAll(endRegex, N);
	}

	/** Deletes every match of each registered filter regex, in registration order. */
	private String applyFilters(String text) {
		String filtered = text;
		for (String filterRegex : filterRegexList) {
			filtered = filtered.replaceAll(filterRegex, N);
		}
		return filtered;
	}

	/**
	 * Small demo: extracts the news source from a sample line, first with this
	 * class and then with a plain capturing group.
	 */
	public static void main(String[] args) {
		String source = "2017-08-15  来源：亿邦动力网";

		// The delimiter must use the same full-width colon as the sample text,
		// otherwise nothing matches and getText() returns null.
		String begin = "来源：";
		String end = "$";
		RegexPaserUtil regexPaserUtil = new RegexPaserUtil(begin, end);
		regexPaserUtil.reset(source);

		System.out.println(regexPaserUtil.getText());

		Pattern pattern = Pattern.compile("来源：([\\s\\S]*)");
		Matcher match = pattern.matcher(source);

		if (match.find()) {
			System.out.println(match.group(1));
		} else {
			System.out.println("not find!");
		}
	}
}