Java之正则表达式常用操作

  • 工具类意义描述
    • 正则表达式操作是最典型、最常用的技术之一,在绝大多数项目开发当中均会有所涉及。
  • 代码实现
  • package com.czk.regex_utils;
    
    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Regex helper that extracts the text found between a "begin" delimiter
     * pattern and an "end" delimiter pattern.
     *
     * <p>The begin regex, a filler regex and the end regex are concatenated as-is
     * (no grouping is added) into one pattern. Matches of that pattern can be
     * consumed either one-shot ({@link #getText()}, {@link #getLastText()},
     * {@link #getTextList()}) or iteratively ({@link #hasNext()} followed by one of
     * the {@code getNext*} accessors).
     *
     * <p>Delimiters are removed from a match by deleting the first occurrence of
     * the begin regex and <em>every</em> occurrence of the end regex. Methods that
     * trim do so <em>before</em> the delimiters are removed, so a begin/end regex
     * that requires leading/trailing whitespace will not be stripped by them.
     *
     * <p>An instance created with the no-argument constructor has no matcher; only
     * {@link #isAllChineseChar(String)} and
     * {@link #isContainFreqChineseChar(String)} are usable on it, every
     * matcher-backed method throws {@link NullPointerException}.
     *
     * <p>Instances wrap a single {@link Matcher} and are therefore not thread-safe.
     *
     * @author czk
     */
    public class RegexPaserUtil {

        /** Lazy filler: any characters except line terminators, as few as possible. */
        public static final String TEXTTEGEX = ".*?";

        /** Lazy run of non-word characters. */
        public static final String W = "\\W*?";

        /** Empty string; used as the replacement text and as the initial content. */
        public static final String N = "";

        /** Lazy filler that also spans line terminators. */
        public static final String TEXTEGEXANDNRT = "[\\s\\S]*?";

        /** Greedy filler that also spans line terminators. */
        public static final String zel_all_chars = "[\\s\\S]*";

        // Accepted-character class used by isAllChineseChar. A previous variant is
        // kept here for reference:
        // "[·!/|“”?:()()—\\s、,;.,。;!?\\-_A-Za-z\\d\\u4E00-\\u9FA5 ^ :>~&'\\=>%@+\\pP\\pZ\\pM\\pS]"
        public static String All_Chinese_Char = "[\\sA-Za-z\\d\\u4E00-\\u9FA5\\pP\\pZ\\pM\\pN\u3040-\u309F\u30A0-\u30FF+\\-*/\\\\$●=><|\\[\\]]";

        public Pattern All_Chinese_Char_Pattern = Pattern.compile(All_Chinese_Char);

        // Hand-picked set of very common Chinese characters.
        public static String Frequency_Chinese_Char = "[的与是中网]";

        public Pattern Frequency_Chinese_Char_Pattern = Pattern
                .compile(Frequency_Chinese_Char);

        private final String beginRegex;

        private final String endRegex;

        private final Matcher matcher;

        /** Regexes whose matches are deleted from extracted text, in insertion order. */
        private final List<String> filterRegexList = new ArrayList<String>();

        // Compiled lazily (not in the constructor) so that a delimiter which is only
        // valid as part of the combined expression still fails at the same point as
        // before: on the first call that strips delimiters.
        private Pattern beginPattern;

        private Pattern endPattern;

        /**
         * Creates an instance without any matcher. Only the character-class checks
         * may be used on it.
         */
        public RegexPaserUtil() {
            this.beginRegex = null;
            this.endRegex = null;
            this.matcher = null;
        }

        /**
         * Matches {@code beginRegex + textRegex + endRegex} against {@code content}.
         *
         * @param beginRegex regex of the leading delimiter
         * @param endRegex   regex of the trailing delimiter
         * @param content    text to search
         * @param textRegex  filler regex placed between the two delimiters
         */
        public RegexPaserUtil(String beginRegex, String endRegex, String content,
                String textRegex) {
            this.beginRegex = beginRegex;
            this.endRegex = endRegex;
            this.matcher = buildMatcher(beginRegex, textRegex, endRegex, content);
        }

        /**
         * Matches {@code beginRegex + textRegex + endRegex} against {@code content}.
         * Note the parameter order differs from the four-argument constructor.
         *
         * @param beginRegex regex of the leading delimiter
         * @param textRegex  filler regex placed between the two delimiters
         * @param endRegex   regex of the trailing delimiter
         * @param content    text to search
         * @param flag       currently ignored
         */
        public RegexPaserUtil(String beginRegex, String textRegex, String endRegex,
                String content, String flag) {
            this.beginRegex = beginRegex;
            this.endRegex = endRegex;
            this.matcher = buildMatcher(beginRegex, textRegex, endRegex, content);
        }

        /**
         * Prepares {@code beginRegex + textRegex + endRegex} with empty content; call
         * {@link #reset(String)} to supply the text to search.
         *
         * @param beginRegex regex of the leading delimiter
         * @param endRegex   regex of the trailing delimiter
         * @param textRegex  filler regex placed between the two delimiters
         */
        public RegexPaserUtil(String beginRegex, String endRegex, String textRegex) {
            this.beginRegex = beginRegex;
            this.endRegex = endRegex;
            this.matcher = buildMatcher(beginRegex, textRegex, endRegex, N);
        }

        /**
         * Prepares {@code beginRegex + ".*?" + endRegex} with empty content; call
         * {@link #reset(String)} to supply the text to search.
         *
         * @param beginRegex regex of the leading delimiter
         * @param endRegex   regex of the trailing delimiter
         */
        public RegexPaserUtil(String beginRegex, String endRegex) {
            this.beginRegex = beginRegex;
            this.endRegex = endRegex;
            this.matcher = buildMatcher(beginRegex, TEXTTEGEX, endRegex, N);
        }

        /** Concatenates the three regex parts verbatim and binds the result to {@code content}. */
        private static Matcher buildMatcher(String beginRegex, String textRegex,
                String endRegex, String content) {
            StringBuilder regex = new StringBuilder();
            regex.append(beginRegex).append(textRegex).append(endRegex);
            return Pattern.compile(regex.toString()).matcher(content);
        }

        /** Deletes the first begin-delimiter match and every end-delimiter match from {@code matched}. */
        private String stripDelimiters(String matched) {
            if (beginPattern == null) {
                beginPattern = Pattern.compile(beginRegex);
            }
            String withoutBegin = beginPattern.matcher(matched).replaceFirst(N);
            if (endPattern == null) {
                endPattern = Pattern.compile(endRegex);
            }
            return endPattern.matcher(withoutBegin).replaceAll(N);
        }

        /** Deletes every match of each registered filter regex, in registration order. */
        private String applyFilters(String text) {
            String result = text;
            for (String filterRegex : filterRegexList) {
                result = result.replaceAll(filterRegex, N);
            }
            return result;
        }

        /**
         * Checks that every UTF-16 code unit of {@code source} is accepted by
         * {@link #All_Chinese_Char_Pattern} (Chinese, Latin letters, digits,
         * punctuation and the other symbols listed in that character class).
         *
         * @param source text to check
         * @return {@code true} if {@code source} is {@code null}, blank, or consists
         *         only of accepted characters
         */
        public boolean isAllChineseChar(String source) {
            if (source == null || source.trim().length() == 0) {
                return true;
            }
            for (int i = 0; i < source.length(); i++) {
                String single = String.valueOf(source.charAt(i));
                if (!All_Chinese_Char_Pattern.matcher(single).find()) {
                    return false;
                }
            }
            return true;
        }

        /**
         * @param source text to check
         * @return {@code true} if {@code source} is non-null and contains at least
         *         one character matched by {@link #Frequency_Chinese_Char_Pattern}
         */
        public boolean isContainFreqChineseChar(String source) {
            return source != null
                    && Frequency_Chinese_Char_Pattern.matcher(source).find();
        }

        /**
         * Advances to the next match.
         *
         * @return the trimmed match including both delimiters, or {@code null} if
         *         there is no further match
         */
        public String getSimpleText() {
            if (!matcher.find()) {
                return null;
            }
            return matcher.group().trim();
        }

        /**
         * Advances to the next match.
         *
         * @return the trimmed match with delimiters removed and filters applied, or
         *         {@code null} if there is no further match
         */
        public String getText() {
            if (!matcher.find()) {
                return null;
            }
            return applyFilters(stripDelimiters(matcher.group().trim()));
        }

        /**
         * Consumes all remaining matches.
         *
         * @return the last match, trimmed and with delimiters removed (filters are
         *         not applied), or {@code null} if nothing matched
         */
        public String getLastText() {
            String last = null;
            while (matcher.find()) {
                last = stripDelimiters(matcher.group().trim());
            }
            return last;
        }

        /**
         * @return the current match exactly as matched
         * @throws IllegalStateException if no match is current, e.g.
         *                               {@link #hasNext()} was not called or returned
         *                               {@code false}
         */
        public String getNext() {
            return matcher.group();
        }

        /**
         * @return the current match, trimmed, with delimiters removed and filters
         *         applied
         * @throws IllegalStateException if no match is current
         */
        public String getNextTxt() {
            return applyFilters(stripDelimiters(matcher.group().trim()));
        }

        /**
         * @return the current match with filters applied; delimiters are kept and
         *         the text is not trimmed
         * @throws IllegalStateException if no match is current
         */
        public String getNextAddFilter() {
            return applyFilters(matcher.group());
        }

        /**
         * For use while iterating: yields the inner text instead of the whole match.
         *
         * @return the current match with delimiters removed; not trimmed, filters not
         *         applied
         * @throws IllegalStateException if no match is current
         */
        public String getNextText() {
            return stripDelimiters(matcher.group());
        }

        /**
         * Advances to the next match.
         *
         * @return {@code true} if another match was found
         */
        public boolean hasNext() {
            return matcher.find();
        }

        /**
         * Replaces the text being searched and restarts matching from its beginning.
         *
         * @param content new text to search
         * @return this instance
         */
        public RegexPaserUtil reset(String content) {
            this.matcher.reset(content);
            return this;
        }

        /**
         * Registers a regex whose matches are deleted by the filtering accessors.
         *
         * @param filterRegex regex to delete from extracted text
         * @return this instance
         */
        public RegexPaserUtil addFilterRegex(String filterRegex) {
            filterRegexList.add(filterRegex);
            return this;
        }

        /**
         * Consumes all remaining matches.
         *
         * @return every match, trimmed and with delimiters removed (filters are not
         *         applied), joined with {@code '#'}; the empty string if nothing
         *         matched
         */
        public String getTextList() {
            StringBuilder joined = new StringBuilder();
            boolean first = true;
            while (matcher.find()) {
                if (!first) {
                    joined.append("#");
                }
                joined.append(stripDelimiters(matcher.group().trim()));
                first = false;
            }
            return joined.toString();
        }

        /** Small demo: extracts the source name from a sample line in two ways. */
        public static void main(String[] args) {
            // Example of the four-argument constructor:
            // String beginRegex = "<dd" + RegexPaserUtil.TEXTEGEXANDNRT + "</a>";
            // String endRegex = "<span>";
            // String text = "<dd>    <a a b c>1</a>//@<a b c d>2</a>3 4<span>";
            // RegexPaserUtil ansjSayUrl = new RegexPaserUtil(beginRegex, endRegex,
            // text, RegexPaserUtil.TEXTEGEXANDNRT);
            String source = "2017-08-15  来源:亿邦动力网";

            String begin = "来源:";
            String end = "$";
            RegexPaserUtil regexPaserUtil = new RegexPaserUtil(begin, end);
            regexPaserUtil.reset(source);

            System.out.println(regexPaserUtil.getText());

            // Same extraction done directly with a capturing group.
            Pattern pattern = Pattern.compile("来源:([\\s\\S]*)");
            Matcher match = pattern.matcher(source);

            if (match.find()) {
                System.out.println(match.group(1));
            } else {
                System.out.println("not find!");
            }
        }
    }
    
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值