一、公用正则
package com.xxx.saas.common.sdk.constant;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Commonly used, precompiled regular expressions.
 *
 * @author zhangs
 */
public class Patterns {
    /**
     * Matches one or more consecutive whitespace characters.
     */
    public static final Pattern WHITE_SPACE_PATTERN = Pattern.compile("\\s+");
    /**
     * Matches a single line-break character (line feed or carriage return).
     */
    public static final Pattern LINE_SEPARATOR = Pattern.compile("[\n\r]");
    /**
     * Matches an 11-digit mobile phone number whose first three digits are one of:
     * 130-139, 145, 147, 150-153, 155-159, 170, 171, 173, 176, 177, 178, 180, 185-189.
     * <p>
     * The character classes deliberately contain digits only: inside brackets the
     * characters '|' and ',' are literals, not separators, so writing them there would
     * make inputs such as "14|12345678" or "18,12345678" pass validation.
     */
    public static final Pattern MOBILE_PATTERN = Pattern.compile("^((13[0-9])|(14[57])|(15([0-3]|[5-9]))|(17[013678])|(18[05-9]))\\d{8}$");
    /**
     * Matches a run of one or more Chinese characters in the range U+4E00 to U+9FA5.
     */
    public static final Pattern CHINESE_PATTERN = Pattern.compile("[\u4e00-\u9fa5]+");
    /**
     * Matches a quantity written in any of several mixed notations: an optional number
     * (digits with optional thousands separators and optional decimal part, or Chinese
     * numerals), optionally followed by k/K, optionally followed by a unit
     * (pcs, PCS, 个, 盘, 只, 支, 件, 台). Example input: "235,000.43".
     * <p>
     * NOTE(review): every part of the expression is optional, so the empty string also
     * matches; callers should reject blank input before using this pattern.
     * <p>
     * Earlier version of the expression:
     * ^(([1-9][0-9]*0+)|(\d+\.\d+[kK](pcs|PCS|个|盘|只|支|件|台)?)|((\d+|[零〇一壹二两贰三叁四肆五伍六陆七柒八捌九玖十拾百佰千仟万萬亿億]+)(k|K|pcs|PCS|个|盘|只|支|件|台)))$
     */
    public static final Pattern COUNT_PATTERN = Pattern.compile("^((\\d{1,3}(,\\d{3})*|\\d*)(\\.\\d+)?|\\d+|[零〇一壹二两贰三叁四肆五伍六陆七柒八捌九玖十拾百佰千仟万萬亿億]+)?(k|K)?(pcs|PCS|个|盘|只|支|件|台)?$");
    /**
     * Matches an ASCII letter or digit immediately followed by four more copies of
     * itself, i.e. five identical characters in a row.
     * <p>
     * NOTE(review): the previous comment described this as "six consecutive repeated
     * digits or characters", but the expression triggers at five; confirm which
     * threshold is intended before changing either.
     */
    public static final Pattern IMMEDIATE_REPETITION_PATTERN = Pattern.compile("([0-9a-zA-Z])\\1{4}");
    /**
     * Matches a run of special characters to be filtered out: ASCII and Chinese
     * punctuation, the space character, hyphen, backslash and square brackets.
     * <p>
     * The square brackets are escaped individually. An unescaped '[' inside a character
     * class opens a nested class in Java, which silently drops the brackets themselves
     * from the set of matched characters.
     */
    public static final Pattern FILTER_SPECIAL_CHARACTERS_PATTERN = Pattern.compile("[`~!@#$%^&*()+=|{}':;',_\\-\\\\\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。 ,、?]+");
    /**
     * Matches a run of special characters used as a separator when splitting text.
     */
    public static final Pattern SPLIT_SPECIAL_CHARACTERS_PATTERN = Pattern.compile("[`~!@#$%^&*\\\\+=|{}':;\"',_\\[\\].<>/?·¥……{}【】‘;:”“’《》。,、]+");
}
二、基础辅助类(空格/特殊字符)
package com.xxx.saas.common.sdk.util;
import org.apache.commons.lang3.RegExUtils;
import org.apache.commons.lang3.StringUtils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.xxx.saas.common.sdk.constant.Patterns;
/**
* @author zhangs
*/
public class StringProcessor {
/**
* Full-match pattern for a spec string: the text must start with one of the listed
* four-digit codes, followed by a separator (whitespace, '-' or '_'), then one or more
* characters from the set . ( ) letters digits whitespace - / ± %, and must end with
* letters, digits or ')'.
* NOTE(review): the four-digit codes look like component package/size codes
* (e.g. 0402, 0805) - confirm with the owner of the list.
*/
private static final Pattern SPEC_PATTERN = Pattern.compile("^(0201|0302|0402|0504|1608|0603|0604|1010|1080|1090|1110|1175|1245|1212|1256|1290|5025|1310|1312|1313|1395|1413|1412|1717|1814|1815|2010|2012|0805|2016|0806|2020|2424|2515|2518|2520|1008|2322|3025|3010|3012|3015|3018|3020|3216|1206|3218|3225|1210|3232|3330|3530|3532|3838|4032|4010|4012|4015|4018|4020|4024|4026|4028|4030|4442|4516|4532|1812|4540|4641|4740|4747|4842|5040|5047|5010|5012|5014|5015|5020|5022|5024|5028|5030|5045|5353|5451|5452|5550|2220|5752|5754|5755|5757|5852|5855|6010|6012|6014|6016|6020|6025|6028|6030|6033|6040|6045|6050|2525|2512|6440|6545|6560|6767|6865|7066|7068|7028|7030|7032|7040|7045|7050|7055|7070|7165|7366|7368|7373|7565|7570|7525|7540|7575|7665|7870|8070|8028|8030|8038|8040|8043|8050|8065|8345|8580|8540|8780|8961|9070|1515|3131|4040|4922|2510|3013|8532|3519|3113|4924|1616|4308|5050|0403|4513|5315|1306|8527|1007|1207|4045|4949|2828|4141|1006|3640|6855|3825|5832|4825|4027|4841|7832|1111|6562|4031|2727|5151|8787|5931|0202|0306|0303|0404|0505|0508|0606|0612|1052|3921|4120|1062|3920|4033|4527|1282|2958|1577|5930|0408|0815|0830|1020|2395|1216|1218|1225|3720|1530|0616|1835|2043|2412|6464|2728|2726|2725|2817|2816|2818|3637|1122|1435|2258|2208|1505|1005|3737|0502|2615|4525|4318|3014|5322|6327|1913|3523|3725|4029|0510|0705|4528|2309|1406|1002|3060|2526|2040|4124|6227|8035|3512|2207|3522|0804|1806|1810|0204|0304|1411|3829|1808)(\\s+|-|_)[.()a-zA-Z0-9\\s-/±%]+[A-Za-z0-9)]+$");
/**
* Keyword pattern that allows embedded whitespace: one or more characters from the set
* ( ) letters digits . / - # _ : ± % & = + whitespace, ending with letters, digits,
* ')', '+' or '%'.
*/
private static final Pattern KEYWORD_PATTERN_WITH_SPACE = Pattern.compile("^[()a-zA-Z0-9./\\-#_:±%&=\\+\\s]+[A-Za-z0-9)\\+%]+$");
/**
* Goods-code prefixes "0-0" through "9-0". Their use is not visible in this part of the file.
*/
private static final String[] GOODS_CODE_PREFIX = new String[]{"0-0", "1-0", "2-0", "3-0", "4-0", "5-0", "6-0", "7-0", "8-0", "9-0"};
/**
* Minimum length a string must have to be accepted by isValidElectronicKeyword.
*/
private static final int ELECTRONIC_MIN_KEYWORD_LENGTH = 5;
/**
* Alternative (shorter) minimum keyword length. Its use is not visible in this part of the file.
*/
public static final int ELECTRONIC_MIN_KEYWORD_LENGTH_V2 = 2;
/**
* Special symbols: matches a string made up entirely of # _ : ± % & = + and whitespace.
*/
private static final Pattern SPECIFIC_SYMBOL_PATTERN = Pattern.compile("^[#_:±%&=\\+\\s]+$");
/**
 * Normalizes whitespace: trims both ends and collapses every internal run of
 * whitespace into a single space.
 *
 * @param text the text to normalize; may be null
 * @return the normalized text, or the input itself when it is null, empty or blank
 */
public static String cleanSpace(String text) {
    if (!StringUtils.isBlank(text)) {
        // Trim first so that leading/trailing whitespace does not become a single space.
        String trimmed = StringUtils.trim(text);
        return RegExUtils.replaceAll(trimmed, Patterns.WHITE_SPACE_PATTERN, " ");
    }
    return text;
}
/**
 * Tells whether the given text is matched by SPEC_PATTERN.
 *
 * @param str the candidate text; may be null
 * @return false for null or blank input, otherwise whether SPEC_PATTERN is found in it
 */
public static boolean isElectronicSpec(String str) {
    return !StringUtils.isBlank(str) && SPEC_PATTERN.matcher(str).find();
}
/**
 * Tells whether the given text is an acceptable keyword: it must be matched by
 * KEYWORD_PATTERN_WITH_SPACE and also pass isValidElectronicKeyword.
 *
 * @param str the candidate text; may be null
 * @return false for null or blank input, otherwise the result of both checks
 */
public static boolean isElectronicKeyword(String str) {
    if (StringUtils.isBlank(str)) {
        return false;
    }
    boolean shapeMatches = KEYWORD_PATTERN_WITH_SPACE.matcher(str).find();
    if (!shapeMatches) {
        return false;
    }
    return isValidElectronicKeyword(str);
}
/**
 * Drops a trailing quantity token from a keyword.
 * <p>
 * The last space-separated token is inspected; when it is recognized as a quantity
 * (for example it ends in K or is a plain number) that token is discarded. For example
 * "0805 332J NPO 8K", "0805 332J NPO 800" and "0805 332J NPO 5" are all cleaned to
 * "0805 332J NPO".
 *
 * @param keyword the raw keyword; may be null
 * @return the keyword without its trailing quantity (trimmed), or the keyword unchanged
 *         when no trailing quantity is detected
 */
public static String removeEndCount(String keyword) {
    String endCount = getCountInKeyword(keyword);
    if (endCount == null) {
        return keyword;
    }
    // getCountInKeyword extracts the suffix from a whitespace-normalized copy of the
    // keyword. Trim the raw keyword before stripping, otherwise trailing whitespace in
    // the input prevents the suffix from being recognized as the end of the string.
    return removeEnd(StringUtils.trim(keyword), endCount);
}
/**
 * Removes the given suffix from the end of the keyword, if present, and trims the result.
 *
 * @param keyword  the text to strip
 * @param endCount the suffix to remove
 * @return the trimmed text after suffix removal
 */
public static String removeEnd(String keyword, String endCount) {
    return StringUtils.trim(StringUtils.removeEnd(keyword, endCount));
}
/**
 * Tells whether the given text is a quantity as described by Patterns.COUNT_PATTERN.
 * <p>
 * Null, empty and blank input is rejected up front, matching the behavior of
 * isElectronicSpec and isElectronicKeyword. Without this guard a null argument would
 * raise a NullPointerException, and the empty string would be reported as a quantity
 * because every part of COUNT_PATTERN is optional.
 *
 * @param str the candidate text; may be null
 * @return true when the text is non-blank and COUNT_PATTERN is found in it
 */
public static boolean isElectronicCount(String str) {
    if (StringUtils.isBlank(str)) {
        return false;
    }
    return Patterns.COUNT_PATTERN.matcher(str).find();
}
/**
 * Applies the validity rules for a keyword.
 * <p>
 * A keyword is rejected when it is null, when it is recognized as a quantity that is not
 * written purely in digits, when it is shorter than ELECTRONIC_MIN_KEYWORD_LENGTH, or
 * when it consists only of letters and spaces.
 *
 * @param str the candidate keyword; may be null
 * @return true when none of the rejection rules apply
 */
public static boolean isValidElectronicKeyword(String str) {
    if (str == null) {
        return false;
    }
    // A quantity that is not expressed in plain digits is never a valid keyword.
    if (isElectronicCount(str) && !StringUtils.isNumeric(str)) {
        return false;
    }
    if (str.length() < ELECTRONIC_MIN_KEYWORD_LENGTH) {
        return false;
    }
    return !StringUtils.isAlphaSpace(str);
}
/**
 * Extracts the trailing quantity token of a keyword, if there is one.
 * <p>
 * The keyword is whitespace-normalized first; the text after its last space is returned
 * when it is non-blank and recognized by isElectronicCount.
 *
 * @param keyword the keyword to inspect; may be null
 * @return the trailing quantity token, or null when there is none
 */
public static String getCountInKeyword(String keyword) {
    String normalized = StringProcessor.cleanSpace(keyword);
    String lastToken = StringUtils.substringAfterLast(normalized, " ");
    boolean isCount = !StringUtils.isBlank(lastToken) && isElectronicCount(lastToken);
    return isCount ? lastToken : null;
}
/**
 * Delegates to ChineseNumberUtil.convertChineseNumber.
 *
 * @param countStr the quantity text to convert
 * @return whatever ChineseNumberUtil.convertChineseNumber returns for the input
 */
public static String convertValue(String countStr) {
    String converted = ChineseNumberUtil.convertChineseNumber(countStr);
    return converted;
}
/**
 * Removes every run of Chinese characters (as defined by Patterns.CHINESE_PATTERN)
 * from the text.
 *
 * @param text the text to clean; may be null
 * @return the text without Chinese characters, or the input itself when it is null,
 *         empty or blank
 */
public static String removeChinese(String text) {
    return StringUtils.isBlank(text)
            ? text
            : RegExUtils.removeAll(text, Patterns.CHINESE_PATTERN);
}
public static String formatElectronicProductCode(String goodsCode) {
if (StringUtils.isBlank(goodsCode)) {
return goodsCode;
}
//符合规格,直接返回。不做处理
if (isElectronicSpec(goodsCod