背景:
业务中大量使用正则表达式进行逻辑处理,线上某些极端case(例如正则回溯过深)会导致CPU使用率暴增,且无法自行恢复,只能重启服务
影响:
1 会对其他业务方产生熔断,可能发生大面积报错
2 如果发生在凌晨,无法及时响应
解决办法:
1 优化正则表达式写法(由于业务配置了非常多正则表达式,比较麻烦,是个长期的过程)
2 代码层面进行超时中断,做兜底处理
下面的代码即是解决办法2的具体处理方式。
需要注意的点:
1 重点是重写CharSequence的charAt方法,在其中进行超时中断的处理
2 强依赖获取当前时间的方法,高并发场景下System.currentTimeMillis()存在性能问题,具体参见SystemClockUtils的处理方式。
代码如下:
pom文件内容
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>tools</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.4</version>
</dependency>
</dependencies>
</project>
具体代码:
package com.wanjinyu.tool.regex;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regular-expression matching with a wall-clock timeout.
 *
 * <p>The input string is wrapped in a {@link CharSequence} whose {@code charAt} checks a deadline
 * on every call and throws once the deadline has passed, so a match attempt that keeps reading
 * characters (for example because of heavy backtracking) is aborted instead of running on.
 */
public class RegularExpressionUtils {
    /**
     * Regular-expression timeout, in milliseconds.
     */
    private static final int REGULAR_EXPRESSION_TIMEOUT_MILLIS = 1000;

    /**
     * Searches {@code stringToMatch} for the first occurrence of {@code regularExpression},
     * giving up once the timeout has elapsed.
     *
     * <p>The pattern is compiled before the guarded section, so an exception raised while
     * compiling an invalid expression propagates to the caller.
     *
     * @param stringToMatch     the text to search
     * @param regularExpression the regular expression to compile and apply
     * @return a result with {@code find == true} and the positioned matcher when a match was
     *         found; a result with {@code find == false} and no matcher when nothing matched or
     *         when the match attempt threw (including the timeout)
     */
    public static RegularExpressionResult getRegularExpressionResult(String stringToMatch, String regularExpression) {
        Matcher matcher =
            createMatcherWithTimeout(stringToMatch, regularExpression, REGULAR_EXPRESSION_TIMEOUT_MILLIS);
        try {
            if (matcher.find()) {
                // NOTE(review): the returned matcher still reads through the deadline-checking
                // wrapper, so further find() calls made after the deadline will presumably throw.
                return RegularExpressionResult.builder().find(true).matcher(matcher).build();
            }
            return RegularExpressionResult.builder().find(false).build();
        } catch (Exception e) {
            // Deliberate best-effort fallback: any failure during matching is reported as "no match".
            System.out.println("正则表达式超时自动中断." + e.toString());
            return RegularExpressionResult.builder().find(false).build();
        }
    }

    /**
     * Compiles {@code regularExpression} and creates a deadline-guarded matcher for it.
     */
    private static Matcher createMatcherWithTimeout(String stringToMatch, String regularExpression,
        int timeoutMillis) {
        Pattern pattern = Pattern.compile(regularExpression);
        return createMatcherWithTimeout(stringToMatch, pattern, timeoutMillis);
    }

    /**
     * Creates a matcher over a wrapper of {@code stringToMatch} whose deadline starts now.
     */
    private static Matcher createMatcherWithTimeout(String stringToMatch, Pattern regularExpressionPattern,
        int timeoutMillis) {
        CharSequence charSequence = new TimeoutRegexCharSequence(stringToMatch, timeoutMillis, stringToMatch,
            regularExpressionPattern.pattern());
        return regularExpressionPattern.matcher(charSequence);
    }

    /**
     * {@link CharSequence} decorator that throws from {@link #charAt(int)} once a deadline passes.
     */
    private static class TimeoutRegexCharSequence implements CharSequence {
        private final CharSequence inner;
        /** Configured timeout; kept only for the error message. */
        private final int timeoutMillis;
        /** Absolute deadline, on the {@code SystemClockUtils.now()} time base. */
        private final long timeoutTime;
        /** Original full input; kept only for the error message. */
        private final String stringToMatch;
        /** Pattern text; kept only for the error message. */
        private final String regularExpression;

        /**
         * Wraps {@code inner} with a deadline of "now + {@code timeoutMillis}".
         */
        public TimeoutRegexCharSequence(CharSequence inner, int timeoutMillis, String stringToMatch,
            String regularExpression) {
            this(inner, timeoutMillis, SystemClockUtils.now() + timeoutMillis, stringToMatch, regularExpression);
        }

        /**
         * Wraps {@code inner} with an already-computed absolute deadline, so that derived
         * sequences share the deadline of the sequence they were cut from.
         */
        private TimeoutRegexCharSequence(CharSequence inner, int timeoutMillis, long timeoutTime,
            String stringToMatch, String regularExpression) {
            this.inner = inner;
            this.timeoutMillis = timeoutMillis;
            this.timeoutTime = timeoutTime;
            this.stringToMatch = stringToMatch;
            this.regularExpression = regularExpression;
        }

        /**
         * Returns the character at {@code index}, first checking the deadline.
         *
         * @throws RuntimeException if the deadline has passed
         */
        @Override
        public char charAt(int index) {
            if (SystemClockUtils.now() > timeoutTime) {
                throw new RuntimeException(
                    timeoutMillis + "毫秒后正则表达式仍未得到结果.regularExpression:" + regularExpression + " stringToMatch:"
                        + stringToMatch);
            }
            return inner.charAt(index);
        }

        @Override
        public int length() {
            return inner.length();
        }

        /**
         * Returns a guarded view of the requested range that keeps this sequence's deadline
         * rather than starting a new timeout window.
         */
        @Override
        public CharSequence subSequence(int start, int end) {
            return new TimeoutRegexCharSequence(inner.subSequence(start, end), timeoutMillis, timeoutTime,
                stringToMatch, regularExpression);
        }

        @Override
        public String toString() {
            return inner.toString();
        }
    }
}
package com.wanjinyu.tool.regex;
import java.util.regex.Matcher;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Outcome of one regular-expression match attempt, as returned by
 * {@code RegularExpressionUtils.getRegularExpressionResult}. Accessors, builder and
 * constructors are generated by the Lombok annotations below.
 *
 * @date: 2020/8/17 4:35 PM
 * @author: wanjinyu
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class RegularExpressionResult {
/**
 * Whether a match was found. This is a boxed {@code Boolean}, so it stays {@code null}
 * on an instance where it was never set (e.g. one made with the no-args constructor).
 */
private Boolean find;
/**
 * Note: when find is false, matcher is null. When find is true, the matcher can be used
 * for follow-up processing, e.g. to record detailed match information.
 */
private Matcher matcher;
}
package com.wanjinyu.tool.regex;
import java.sql.Timestamp;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import sun.misc.Contended;
/**
 * Cached millisecond clock, meant as a cheaper substitute for calling
 * {@code System.currentTimeMillis()} directly under high concurrency.
 *
 * <p>A single daemon thread refreshes the cached value at a fixed rate; readers only perform a
 * volatile read, so the value they see is at most as fresh as the last refresh.
 */
public class SystemClockUtils {
    /** Refresh interval of the cached timestamp, in milliseconds. */
    private final long period;

    /**
     * Most recently sampled wall-clock time, in milliseconds since the epoch.
     * NOTE(review): {@code @Contended} on non-JDK classes presumably only takes effect with
     * {@code -XX:-RestrictContended} - confirm.
     */
    @Contended
    private volatile long now;

    private SystemClockUtils(long period) {
        this.period = period;
        this.now = System.currentTimeMillis();
        scheduleClockUpdating();
    }

    /** Holds the singleton, which is created when this holder class is initialized. */
    private static class InstanceHolder {
        public static final SystemClockUtils INSTANCE = new SystemClockUtils(1);
    }

    private static SystemClockUtils instance() {
        return InstanceHolder.INSTANCE;
    }

    /** Starts the daemon thread that re-samples the system clock every {@code period} ms. */
    private void scheduleClockUpdating() {
        ThreadFactory daemonFactory = task -> {
            Thread worker = new Thread(task, "SystemClockUtils");
            worker.setDaemon(true);
            return worker;
        };
        ScheduledExecutorService ticker = Executors.newSingleThreadScheduledExecutor(daemonFactory);
        ticker.scheduleAtFixedRate(this::refresh, period, period, TimeUnit.MILLISECONDS);
    }

    /** Overwrites the cached timestamp with the current system time. */
    private void refresh() {
        now = System.currentTimeMillis();
    }

    private long currentTimeMillis() {
        return now;
    }

    /**
     * @return the cached current time in milliseconds since the epoch
     */
    public static long now() {
        return instance().currentTimeMillis();
    }

    /**
     * @return the cached current time rendered via {@code java.sql.Timestamp#toString()}
     */
    public static String nowDate() {
        long cachedMillis = instance().currentTimeMillis();
        Timestamp stamp = new Timestamp(cachedMillis);
        return stamp.toString();
    }
}
package com.wanjinyu.tool.regex;
/**
* description: TODO
*
* @date: 2020/9/22 10:42 上午
* @author: wanjinyu
*/
public class Test {
public static void main(String[] args) {
RegularExpressionResult regularExpressionResult =
RegularExpressionUtils.getRegularExpressionResult("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", "(x+x+)+y");
if (regularExpressionResult.getFind()) {
System.out.println("匹配成功");
} else {
System.out.println("匹配失败");
}
System.out.println("----------------");
regularExpressionResult =
RegularExpressionUtils.getRegularExpressionResult("xxxxxxx", "x*");
if (regularExpressionResult.getFind()) {
System.out.println("匹配成功");
} else {
System.out.println("匹配失败");
}
}
}
执行 Test 类的 main 方法,结果为: