Java正则达式引起死循环导致服务器负载过高

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/goldenfish1919/article/details/49123787

今天例行top检查服务器的时候,忽然发现负载竟然到了30多!我勒个去啊!

进程16319的cpu负载到了198.3%,出现这么高的负载很有可能有死循环!

jstack打印堆栈,里面有大量的这样的线程在RUNNABLE:

"http-bio-8080-exec-17" daemon prio=10 tid=0x00007f21b06b5800 nid=0x43cd runnable [0x00007f21a4efa000]
   java.lang.Thread.State: RUNNABLE
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4148)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4177)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
	at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
	at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
	at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
	at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
	at java.util.regex.Pattern$Loop.matchInit(Pattern.java:4702)
	at java.util.regex.Pattern$Prolog.match(Pattern.java:4639)
	at java.util.regex.Pattern$Begin.match(Pattern.java:3472)
	at java.util.regex.Matcher.match(Matcher.java:1221)
	at java.util.regex.Matcher.matches(Matcher.java:559)
这个是一个判断邮箱是否合法的方法,里面用了一个正则表达式:

	/**
	 * 验证邮箱
	 * 
	 * @param email
	 * @return
	 */
	public static boolean isEmail(final String email) {
		final String str = "^([a-zA-Z0-9]*[-_]?[a-zA-Z0-9]+)*@([a-zA-Z0-9]*[-_]?[a-zA-Z0-9]+)+[\\.][A-Za-z]{2,3}([\\.][A-Za-z]{2})?$";
		final Pattern p = Pattern.compile(str);
		final Matcher m = p.matcher(email);
		return m.matches();
	}
应该是这个表达是有问题!

apache的commons-validator已经提供了验证邮箱的方法,就不要再自己造轮子:

EmailValidator.getInstance().isValid(email);
</pre>看些具体的实现:<p></p><p></p><pre name="code" class="java">public boolean isValid(String email) {
        if (email == null) {
            return false;
        }

        if (email.endsWith(".")) { // check this first - it's cheap!
            return false;
        }

        // Check the whole email address structure
        Matcher emailMatcher = EMAIL_PATTERN.matcher(email);
        if (!emailMatcher.matches()) {
            return false;
        }

        if (!isValidUser(emailMatcher.group(1))) {
            return false;
        }

        if (!isValidDomain(emailMatcher.group(2))) {
            return false;
        }

        return true;
    }

private static final Pattern EMAIL_PATTERN = Pattern.compile(EMAIL_REGEX);
private static final String EMAIL_REGEX = "^\\s*?(.+)@(.+?)\\s*$";

首先用EMAIL_PATTERN做第一次检验,检验规则是:@前面至少一个字符,@后面至少一个字符

然后分别交验@之前的部分和@之后的部分:

protected boolean isValidUser(String user) {
        return USER_PATTERN.matcher(user).matches();
    }
private static final Pattern USER_PATTERN = Pattern.compile(USER_REGEX);
private static final String USER_REGEX = "^\\s*" + WORD + "(\\." + WORD + ")*$";
private static final String WORD = "((" + VALID_CHARS + "|')+|" + QUOTED_USER + ")";
private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]";
private static final String SPECIAL_CHARS = "\\p{Cntrl}\\(\\)<>@,;:'\\\\\\\"\\.\\[\\]";
private static final String QUOTED_USER = "(\"[^\"]*\")";
卧槽,太复杂了,先不看了!

protected boolean isValidDomain(String domain) {
        // see if domain is an IP address in brackets
        Matcher ipDomainMatcher = IP_DOMAIN_PATTERN.matcher(domain);

        if (ipDomainMatcher.matches()) {
            InetAddressValidator inetAddressValidator =
                    InetAddressValidator.getInstance();
            return inetAddressValidator.isValid(ipDomainMatcher.group(1));
        }
        // Domain is symbolic name
        DomainValidator domainValidator =
                DomainValidator.getInstance(allowLocal);
        return domainValidator.isValid(domain) ||
                domainValidator.isValidTld(domain);
    }
private static final Pattern IP_DOMAIN_PATTERN = Pattern.compile(IP_DOMAIN_REGEX);
private static final String IP_DOMAIN_REGEX = "^\\[(.*)\\]$";

分别调用了InetAddressValidator和DomainValidator来验证ip或者是domain。

基本的思想是:先用正则简单验证格式,再用代码验证是否符合逻辑,比如:ip是先验证是否是3个数字一组三个数字一组的四组数字,然后判断是否在0-255之间。

private static final String IPV4_REGEX =
            "^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$";

public boolean isValidInet4Address(String inet4Address) {
        // verify that address conforms to generic IPv4 format
        String[] groups = ipv4Validator.match(inet4Address);

        if (groups == null) {
            return false;
        }

        // verify that address subgroups are legal
        for (int i = 0; i <= 3; i++) {
            String ipSegment = groups[i];
            if (ipSegment == null || ipSegment.length() == 0) {
                return false;
            }

            int iIpSegment = 0;

            try {
                iIpSegment = Integer.parseInt(ipSegment);
            } catch(NumberFormatException e) {
                return false;
            }

            if (iIpSegment > 255) {
                return false;
            }

            if (ipSegment.length() > 1 && ipSegment.startsWith("0")) {
                return false;
            }

        }

        return true;
    }

这说明一个问题:正则虽好,切莫乱用!正则还有一个问题就是效率太低,切忌用它来做业务逻辑验证!

上网查了下,这还真不是个例:

http://blog.csdn.net/shixing_11/article/details/5997567

貌似是JDK的一个bug。



阅读更多
换一批

没有更多推荐了,返回首页