今天写了一个正则表达式,这个正则表达式用来匹配相应的文件内容,并打印出匹配内容在文件中的行号。
本文所列举的代码用正则表达式匹配 JSP 文件中的 pageEncoding。pageEncoding 位于以 "<%@ page" 开头、以 "%>" 结尾的指令块中,块内还可以包含其他内容和换行,如下所示:
<%@ page ... pageEncoding="windows-31j" %>
代码如下:
package com.regular.test;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Scans a JSP file for {@code pageEncoding} attributes inside
 * {@code <%@ page ... %>} directives and prints, for each one found, the line
 * number on which its quoted value starts together with the value itself.
 */
public class RegularTest {

    /**
     * Matches one directive that carries a quoted {@code pageEncoding} value.
     * <ul>
     *   <li>group 1: all text from the scan position up to (not including) the
     *       opening quote of the value;</li>
     *   <li>group 2: the quoted value through the closing {@code %>};</li>
     *   <li>group 3: the text between the quotes.</li>
     * </ul>
     * {@code (?s)} lets {@code .} cross line breaks so that group 1 also
     * swallows the lines between two directives, which keeps the running line
     * count in {@link #printPageEncodings(String)} accurate.
     */
    static final Pattern PAGE_ENCODING = Pattern.compile(
            "(?s)(.*?<%@[^>]*?page[^>]*?pageEncoding[^>=]*=[^>\"]*)(\"([^\"]*)\"[^>]*?%>)");

    /**
     * Matches a single line break in any of the three common conventions.
     * {@code \r\n} is listed first so a Windows line ending counts once.
     */
    static final Pattern LINE_BREAK = Pattern.compile("\r\n|\r|\n");

    /** File read by {@link #getContent()}, resolved against the working directory. */
    private static final String INPUT_FILE = "aaa.jsp";

    /**
     * Program entry point; delegates to {@link #test()}.
     *
     * @param args ignored
     * @throws Exception if the input file cannot be read
     */
    public static void main(String[] args) throws Exception {
        test();
    }

    /**
     * Reads the input file and prints every {@code pageEncoding} found in it.
     *
     * @throws Exception if the input file cannot be read
     */
    public static void test() throws Exception {
        printPageEncodings(getContent());
    }

    /**
     * Prints {@code "line : N, content: VALUE"} to {@code System.out} for each
     * {@code pageEncoding} attribute in {@code content}, where N is the 1-based
     * line of the value's opening quote and VALUE is the trimmed quoted text.
     *
     * @param content full text to scan
     */
    static void printPageEncodings(String content) {
        Matcher directive = PAGE_ENCODING.matcher(content);
        int line = 1;
        while (directive.find()) {
            // Advance past everything that precedes the opening quote.
            line += getLnCount(directive.group(1), LINE_BREAK);
            System.out.println("line : " + line + ", content: " + directive.group(3).trim());
            // The quoted value and directive tail may themselves span lines.
            line += getLnCount(directive.group(2), LINE_BREAK);
        }
    }

    /**
     * Counts the non-overlapping matches of {@code ln} in {@code str}.
     *
     * @param str text to inspect; {@code null} is treated as having no matches
     * @param ln  pattern describing one line break
     * @return number of matches, or 0 when {@code str} is {@code null}
     */
    public static int getLnCount(String str, Pattern ln) {
        if (str == null) {
            return 0;
        }
        int count = 0;
        Matcher breaks = ln.matcher(str);
        while (breaks.find()) {
            count++;
        }
        return count;
    }

    /**
     * Reads the whole input file into a string.
     *
     * @return the file content
     * @throws Exception if the file cannot be opened or read
     */
    public static String getContent() throws Exception {
        try (InputStream in = new FileInputStream(INPUT_FILE)) {
            // Loop until end of stream: a single read() may return fewer bytes
            // than requested, and available() is only an estimate.
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                collected.write(chunk, 0, read);
            }
            // NOTE(review): decodes with the platform default charset, as
            // before; confirm whether a fixed charset is wanted for JSP input.
            return new String(collected.toByteArray());
        }
    }
}
要匹配的文件如下:
sdf
<%@
page language="java"
contentType="text/html; charset=windows-31j"
%>
<%@
pageEncoding=
"windows-31j" %>
<%@page
pageEncoding="
gbk
" %>
<%@page
pageEncoding="utf-8" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=windows-31j">
<title>Insert title here</title>
</head>
<body>
</body>
</html>