场景:公司框架有一个前台导出功能,实际上是把HTML结构通过正则去掉一些没用的样式,然后由统一的工具导出。
但是该工具不支持单元格内容为下拉框(<select>)的列表,比如这种:
查看了该导出工具的源码,内容如下:
<%@page contentType="text/html;charset=GB2312"%>
<%@ page import="java.util.regex.*" %>
<jsp:useBean id="xml" scope="session" class="com.nstc.exportkit.excel.ExportExcel"/>
<%!
/**
 * Replaces every case-insensitive match of {@code regex} in {@code html}
 * with {@code replacement}.
 *
 * The replacement is passed to {@link java.util.regex.Matcher#replaceAll},
 * so group references such as {@code $1} are honoured.
 *
 * @param html        markup to clean; must not be null
 * @param regex       regular expression, compiled with CASE_INSENSITIVE
 * @param replacement replacement text (may contain group references)
 * @return the markup with all matches replaced
 */
private static String replaceIgnoreCase(String html, String regex, String replacement) {
    return Pattern.compile(regex, Pattern.CASE_INSENSITIVE).matcher(html).replaceAll(replacement);
}
%>
<%
// Export endpoint: takes the table markup posted in "_ExcelText_", strips
// tags/attributes with a fixed sequence of regex replacements, and hands the
// cleaned markup to the session-scoped "xml" bean, which writes it to the
// response output stream as an .xls download.

// Download file name. The name is URL-encoded only when the "fileName"
// parameter is present and non-empty; otherwise the plain concatenation is
// kept (which yields "null.xls" when the parameter is absent).
String name = request.getParameter("fileName");
String fileName = name + ".xls";
if (name != null && !"".equals(name)) {
    fileName = java.net.URLEncoder.encode(name + ".xls", "utf-8");
}
System.out.println("导出文件名称:" + fileName);

// Fail with a clear 400 instead of a NullPointerException when the table
// markup was not posted. Checked before the download headers are written.
String table = request.getParameter("_ExcelText_");
if (table == null) {
    response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Missing required parameter: _ExcelText_");
    return;
}

response.reset();
response.setContentType("application/vnd.ms-excel;charset=GB2312");
response.setHeader("Content-disposition", "attachment; filename=\"" + fileName + "\"");

// Patterns shared by both branches.
final String hiddenRow = "<TR[^>]*?style=\"DISPLAY: none\"[\\s\\S]*?>[\\s\\S]*?</TR>";
final String hiddenCell = "<TD[^>]*?style=\"DISPLAY: none\"[\\s\\S]*?>[\\s\\S]*?</TD>";
// [\s\S]*? lets a script body span several lines. It replaces both the
// THEAD branch's ".*?" (which could not cross a line break, so multi-line
// scripts were left in the export) and the other branch's "(.|\n|\r)*?"
// (alternation inside a repetition, which is stack-hungry on large input).
final String scriptBlock = "<SCRIPT LANGUAGE=[\\s\\S]*?</SCRIPT>";

// NOTE(review): this check is case-sensitive although every replacement
// below is case-insensitive, so markup using lower-case "</thead>" takes
// the else branch — confirm that this is intended.
if (table.indexOf("</THEAD>") > 0) {
    String html = table;
    // Merge the header section into the body: drop </THEAD> and any existing
    // <TBODY ...> opener, then turn the <THEAD ...> opener into <TBODY>.
    html = replaceIgnoreCase(html, "(</THEAD>)|(<TBODY.*?>)", "");
    html = replaceIgnoreCase(html, "(<THEAD.*?>)", "<TBODY>");
    html = replaceIgnoreCase(html, "<BR>", "");
    html = replaceIgnoreCase(html, "<INPUT.*?>", "");
    html = replaceIgnoreCase(html, scriptBlock, "");
    html = replaceIgnoreCase(html, "<TABLE.*?>", "<TABLE>");
    // Hidden rows/cells must be removed before the <TR ...> attributes are
    // stripped, because the match relies on the inline style attribute.
    html = replaceIgnoreCase(html, hiddenRow, "");
    html = replaceIgnoreCase(html, hiddenCell, "");
    html = replaceIgnoreCase(html, "<TR.*?>", "<TR>");
    // Unwrap SPAN and LABEL elements, keeping their text.
    html = replaceIgnoreCase(html, "<SPAN.*?>", "");
    html = replaceIgnoreCase(html, "</SPAN>", "");
    html = replaceIgnoreCase(html, "<LABEL.*?>", "");
    html = replaceIgnoreCase(html, "</LABEL>", "");
    // NOTE(review): the selected-<option> replacement used in the else
    // branch was commented out for this branch in the original and is
    // still not applied here — confirm whether it should be.
    System.out.println(html);
    xml.export(html, response.getOutputStream());
} else {
    String html = table;
    html = replaceIgnoreCase(html, "<BR>", "");
    // Unwrap FONT elements, keeping their text.
    html = replaceIgnoreCase(html, "(<FONT.*?>)", "");
    html = replaceIgnoreCase(html, "(</FONT>)", "");
    html = replaceIgnoreCase(html, "<INPUT.*?>", "");
    html = replaceIgnoreCase(html, scriptBlock, "");
    html = replaceIgnoreCase(html, hiddenCell, "");
    html = replaceIgnoreCase(html, hiddenRow, "");
    html = replaceIgnoreCase(html, "<tr.*?>", "<TR>");
    // As in the original, the log shows the markup before the drop-down
    // replacement is applied.
    System.out.println(html);
    // Collapse each <select> to the text of its selected="selected" option;
    // $1 is the captured option text.
    html = replaceIgnoreCase(html,
            "(?i)<select.*?>.*?<option.*?selected=\"selected\".*?>(.*?)</option>.*?</select>", "$1");
    xml.export(html, response.getOutputStream());
}
%>
只需要在工具中加上一条正则,把 <select> 替换成其中已选中(selected="selected")的 <option> 的文本即可。注意要使用非贪婪匹配(.*?),并在替换串中用 $1 引用捕获到的选项文本。
// Demo: collapse a <select> to the text of its selected option.
// The sample markup is built from adjacent literals because a Java string
// literal cannot contain a physical line break.
String cellAttrs = " colSpan=\"1\" rowSpan=\"1\" class=\"\" align=\"left\" valign=\"middle\" style=\"vertical-align:middle;padding:0 5px 0 5px;\">";
String str = "<LABEL>是否需清理</LABEL></TH>"
        + "<TH colSpan=\"1\" rowSpan=\"1\" class=\"table_bg text-center\" style=\"word-break: keep-all; width: 149px; height: 30px;\"><LABEL>反馈结果</LABEL></TH>"
        + "<TH colSpan=\"1\" rowSpan=\"1\" class=\"table_bg text-center\" style=\"word-break: keep-all; width: 150px; height: 30px;\"><LABEL>不处理原因</LABEL></TH>"
        + "<TH colSpan=\"1\" rowSpan=\"1\" class=\"table_bg text-center\" style=\"word-break: keep-all; width: 90px; height: 30px;\"><LABEL>反馈日期</LABEL></TH>"
        + "</TR></THEAD><TBODY><TR>"
        + "<TD colSpan=\"1\"rowSpan=\"1\" class=\"\" align=\"center\" valign=\"middle\" style=\"vertical-align:middle;padding:0 5px 0 5px;\"></TD>"
        + "<TD" + cellAttrs + "01001003_01</TD>"
        + "<TD" + cellAttrs + "3333333310000</TD>"
        + "<TD" + cellAttrs + "四川电力公司</TD>"
        + "<TD" + cellAttrs + "</TD>"
        + "<TD" + cellAttrs + "旧单位21</TD>"
        + "<TD" + cellAttrs + "中国工商银行</TD>"
        + "<TD" + cellAttrs + "基本账户</TD>"
        + "<TD" + cellAttrs + "一般户</TD>"
        + "<TD colSpan=\"1\" rowSpan=\"1\" class=\"\"align=\"left\" valign=\"middle\" style=\"vertical-align:middle;padding:0 5px 0 5px;\">非直联</TD>"
        + "<TD colSpan=\"1\" rowSpan=\"1\"class=\"\" align=\"left\" valign=\"middle\" style=\"vertical-align:middle;padding:0 5px 0 5px;\">正常</TD>"
        + "<TD" + cellAttrs + "999.99</TD>"
        + "<TD" + cellAttrs + "2020-03-04</TD>"
        + "<TD" + cellAttrs
        // Illustrative drop-down cell (not taken from the real page): the
        // captured sample stops at the opening <TD>, so without it the regex
        // below has nothing to match and the output equals the input.
        + "<select><option value=\"0\">否</option><option value=\"1\" selected=\"selected\">是</option></select></TD></TR></TBODY>";
// Non-greedy throughout; group 1 captures the text of the option marked
// selected="selected", and "$1" substitutes it for the whole <select>.
String reg = "<select.*?>.*?<option.*?selected=\"selected\".*?>(.*?)</option>.*?</select>";
System.out.println(str.replaceAll(reg,"$1"));