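// Note: strings with nested tags are not fully supported yet; this code is intended for strings that contain multiple, non-nested tags. The code will be updated as soon as an improvement is available.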
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFFont;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Font;
/**
 * Converts a small subset of HTML inline markup ({@code <sub>}, {@code <sup>}, {@code <u>},
 * {@code <strong>}, {@code <em>}) into font runs of an Excel (HSSF) rich-text cell.
 *
 * <p>The string helpers strip the tags from the text and report, per tag type, the
 * {@code [start, end)} character ranges of the plain text that the tags enclosed.
 * {@link #main(String[])} demonstrates the whole pipeline by writing one cell to an .xls file.
 *
 * <p>Overlapping (nested) ranges receive valid offsets, but the demo applies one font per tag
 * type, so the fonts of nested tags are not merged.
 */
public class ExcelUtils {
    private static final String SUB_START = "<sub>"; // subscript
    private static final String SUB_END = "</sub>";
    private static final String SUP_START = "<sup>"; // superscript
    private static final String SUP_END = "</sup>";
    private static final String U_START = "<u>"; // underline
    private static final String U_END = "</u>";
    private static final String STRONG_START = "<strong>"; // bold
    private static final String STRONG_END = "</strong>";
    private static final String EM_START = "<em>"; // italic
    private static final String EM_END = "</em>";

    /** Opening tags, in the slot order used by {@link #getIndexs(String, List)}. */
    private static final String[] START_TAGS = { SUB_START, SUP_START, U_START, STRONG_START, EM_START };
    /** Closing tags, index-aligned with {@link #START_TAGS}. */
    private static final String[] END_TAGS = { SUB_END, SUP_END, U_END, STRONG_END, EM_END };

    private static final String SPAN_START = "<span>";
    private static final String BLANK_SPAN_START = "<span class=\"item-blank\">";
    private static final String SPAN_END = "</span>";

    /** Output file used by {@link #main(String[])} when no path argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "C:\\tmp\\test.xls";

    /**
     * Demo entry point: converts a sample HTML fragment into a rich-text cell and writes it to an
     * .xls workbook.
     *
     * @param args optional; {@code args[0]} overrides the output file path
     *             (default {@code C:\tmp\test.xls})
     */
    public static void main(String[] args) {
        String title = "一种F<sup>-</sup>、Zn<sup>2+</sup>、B<sup>3+</sup>离子协同掺杂电解质,H<sub>2</sub>O是<u>水</u>,<strong>水</strong>是<em>生命</em>之源!<span>span标签测试</span>,"
                + "空白下划线:<span class=\"item-blank\"></span>,<span class=\"item-blank\">下划线</span>,空白下划线:<span class=\"item-blank\"></span>,"
                + "<table><tbody><tr><td width=\"209\">表格</td></tr></tbody></table>,<span>span标签测试</span>。";
        title = stripUnsupportedTags(title);

        List<List<int[]>> tagIndexArr = null;
        if (containTag(title)) {
            tagIndexArr = new ArrayList<List<int[]>>();
            title = getIndexs(title, tagIndexArr);
        }

        String outputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_PATH;
        File f = new File(outputPath);
        // Both the workbook and the stream are closed even when writing fails.
        try (HSSFWorkbook workbook = new HSSFWorkbook();
                FileOutputStream fout = new FileOutputStream(f)) {
            HSSFSheet sheet = workbook.createSheet("sheet1");
            HSSFRow row = sheet.createRow(0);
            HSSFCell cell = row.createCell(0);
            if (tagIndexArr != null) {
                cell.setCellValue(buildRichText(workbook, title, tagIndexArr));
            } else {
                cell.setCellValue(title);
            }
            workbook.write(fout);
        } catch (IOException e) {
            // Also covers FileNotFoundException (e.g. the target directory does not exist).
            System.err.println("Failed to write workbook to " + f.getAbsolutePath());
            e.printStackTrace();
        }
    }

    /**
     * Reduces arbitrary HTML to text that contains only the five supported formatting tags.
     *
     * <p>When the input contains {@code <span class="item-blank">}, such spans are first turned
     * into underlined, space-padded runs and plain {@code <span>} wrappers are dropped. Every
     * other tag is removed.
     */
    private static String stripUnsupportedTags(String html) {
        String result = html;
        if (result.contains(BLANK_SPAN_START)) {
            // The \b keeps look-alike tags such as <ul> or <embed> from being mistaken for <u>/<em>.
            result = result.replaceAll("(?!</?(?:sup|sub|u|strong|em|span)\\b[^>]*>)<.*?>", "");
            result = rewriteSpans(result);
        }
        return result.replaceAll("(?!</?(?:sup|sub|u|strong|em)\\b[^>]*>)<.*?>", "");
    }

    /**
     * Processes {@code <span>} and {@code <span class="item-blank">} elements from left to right:
     * plain spans are unwrapped, item-blank spans become {@code <u> ... </u>}.
     */
    private static String rewriteSpans(String text) {
        String result = text;
        while (true) {
            int plainAt = result.indexOf(SPAN_START);
            int blankAt = result.indexOf(BLANK_SPAN_START);
            if (plainAt < 0 && blankAt < 0) {
                return result;
            }
            if (blankAt < 0 || (plainAt >= 0 && plainAt < blankAt)) {
                result = replaceSpan(result, plainAt, SPAN_START, "", "");
            } else {
                result = replaceSpan(result, blankAt, BLANK_SPAN_START, "<u> ", " </u>");
            }
        }
    }

    /**
     * Replaces the opening span tag at {@code openAt} and the first {@code </span>} that follows it.
     * A closing tag that appears before the opening tag is left untouched.
     */
    private static String replaceSpan(String text, int openAt, String openTag, String openReplacement,
            String closeReplacement) {
        StringBuilder sb = new StringBuilder(text);
        int closeAt = text.indexOf(SPAN_END, openAt + openTag.length());
        // Replace back-to-front so that openAt stays valid.
        if (closeAt >= 0) {
            sb.replace(closeAt, closeAt + SPAN_END.length(), closeReplacement);
        }
        sb.replace(openAt, openAt + openTag.length(), openReplacement);
        return sb.toString();
    }

    /**
     * Builds the rich-text value for a cell, applying one font per tag type to its ranges.
     *
     * @param workbook  workbook that owns the created fonts
     * @param plainText text with all tags already removed
     * @param ranges    ranges as filled in by {@link #getIndexs(String, List)}
     */
    private static HSSFRichTextString buildRichText(HSSFWorkbook workbook, String plainText,
            List<List<int[]>> ranges) {
        HSSFRichTextString text = new HSSFRichTextString(plainText);
        List<int[]> subs = ranges.get(0);
        List<int[]> sups = ranges.get(1);
        List<int[]> us = ranges.get(2);
        List<int[]> strongs = ranges.get(3);
        List<int[]> ems = ranges.get(4);
        // Fonts are only created for tag types that actually occur.
        if (!subs.isEmpty()) {
            HSSFFont ft = workbook.createFont();
            ft.setTypeOffset(HSSFFont.SS_SUB);
            applyFont(text, subs, ft);
        }
        if (!sups.isEmpty()) {
            HSSFFont ft = workbook.createFont();
            ft.setTypeOffset(HSSFFont.SS_SUPER);
            applyFont(text, sups, ft);
        }
        if (!us.isEmpty()) {
            HSSFFont ft = workbook.createFont();
            ft.setUnderline(Font.U_SINGLE);
            applyFont(text, us, ft);
        }
        if (!strongs.isEmpty()) {
            HSSFFont ft = workbook.createFont();
            ft.setBoldweight(HSSFFont.BOLDWEIGHT_BOLD);
            applyFont(text, strongs, ft);
        }
        if (!ems.isEmpty()) {
            HSSFFont ft = workbook.createFont();
            ft.setItalic(true);
            applyFont(text, ems, ft);
        }
        return text;
    }

    /** Applies {@code font} to every {@code [start, end)} range in {@code ranges}. */
    private static void applyFont(HSSFRichTextString text, List<int[]> ranges, HSSFFont font) {
        for (int[] pair : ranges) {
            text.applyFont(pair[0], pair[1], font);
        }
    }

    /** Returns the closing tag for a supported opening tag, or {@code null} for any other string. */
    private static String endTagFor(String tag) {
        for (int i = 0; i < START_TAGS.length; i++) {
            if (START_TAGS[i].equals(tag)) {
                return END_TAGS[i];
            }
        }
        return null;
    }

    /**
     * Locates the next pair of tags of the given type.
     *
     * @param s   text to search
     * @param tag one of SUB_START, SUP_START, U_START, STRONG_START or EM_START
     * @return a two-element array: index of the first opening tag and index of the first closing
     *         tag that follows it; {@code null} when there is no such pair or the tag is not one
     *         of the supported opening tags
     */
    public static int[] getNextTagsIndex(String s, String tag) {
        int firstStart = s.indexOf(tag);
        if (firstStart < 0) {
            return null;
        }
        String endTag = endTagFor(tag);
        if (endTag == null) {
            return null;
        }
        // Search behind the opening tag so a stray closing tag earlier in the text is ignored.
        int firstEnd = s.indexOf(endTag, firstStart + tag.length());
        if (firstEnd < 0) {
            return null;
        }
        return new int[] { firstStart, firstEnd };
    }

    /**
     * Removes the next sub/sup/u/strong/em tag pair and returns the resulting string.
     *
     * <p>The first occurrence of {@code tag} is removed together with the first matching closing
     * tag that follows it. When the opening tag is absent, the first closing tag found anywhere
     * is removed. Tags are matched literally, not as regular expressions.
     *
     * @param s   text to process
     * @param tag one of SUB_START, SUP_START, U_START, STRONG_START or EM_START
     * @return the text without that tag pair
     */
    public static String removeNextTags(String s, String tag) {
        String endTag = endTagFor(tag);
        int startAt = s.indexOf(tag);
        int endAt = -1;
        if (endTag != null) {
            endAt = s.indexOf(endTag, startAt < 0 ? 0 : startAt + tag.length());
        }
        StringBuilder sb = new StringBuilder(s);
        // Delete back-to-front so that startAt stays valid.
        if (endAt >= 0) {
            sb.delete(endAt, endAt + endTag.length());
        }
        if (startAt >= 0) {
            sb.delete(startAt, startAt + tag.length());
        }
        return sb.toString();
    }

    /**
     * Tells whether the text contains both the opening and the closing tag of at least one of
     * sub, sup, u, strong or em.
     *
     * @param s text to inspect
     * @return {@code true} if some supported tag type has both its opening and closing tag in {@code s}
     */
    public static boolean containTag(String s) {
        for (int i = 0; i < START_TAGS.length; i++) {
            if (s.contains(START_TAGS[i]) && s.contains(END_TAGS[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Strips all sub/sup/u/strong/em tag pairs from the text and records, per tag type, the
     * {@code [start, end)} ranges of the resulting plain text that each pair enclosed.
     *
     * <p>Pairs are processed in order of their opening tag. Ranges recorded earlier are shifted
     * whenever a later removal happens inside or before them, so every recorded offset refers to
     * the returned string.
     *
     * @param s            text containing tags
     * @param tagIndexList a new, empty list; on return five lists have been appended to it:
     *                     <br>tagIndexList.get(0) holds the sub ranges
     *                     <br>tagIndexList.get(1) holds the sup ranges
     *                     <br>tagIndexList.get(2) holds the u ranges
     *                     <br>tagIndexList.get(3) holds the strong ranges
     *                     <br>tagIndexList.get(4) holds the em ranges
     * @return the text with all complete sub/sup/u/strong/em pairs removed
     */
    public static String getIndexs(String s, List<List<int[]>> tagIndexList) {
        List<List<int[]>> rangesByTag = new ArrayList<List<int[]>>(START_TAGS.length);
        for (int i = 0; i < START_TAGS.length; i++) {
            rangesByTag.add(new ArrayList<int[]>());
        }

        String text = s;
        while (true) {
            // Pick the pair whose opening tag comes first in the remaining text.
            int slot = -1;
            int[] next = null;
            for (int i = 0; i < START_TAGS.length; i++) {
                int[] pair = getNextTagsIndex(text, START_TAGS[i]);
                if (pair != null && (next == null || pair[0] < next[0])) {
                    next = pair;
                    slot = i;
                }
            }
            if (next == null) {
                break;
            }

            int startAt = next[0];
            int endAt = next[1];
            int startLen = START_TAGS[slot].length();
            int endLen = END_TAGS[slot].length();

            // Keep already recorded ranges (e.g. an enclosing tag) aligned with the shrinking text.
            for (List<int[]> ranges : rangesByTag) {
                for (int[] range : ranges) {
                    range[0] = shiftOffset(shiftOffset(range[0], endAt, endLen), startAt, startLen);
                    range[1] = shiftOffset(shiftOffset(range[1], endAt, endLen), startAt, startLen);
                }
            }

            text = text.substring(0, startAt)
                    + text.substring(startAt + startLen, endAt)
                    + text.substring(endAt + endLen);
            // Once the opening tag is gone, the end of the range moves forward by its length.
            next[1] = endAt - startLen;
            rangesByTag.get(slot).add(next);
        }

        tagIndexList.addAll(rangesByTag);
        return text;
    }

    /**
     * Maps an offset to its position after {@code removedLen} characters starting at
     * {@code removedAt} have been deleted; offsets inside the deleted span collapse to its start.
     */
    private static int shiftOffset(int offset, int removedAt, int removedLen) {
        if (offset >= removedAt + removedLen) {
            return offset - removedLen;
        }
        return Math.min(offset, removedAt);
    }
}