POI Excel 上下标、下划线、粗体、斜体标签处理(sup、sub、u、strong、em的HTML标签转化到excel格式)①

目前该代码暂不支持多标签嵌套的字符串,只适合多标签、无嵌套字符串。如果有改进,我会第一时间更新代码。

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFFont;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Font;

/**
 * Converts a small subset of inline HTML markup ({@code sub}, {@code sup}, {@code u},
 * {@code strong}, {@code em}) into rich-text formatting of an HSSF (.xls) cell.
 *
 * <p>Limitation: nested or overlapping tags are not supported. Each formatted run must be a
 * flat {@code <tag>text</tag>} pair; several such pairs may appear one after another.
 */
public class ExcelUtils {

    private static final String SUB_START = "<sub>";        // subscript
    private static final String SUB_END = "</sub>";
    private static final String SUP_START = "<sup>";        // superscript
    private static final String SUP_END = "</sup>";
    private static final String U_START = "<u>";            // underline
    private static final String U_END = "</u>";
    private static final String STRONG_START = "<strong>";  // bold
    private static final String STRONG_END = "</strong>";
    private static final String EM_START = "<em>";          // italic
    private static final String EM_END = "</em>";

    /**
     * Supported opening tags. The position of a tag in this array is also the position of its
     * range list in the list filled by {@link #getIndexs(String, List)}.
     */
    private static final String[] START_TAGS = { SUB_START, SUP_START, U_START, STRONG_START, EM_START };

    /** Closing tags, index-aligned with {@link #START_TAGS}. */
    private static final String[] END_TAGS = { SUB_END, SUP_END, U_END, STRONG_END, EM_END };

    private static final String BLANK_SPAN_START = "<span class=\"item-blank\">";
    private static final String PLAIN_SPAN_START = "<span>";
    private static final String SPAN_END = "</span>";

    private static final String STYLE_TAG_NAMES = "sup|sub|u|strong|em";

    /**
     * Matches every tag except the supported style tags. The {@code \b} after the tag name keeps
     * look-alike tags such as {@code <ul>} or {@code <embed>} from being mistaken for
     * {@code <u>} / {@code <em>}, and {@code [^>]*} lets a tag span line breaks.
     */
    private static final String STRIP_NON_STYLE_TAGS =
            "(?!</?(?:" + STYLE_TAG_NAMES + ")\\b[^>]*>)<[^>]*>";

    /** Same as {@link #STRIP_NON_STYLE_TAGS} but additionally keeps {@code span} tags. */
    private static final String STRIP_NON_STYLE_NON_SPAN_TAGS =
            "(?!</?(?:" + STYLE_TAG_NAMES + "|span)\\b[^>]*>)<[^>]*>";

    /**
     * Demo entry point: converts a sample HTML fragment and writes it into cell A1 of a new
     * workbook.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String title = "一种F<sup>-</sup>、Zn<sup>2+</sup>、B<sup>3+</sup>离子协同掺杂电解质,H<sub>2</sub>O是<u>水</u>,<strong>水</strong>是<em>生命</em>之源!<span>span标签测试</span>,"
                + "空白下划线:<span class=\"item-blank\"></span>,<span class=\"item-blank\">下划线</span>,空白下划线:<span class=\"item-blank\"></span>,"
                + "<table><tbody><tr><td width=\"209\">表格</td></tr></tbody></table>,<span>span标签测试</span>。";

        // "item-blank" spans become underlined blanks; everything else that is not a supported
        // style tag is dropped.
        title = convertBlankSpans(title);
        title = title.replaceAll(STRIP_NON_STYLE_TAGS, "");

        List<List<int[]>> tagIndexArr = null;
        if (containTag(title)) {
            tagIndexArr = new ArrayList<List<int[]>>();
            title = getIndexs(title, tagIndexArr);
        }

        // TODO adjust the output path to your environment
        File f = new File("C:\\tmp\\test.xls");
        // try-with-resources closes the workbook first, then the stream, even when writing fails.
        try (FileOutputStream fout = new FileOutputStream(f);
                HSSFWorkbook workbook = new HSSFWorkbook()) {
            HSSFSheet sheet = workbook.createSheet("sheet1");
            HSSFRow row = sheet.createRow(0);
            HSSFCell cell = row.createCell(0);

            if (tagIndexArr != null) {
                cell.setCellValue(buildRichText(workbook, title, tagIndexArr));
            } else {
                cell.setCellValue(title);
            }

            workbook.write(fout);
        } catch (FileNotFoundException e) {
            // The output file could not be created or opened.
            e.printStackTrace();
        } catch (IOException e) {
            // Writing or closing the workbook failed.
            e.printStackTrace();
        }
    }

    /**
     * Rewrites {@code <span class="item-blank">…</span>} as an underlined run padded with two
     * spaces on each side, and unwraps plain {@code <span>…</span>} pairs. Spans are handled in
     * document order so that each closing tag is consumed by the opening tag it belongs to.
     * Input without any "item-blank" span is returned unchanged.
     */
    private static String convertBlankSpans(String html) {
        if (!html.contains(BLANK_SPAN_START)) {
            return html;
        }
        String result = html.replaceAll(STRIP_NON_STYLE_NON_SPAN_TAGS, "");
        while (true) {
            int plainAt = result.indexOf(PLAIN_SPAN_START);
            int blankAt = result.indexOf(BLANK_SPAN_START);
            if (plainAt < 0 && blankAt < 0) {
                break;
            }
            boolean plainFirst = blankAt < 0 || (plainAt >= 0 && plainAt < blankAt);
            if (plainFirst) {
                result = replaceFirstFrom(result, PLAIN_SPAN_START, "", plainAt);
                result = replaceFirstFrom(result, SPAN_END, "", plainAt);
            } else {
                result = replaceFirstFrom(result, BLANK_SPAN_START, "<u>  ", blankAt);
                result = replaceFirstFrom(result, SPAN_END, "  </u>", blankAt);
            }
        }
        return result;
    }

    /**
     * Replaces the first literal occurrence of {@code target} at or after {@code fromIndex};
     * returns {@code s} unchanged when there is none.
     */
    private static String replaceFirstFrom(String s, String target, String replacement, int fromIndex) {
        int at = s.indexOf(target, fromIndex);
        if (at < 0) {
            return s;
        }
        return s.substring(0, at) + replacement + s.substring(at + target.length());
    }

    /**
     * Builds the rich-text cell value, creating one font per style that actually occurs.
     * {@code tagIndexArr} must have the layout produced by {@link #getIndexs(String, List)}.
     */
    private static HSSFRichTextString buildRichText(HSSFWorkbook workbook, String plainText,
            List<List<int[]>> tagIndexArr) {
        HSSFRichTextString text = new HSSFRichTextString(plainText);

        List<int[]> subs = tagIndexArr.get(0);
        if (!subs.isEmpty()) {
            HSSFFont ft = workbook.createFont();
            ft.setTypeOffset(HSSFFont.SS_SUB);
            applyFont(text, subs, ft);
        }
        List<int[]> sups = tagIndexArr.get(1);
        if (!sups.isEmpty()) {
            HSSFFont ft = workbook.createFont();
            ft.setTypeOffset(HSSFFont.SS_SUPER);
            applyFont(text, sups, ft);
        }
        List<int[]> us = tagIndexArr.get(2);
        if (!us.isEmpty()) {
            HSSFFont ft = workbook.createFont();
            ft.setUnderline(Font.U_SINGLE);
            applyFont(text, us, ft);
        }
        List<int[]> strongs = tagIndexArr.get(3);
        if (!strongs.isEmpty()) {
            HSSFFont ft = workbook.createFont();
            ft.setBoldweight(HSSFFont.BOLDWEIGHT_BOLD);
            applyFont(text, strongs, ft);
        }
        List<int[]> ems = tagIndexArr.get(4);
        if (!ems.isEmpty()) {
            HSSFFont ft = workbook.createFont();
            ft.setItalic(true);
            applyFont(text, ems, ft);
        }
        return text;
    }

    /** Applies {@code font} to every {start, end} range (end exclusive) of {@code text}. */
    private static void applyFont(HSSFRichTextString text, List<int[]> ranges, HSSFFont font) {
        for (int[] range : ranges) {
            text.applyFont(range[0], range[1], font);
        }
    }

    /** Returns the closing tag for a supported opening tag, or {@code null} if unsupported. */
    private static String endTagFor(String startTag) {
        for (int i = 0; i < START_TAGS.length; i++) {
            if (START_TAGS[i].equals(startTag)) {
                return END_TAGS[i];
            }
        }
        return null;
    }

    /**
     * Finds the next pair of the given tag.
     *
     * @param s the text to search
     * @param tag one of SUB_START, SUP_START, U_START, STRONG_START or EM_START
     * @return a two-element array holding the index of the first opening tag and the index of
     *         the first matching closing tag after it, or {@code null} when {@code tag} is not
     *         a supported opening tag or no such pair exists
     */
    public static int[] getNextTagsIndex(String s, String tag) {
        String endTag = endTagFor(tag);
        if (endTag == null) {
            return null;
        }
        int firstStart = s.indexOf(tag);
        if (firstStart < 0) {
            return null;
        }
        // Search behind the opening tag so that a stray closing tag earlier in the text does
        // not hide a valid pair.
        int firstEnd = s.indexOf(endTag, firstStart + tag.length());
        if (firstEnd < 0) {
            return null;
        }
        return new int[] { firstStart, firstEnd };
    }

    /**
     * Removes the next pair of the given tag and returns the resulting string.
     *
     * <p>The first opening tag is removed if present, followed by the first matching closing
     * tag located after it (or the first closing tag overall when there is no opening tag).
     * Tags are matched literally, not as regular expressions.
     *
     * @param s the text to process
     * @param tag one of SUB_START, SUP_START, U_START, STRONG_START or EM_START
     * @return {@code s} without that tag pair
     */
    public static String removeNextTags(String s, String tag) {
        int searchFrom = 0;
        int start = s.indexOf(tag);
        if (start >= 0) {
            s = s.substring(0, start) + s.substring(start + tag.length());
            searchFrom = start;
        }
        String endTag = endTagFor(tag);
        if (endTag != null) {
            s = replaceFirstFrom(s, endTag, "", searchFrom);
        }
        return s;
    }

    /**
     * Tells whether the text contains at least one complete pair of a supported tag, i.e. an
     * opening tag followed by its closing tag.
     *
     * @param s the text to inspect
     * @return {@code true} if a sub, sup, u, strong or em pair is present
     */
    public static boolean containTag(String s) {
        for (String startTag : START_TAGS) {
            if (getNextTagsIndex(s, startTag) != null) {
                return true;
            }
        }
        return false;
    }

    /**
     * Strips all sub, sup, u, strong and em tag pairs from the text and records, for each pair,
     * the range of the enclosed text in the returned (tag-free) string so that fonts can be
     * applied afterwards.
     *
     * <p>Nested or overlapping tags are not supported; the recorded ranges are only meaningful
     * for flat, consecutive pairs.
     *
     * @param s the text to process
     * @param tagIndexList a new, empty list supplied by the caller; on return it holds five
     *        lists of {start, end} ranges (end exclusive):
     *        <br>tagIndexList.get(0) - sub
     *        <br>tagIndexList.get(1) - sup
     *        <br>tagIndexList.get(2) - u
     *        <br>tagIndexList.get(3) - strong
     *        <br>tagIndexList.get(4) - em
     * @return the text with all complete supported tag pairs removed
     */
    public static String getIndexs(String s, List<List<int[]>> tagIndexList) {
        List<List<int[]>> ranges = new ArrayList<List<int[]>>();
        for (int i = 0; i < START_TAGS.length; i++) {
            ranges.add(new ArrayList<int[]>());
        }

        while (true) {
            // Pick the pair whose opening tag comes first, so that the indices recorded for it
            // are not shifted by tags removed later on.
            int firstType = -1;
            int[] firstPair = null;
            for (int i = 0; i < START_TAGS.length; i++) {
                int[] pair = getNextTagsIndex(s, START_TAGS[i]);
                if (pair != null && (firstPair == null || pair[0] < firstPair[0])) {
                    firstPair = pair;
                    firstType = i;
                }
            }
            if (firstPair == null) {
                break;
            }

            String startTag = START_TAGS[firstType];
            String endTag = END_TAGS[firstType];
            // Cut out exactly the two tags that were located above.
            s = s.substring(0, firstPair[0])
                    + s.substring(firstPair[0] + startTag.length(), firstPair[1])
                    + s.substring(firstPair[1] + endTag.length());
            // With the opening tag gone, the end of the enclosed text moves forward by its length.
            firstPair[1] -= startTag.length();
            ranges.get(firstType).add(firstPair);
        }

        tagIndexList.addAll(ranges);
        return s;
    }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值