// epublib 按指定层级拆分电子书 (epublib: split an e-book into separate EPUB files at a specified table-of-contents level)

import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

import nl.siegmann.epublib.domain.Book;
import nl.siegmann.epublib.domain.MediaType;
import nl.siegmann.epublib.domain.Resource;
import nl.siegmann.epublib.domain.TOCReference;
import nl.siegmann.epublib.domain.TableOfContents;
import nl.siegmann.epublib.epub.EpubReader;
import nl.siegmann.epublib.epub.EpubWriter;
import nl.siegmann.epublib.service.MediatypeService;
import nl.siegmann.epublib.util.ResourceUtil;

/**
 * Splits one EPUB into several smaller EPUB files: every table-of-contents
 * entry that sits at nesting depth {@code SPLIT_LEVEL} and has children
 * becomes its own book, with the entry's children as its sections.
 *
 * <p>
 * For each generated book the cover is taken from the first {@code <img>} on
 * the entry's own page, and every stylesheet ({@code <link href>}) and image
 * ({@code <img src>}) referenced by a section page is copied over from the
 * source book.
 */
public class EpubRead {
	/** Nesting depth (top-level TOC entries are depth 0) whose entries become separate books. */
	private static final int SPLIT_LEVEL = 2;

	/** Encoding used to read the source book and to convert page bytes to text and back. */
	private static final String ENCODING = "UTF-8";

	/** Source book used when no command-line argument is given. */
	private static final String DEFAULT_INPUT = "C:\\TEMP\\sc.epub";

	/** Output directory used when no second command-line argument is given. */
	private static final String DEFAULT_OUTPUT_DIR = "C:\\TEMP\\output";

	/** Title of the per-volume contents page whose back link is stripped. */
	private static final String CONTENTS_TITLE = "目录";

	/** Link back to the master contents page; meaningless once the volume is a separate book. */
	private static final String BACK_LINK = "<a href=\"part0001.xhtml\">返回总目录</a>";

	/** TOC entries selected by {@link #getNode(TOCReference, Integer)}; one output book per entry. */
	static List<TOCReference> tbList = new ArrayList<TOCReference>();

	/**
	 * Reads the source book, collects the split points and writes one EPUB per
	 * split point, named after the entry's title.
	 *
	 * @param args optional: {@code args[0]} is the source EPUB path and
	 *             {@code args[1]} the output directory; the original hard-coded
	 *             locations are used for whichever is missing
	 */
	public static void main(String[] args) {
		String inputPath = args != null && args.length > 0 ? args[0] : DEFAULT_INPUT;
		File outputDir = new File(args != null && args.length > 1 ? args[1] : DEFAULT_OUTPUT_DIR);
		try {
			// Stylesheets and images are requested as lazily loaded resources.
			MediaType[] lazyTypes = { MediatypeService.CSS, MediatypeService.GIF, MediatypeService.JPG,
					MediatypeService.PNG };
			Book book = new EpubReader().readEpubLazy(inputPath, ENCODING, Arrays.asList(lazyTypes));

			// Start from a clean list so a repeated invocation does not re-emit old entries.
			tbList.clear();
			TableOfContents tableOfContents = book.getTableOfContents();
			for (TOCReference ref : tableOfContents.getTocReferences()) {
				getNode(ref, 0);
			}

			for (TOCReference volume : tbList) {
				Book part = buildVolume(book, volume);
				writeBook(part, new File(outputDir, safeFileName(volume.getTitle()) + ".epub"));
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Walks the TOC subtree rooted at {@code toc} and appends to {@code tbList}
	 * every entry at depth {@code SPLIT_LEVEL} that has at least one child.
	 *
	 * @param toc   entry to inspect; {@code null} is ignored
	 * @param level depth of {@code toc}, with top-level entries at 0
	 */
	public static void getNode(TOCReference toc, Integer level) {
		if (toc == null) {
			return;
		}
		List<TOCReference> children = toc.getChildren();
		// An entry without children is a leaf and can never be a split point.
		if (children == null || children.isEmpty()) {
			return;
		}
		if (level.intValue() == SPLIT_LEVEL) {
			tbList.add(toc);
		}
		if (level.intValue() >= SPLIT_LEVEL) {
			// Only depth SPLIT_LEVEL is ever collected, so deeper levels need no visit.
			return;
		}
		for (TOCReference child : children) {
			getNode(child, level + 1);
		}
	}

	/** Builds the stand-alone book for one split point: title, cover, sections and referenced assets. */
	private static Book buildVolume(Book source, TOCReference volume) throws Exception {
		Book target = new Book();
		target.getMetadata().addTitle(volume.getTitle());
		attachCover(source, volume.getResource(), target);

		// Hrefs already copied into this volume, so a shared asset is added only once.
		Set<String> copied = new HashSet<String>();
		for (TOCReference section : volume.getChildren()) {
			Resource page = section.getResource();
			if (page == null) {
				// A TOC entry without content has nothing to add.
				continue;
			}
			String title = section.getTitle();
			if (CONTENTS_TITLE.equals(title)) {
				// Literal replace: the link text contains '.', which replaceAll would treat as a wildcard.
				String markup = new String(page.getData(), ENCODING).replace(BACK_LINK, "");
				target.addSection(title, new Resource(markup.getBytes(ENCODING), page.getHref()));
				System.out.println("------\n" + markup);
			} else {
				target.addSection(title, page);
			}

			Document doc = ResourceUtil.getAsDocument(page);
			if (doc != null) {
				copyReferenced(source, target, page.getHref(), doc.getElementsByTagName("link"), "href", copied);
				copyReferenced(source, target, page.getHref(), doc.getElementsByTagName("img"), "src", copied);
			}
		}
		return target;
	}

	/** Uses the first image on {@code page} as the cover of {@code target}; does nothing if none is found. */
	private static void attachCover(Book source, Resource page, Book target) throws Exception {
		if (page == null) {
			return;
		}
		Document document = ResourceUtil.getAsDocument(page);
		if (document == null) {
			return;
		}
		NodeList images = document.getElementsByTagName("img");
		if (images.getLength() == 0) {
			return;
		}
		String href = resolveHref(page.getHref(), ((Element) images.item(0)).getAttribute("src"));
		Resource cover = href == null ? null : source.getResources().getByHref(href);
		if (cover != null) {
			target.setCoverImage(cover);
		}
	}

	/** Copies from {@code source} to {@code target} each resource named by {@code attribute} on {@code nodes}. */
	private static void copyReferenced(Book source, Book target, String baseHref, NodeList nodes, String attribute,
			Set<String> copied) {
		for (int i = 0; i < nodes.getLength(); i++) {
			String href = resolveHref(baseHref, ((Element) nodes.item(i)).getAttribute(attribute));
			if (href == null || !copied.add(href)) {
				continue;
			}
			Resource asset = source.getResources().getByHref(href);
			// References that do not match a resource in the source book are skipped.
			if (asset != null) {
				target.addResource(asset);
			}
		}
	}

	/**
	 * Resolves {@code reference}, as written in the page at {@code baseHref},
	 * to a book-level href. For a page one directory deep (e.g.
	 * {@code Text/a.xhtml}) and a reference starting with {@code ../} this
	 * yields the same value as dropping the first three characters.
	 * NOTE(review): assumes hrefs stored in the book's resources are relative
	 * to one common root directory - confirm against epublib.
	 *
	 * @return the resolved href, or {@code null} for an empty, fragment-only or
	 *         external (scheme-qualified) reference
	 */
	private static String resolveHref(String baseHref, String reference) {
		if (reference == null) {
			return null;
		}
		String ref = reference.trim();
		int cut = ref.indexOf('#');
		if (cut >= 0) {
			ref = ref.substring(0, cut);
		}
		cut = ref.indexOf('?');
		if (cut >= 0) {
			ref = ref.substring(0, cut);
		}
		if (ref.length() == 0 || ref.indexOf(':') >= 0 || ref.startsWith("//")) {
			return null;
		}

		List<String> segments = new ArrayList<String>();
		if (!ref.startsWith("/") && baseHref != null) {
			int slash = baseHref.lastIndexOf('/');
			if (slash > 0) {
				for (String segment : baseHref.substring(0, slash).split("/")) {
					if (segment.length() > 0) {
						segments.add(segment);
					}
				}
			}
		}
		for (String segment : ref.split("/")) {
			if (segment.length() == 0 || ".".equals(segment)) {
				continue;
			}
			if ("..".equals(segment)) {
				// Climbing above the root is ignored rather than treated as an error.
				if (!segments.isEmpty()) {
					segments.remove(segments.size() - 1);
				}
			} else {
				segments.add(segment);
			}
		}
		if (segments.isEmpty()) {
			return null;
		}

		StringBuilder resolved = new StringBuilder();
		for (String segment : segments) {
			if (resolved.length() > 0) {
				resolved.append('/');
			}
			resolved.append(segment);
		}
		return resolved.toString();
	}

	/** Turns a TOC title into a usable file name by replacing characters that Windows paths reject. */
	private static String safeFileName(String title) {
		if (title == null || title.trim().length() == 0) {
			return "untitled";
		}
		return title.replaceAll("[\\\\/:*?\"<>|\\p{Cntrl}]", "_");
	}

	/** Writes {@code book} to {@code target}, creating the parent directory if it does not exist yet. */
	private static void writeBook(Book book, File target) throws Exception {
		File dir = target.getParentFile();
		if (dir != null && !dir.exists() && !dir.mkdirs()) {
			throw new IllegalStateException("Cannot create output directory " + dir);
		}
		FileOutputStream out = new FileOutputStream(target);
		try {
			new EpubWriter().write(book, out);
		} finally {
			// Closed here so the file handle is released even when writing fails part-way.
			out.close();
		}
	}

}

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值