import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import nl.siegmann.epublib.domain.Book;
import nl.siegmann.epublib.domain.MediaType;
import nl.siegmann.epublib.domain.Resource;
import nl.siegmann.epublib.domain.TOCReference;
import nl.siegmann.epublib.domain.TableOfContents;
import nl.siegmann.epublib.epub.EpubReader;
import nl.siegmann.epublib.epub.EpubWriter;
import nl.siegmann.epublib.service.MediatypeService;
import nl.siegmann.epublib.util.ResourceUtil;
/**
 * Splits one EPUB into several smaller EPUB files.
 *
 * <p>The table of contents of the source book is walked recursively; every entry
 * that sits at nesting depth {@code SPLIT_LEVEL} and has children becomes its own
 * output book. The children of that entry become the sections of the output book,
 * and every stylesheet ({@code <link href>}) and image ({@code <img src>}) the
 * sections reference is copied over from the source book.
 *
 * <p>Usage: {@code java EpubRead [input.epub [outputDirectory]]}. Both arguments
 * are optional and default to the paths that were previously hard-coded.
 */
public class EpubRead {
    /** TOC entries selected by {@link #getNode}; each one is written as a separate book. */
    static List<TOCReference> tbList = new ArrayList<TOCReference>();

    /** Input file used when no command-line argument is given. */
    private static final String DEFAULT_INPUT = "C:\\TEMP\\sc.epub";

    /** Output directory used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT_DIR = "C:\\TEMP\\output\\";

    /** Encoding the book is read with; also used to decode and re-encode edited pages. */
    private static final String ENCODING = "UTF-8";

    /** TOC nesting depth (root entries are depth 0) at which the book is split. */
    private static final int SPLIT_LEVEL = 2;

    /**
     * Prefix stripped from hrefs found inside pages before looking them up in the book.
     * NOTE(review): presumably pages live one directory below the resource root, so
     * their links start with "../" -- confirm against the actual book layout.
     */
    private static final String PARENT_PREFIX = "../";

    /** Title of the per-volume index page whose "back to master index" link is removed. */
    private static final String INDEX_TITLE = "目录";

    /** Literal markup of the "back to master index" link removed from index pages. */
    private static final String BACK_LINK = "<a href=\"part0001.xhtml\">返回总目录</a>";

    /**
     * Reads the source book, collects the TOC entries to split on, and writes one
     * EPUB per collected entry.
     *
     * @param args optional: {@code args[0]} is the input EPUB path, {@code args[1]}
     *             is the output directory
     */
    public static void main(String[] args) {
        String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputDir = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_DIR;
        try {
            // Stylesheets and images are loaded lazily; only text pages are read eagerly.
            MediaType[] lazyTypes = { MediatypeService.CSS, MediatypeService.GIF, MediatypeService.JPG,
                    MediatypeService.PNG };
            Book book = new EpubReader().readEpubLazy(fileName, ENCODING, Arrays.asList(lazyTypes));

            TableOfContents tableOfContents = book.getTableOfContents();
            for (TOCReference ref : tableOfContents.getTocReferences()) {
                getNode(ref, 0);
            }
            for (TOCReference ref : tbList) {
                writeVolume(book, ref, outputDir);
            }
        } catch (Exception e) {
            // Top-level boundary of a command-line tool: report and stop.
            e.printStackTrace();
        }
    }

    /**
     * Recursively collects into {@link #tbList} every TOC entry that has children
     * and sits at depth {@code SPLIT_LEVEL}. Entries without children are ignored
     * and not descended into.
     *
     * @param toc   the TOC entry to inspect
     * @param level nesting depth of {@code toc}; callers pass 0 for root entries
     */
    public static void getNode(TOCReference toc, Integer level) {
        List<TOCReference> children = toc.getChildren();
        if (children == null || children.isEmpty()) {
            return; // leaf entry: nothing to split on and nothing below it
        }
        if (level == SPLIT_LEVEL) {
            tbList.add(toc);
        }
        for (TOCReference child : children) {
            getNode(child, level + 1);
        }
    }

    /** Builds the output book for one collected TOC entry and writes it to {@code outputDir}. */
    private static void writeVolume(Book source, TOCReference volume, String outputDir) throws Exception {
        Book target = new Book();
        target.getMetadata().addTitle(volume.getTitle());

        // The cover is the first image referenced by the volume's own page, if any.
        Resource cover = findCover(source, volume.getResource());
        if (cover != null) {
            target.setCoverImage(cover);
        }

        List<TOCReference> sections = volume.getChildren();
        if (sections != null) {
            for (TOCReference section : sections) {
                addSection(source, target, section);
            }
        }

        File directory = new File(outputDir);
        // Result deliberately ignored: if the directory cannot be created, opening the
        // output stream below fails with a descriptive exception.
        directory.mkdirs();
        File outputFile = new File(directory, sanitizeFileName(volume.getTitle()) + ".epub");

        FileOutputStream out = new FileOutputStream(outputFile);
        try {
            new EpubWriter().write(target, out);
        } finally {
            // Closing twice is harmless, so this is safe even if the writer already closed it.
            out.close();
        }
    }

    /** Returns the resource of the first {@code <img>} in {@code page}, or null if there is none. */
    private static Resource findCover(Book source, Resource page) throws Exception {
        if (page == null) {
            return null;
        }
        Document document = ResourceUtil.getAsDocument(page);
        if (document == null) {
            return null;
        }
        NodeList images = document.getElementsByTagName("img");
        if (images.getLength() == 0) {
            return null;
        }
        String src = ((Element) images.item(0)).getAttribute("src");
        if (src.length() == 0) {
            return null;
        }
        return source.getResources().getByHref(stripParentPrefix(src));
    }

    /** Adds one section to {@code target} together with the stylesheets and images it references. */
    private static void addSection(Book source, Book target, TOCReference section) throws Exception {
        Resource page = section.getResource();
        if (page == null) {
            System.err.println("Skipping TOC entry without resource: " + section.getTitle());
            return;
        }

        if (INDEX_TITLE.equals(section.getTitle())) {
            // Drop the link back to the master index; it has no target in the split book.
            // Decode and encode with an explicit charset so the result does not depend on
            // the platform default, and match the link literally rather than as a regex.
            String html = new String(page.getData(), ENCODING).replace(BACK_LINK, "");
            Resource edited = new Resource(html.getBytes(ENCODING), page.getHref());
            target.addSection(section.getTitle(), edited);
            System.out.println("------\n" + html);
        } else {
            target.addSection(section.getTitle(), page);
        }

        Document doc = ResourceUtil.getAsDocument(page);
        if (doc == null) {
            return;
        }
        copyReferenced(source, target, collectHrefs(doc, "link", "href"));
        copyReferenced(source, target, collectHrefs(doc, "img", "src"));
    }

    /** Collects the distinct, non-empty values of {@code attribute} on all {@code tag} elements. */
    private static Set<String> collectHrefs(Document doc, String tag, String attribute) {
        Set<String> hrefs = new HashSet<String>();
        NodeList nodes = doc.getElementsByTagName(tag);
        for (int i = 0; i < nodes.getLength(); i++) {
            String value = ((Element) nodes.item(i)).getAttribute(attribute);
            if (value.length() > 0) {
                hrefs.add(stripParentPrefix(value));
            }
        }
        return hrefs;
    }

    /** Copies each resource named in {@code hrefs} from {@code source} to {@code target}, skipping unknown ones. */
    private static void copyReferenced(Book source, Book target, Set<String> hrefs) {
        for (String href : hrefs) {
            Resource resource = source.getResources().getByHref(href);
            if (resource == null) {
                System.err.println("Referenced resource not found in source book: " + href);
                continue;
            }
            target.addResource(resource);
        }
    }

    /** Removes a single leading "../" from {@code href}; other hrefs are returned unchanged. */
    private static String stripParentPrefix(String href) {
        return href.startsWith(PARENT_PREFIX) ? href.substring(PARENT_PREFIX.length()) : href;
    }

    /** Replaces characters that are not allowed in Windows file names with an underscore. */
    private static String sanitizeFileName(String title) {
        return String.valueOf(title).replaceAll("[\\\\/:*?\"<>|]", "_");
    }
}
// epublib: split an e-book at a specified table-of-contents level
// (Source page footer: latest recommended article published 2024-07-26 14:42:54)