package com.test.html.jsoup;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.naming.Context;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Base implementation of {@link IHtmlBehavior} that holds the state shared by
 * concrete page parsers: the source URL, the jsoup connection/document and the
 * title, header and sub title data extracted from a page.
 *
 * <p>This class only prepares the jsoup {@link Connection}; it never assigns
 * {@link #jsoupDocument} itself, so fetching and parsing are left to subclasses.
 */
public abstract class BasicHtmlFramework implements IHtmlBehavior {
    /**
     * List returned by {@link #getShowList()}. Subclasses do not need to
     * instantiate it; they assign a reference to a list they already own.
     * It is {@code null} until a subclass assigns it.
     */
    protected List<? extends Object> showList;
    /** Parsed page; not assigned anywhere in this class. */
    public Document jsoupDocument;
    /** Connection created by {@link #doGetHtmlJsoupDocument(String)}. */
    public Connection jsoupConnection;
    /** URL of the original web page. */
    protected String strUrl;
    /** Main title of the page. */
    protected String htmlTitle;
    /** Header of the page. */
    protected String htmlHeader;
    /** Sub titles of the page; created empty by both constructors. */
    protected List<SubTitle> htmlSubTitles;
    /**
     * Fixed array of ten slots; the no-argument constructor stores {@code this}
     * in slot 0. NOTE(review): nothing visible here uses the other slots.
     */
    protected BasicHtmlFramework[] mBasicHtmlFrameworks = new BasicHtmlFramework[10];

    /**
     * Creates a framework without a URL, records this instance in slot 0 of
     * {@link #mBasicHtmlFrameworks} and prints that slot to standard output.
     */
    public BasicHtmlFramework() {
        mBasicHtmlFrameworks[0] = this;
        // Printed before init(), so htmlSubTitles still shows as null here.
        System.out.println("mBasicHtmlFrameworks[0] -> " + mBasicHtmlFrameworks[0]);
        init();
    }

    /**
     * Creates a framework for the given page URL.
     *
     * @param strUrl URL of the page to work on; stored as is, not validated
     */
    public BasicHtmlFramework(String strUrl) {
        this.strUrl = strUrl;
        init();
    }

    /** Initialises the state both constructors share. */
    private void init() {
        htmlSubTitles = new ArrayList<>();
    }

    /**
     * Returns the list produced by the most recent parse.
     *
     * @return the current {@link #showList}, or {@code null} if none was assigned yet
     */
    @Override
    public List<? extends Object> getShowList() {
        return showList;
    }

    /**
     * Remembers the URL and prepares a jsoup connection to it with a
     * 5000 ms timeout. No request is executed here.
     *
     * @param urlParam URL of the page to connect to
     */
    @Override
    public void doGetHtmlJsoupDocument(String urlParam) {
        setStrUrl(urlParam);
        jsoupConnection = Jsoup.connect(urlParam).timeout(5000);
    }

    /**
     * Validates the parse selector; the base class performs no parsing.
     *
     * @param param selector of what to parse; must not be negative
     * @throws IOException if {@code param} is negative
     */
    @Override
    public void doParseHtmlDocument(int param) throws IOException {
        if (param < 0) {
            // 0 is accepted, so the message states ">= 0" to match the check.
            throw new IOException("unexpected param value, required: param >= 0, but was: " + param);
        }
    }

    /**
     * Hook for handling raw HTML content; intentionally empty in the base class.
     *
     * @param htmlContent HTML content handed in by the caller
     */
    @Override
    public void doGetHtmlContent(String htmlContent) {
        // No default behaviour.
    }

    public String getStrUrl() {
        return strUrl;
    }

    public void setStrUrl(String strUrl) {
        this.strUrl = strUrl;
    }

    public String getHtmlTitle() {
        return htmlTitle;
    }

    public void setHtmlTitle(String htmlTitle) {
        this.htmlTitle = htmlTitle;
    }

    public String getHtmlHeader() {
        return htmlHeader;
    }

    public void setHtmlHeader(String htmlHeader) {
        this.htmlHeader = htmlHeader;
    }

    public List<SubTitle> getHtmlSubTitles() {
        return htmlSubTitles;
    }

    public void setHtmlSubTitles(List<SubTitle> htmlSubTitles) {
        this.htmlSubTitles = htmlSubTitles;
    }

    @Override
    public String toString() {
        return "BasicHtmlFramework [htmlTitle=" + htmlTitle + ", htmlHeader="
                + htmlHeader + ", htmlSubTitles=" + htmlSubTitles + "]";
    }

    /** A sub title of the page together with the link it points to. */
    public class SubTitle {
        private String subTitle;
        private String subTitleLink;

        public String getSubTitle() {
            return subTitle;
        }

        public void setSubTitle(String subTitle) {
            this.subTitle = subTitle;
        }

        public String getSubTitleLink() {
            return subTitleLink;
        }

        public void setSubTitleLink(String subTitleLink) {
            this.subTitleLink = subTitleLink;
        }

        /**
         * Converts the stored link into a {@link URL}.
         *
         * @return the link as a URL, or {@code null} when the link is unset,
         *         empty or not a well-formed absolute URL
         */
        public URL getUrlFromLink() {
            // A link that was never set is not an error worth a stack trace.
            if (subTitleLink == null || subTitleLink.isEmpty()) {
                return null;
            }
            try {
                return new URL(subTitleLink);
            } catch (MalformedURLException e) {
                e.printStackTrace();
                return null;
            }
        }

        /** Readable form, so the enclosing class's toString() shows the entries. */
        @Override
        public String toString() {
            return "SubTitle [subTitle=" + subTitle + ", subTitleLink=" + subTitleLink + "]";
        }
    }
}
以上代码定义了本次解析 HTML 所用的抽象类，它同时提供了 IHtmlBehavior 接口的基础实现。
package com.test.html.jsoup;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * {@link BasicHtmlFramework} that downloads a page with jsoup and extracts
 * either its navigation sub titles or its list of book types.
 *
 * <p>Parsed entries are appended to lists owned by this instance, so parsing
 * the same selector twice adds the entries twice.
 */
public class RongShuXiaHtmlFramework extends BasicHtmlFramework {
    /** Never filled by the code in this class; exposed through {@link #getBooks()}. */
    private List<AllShowBook> books = new LinkedList<>();
    /** Book types collected by {@link #handleHtmlBookTypes()}. */
    private List<RSXBookTypes> bookTypes = new LinkedList<>();
    /** Text of the {@code h2} heading found in the book type container. */
    private String h2TitleBookType;

    /**
     * @param strUrl URL of the page to work on
     */
    public RongShuXiaHtmlFramework(String strUrl) {
        super(strUrl);
    }

    public RongShuXiaHtmlFramework() {
    }

    public List<? extends RSXBookTypes> getBookTypes() {
        return bookTypes;
    }

    public List<? extends RSXBookTypes> getBooks() {
        return books;
    }

    public String getBookTypeTitle() {
        return h2TitleBookType;
    }

    /**
     * Parses the book types: reads the {@code h2} heading and every {@code li}
     * of the first element whose class attribute is {@code clear} inside the
     * first element whose class attribute is {@code showContentLeft}.
     * Returns without touching any state when either element is missing.
     */
    private synchronized void handleHtmlBookTypes() {
        Element container = jsoupDocument.getElementsByAttributeValue("class", "showContentLeft").first();
        if (container == null) {
            // first() yields null when nothing matched; the page has no such section.
            return;
        }
        h2TitleBookType = container.select("h2").text();
        Element typeList = container.getElementsByAttributeValue("class", "clear").first();
        if (typeList == null) {
            return;
        }
        for (Element item : typeList.getElementsByTag("li")) {
            String name = item.getElementsByTag("a").text();
            String link = item.select("a").attr("href").trim();
            String count = item.getElementsByTag("span").text();
            // Cut the count off the name. An empty count must be skipped:
            // every string contains "" at index 0, which would erase the name.
            if (!count.isEmpty()) {
                int countStart = name.indexOf(count);
                if (countStart >= 0) {
                    name = name.substring(0, countStart);
                }
            }
            RSXBookTypes bookType = new RSXBookTypes();
            bookType.typeBookCount = count;
            bookType.typeBookLink = link;
            bookType.typeBookName = name;
            bookTypes.add(bookType);
        }
        showList = bookTypes;
    }

    /**
     * Parses the titles: stores the text of the {@code title} tag and turns
     * every {@code li} below the element with id {@code nav} into a
     * {@link SubTitle}. An entry without an href gets the page URL as its link.
     * When there is no {@code nav} element only the page title is updated.
     */
    private synchronized void handleHtmlTitle() {
        htmlTitle = jsoupDocument.head().getElementsByTag("title").text().trim();
        Element nav = jsoupDocument.getElementById("nav");
        if (nav == null) {
            // getElementById returns null for an unknown id.
            return;
        }
        for (Element item : nav.getElementsByTag("li")) {
            String title = item.getElementsByTag("a").text();
            // attr() yields "" (never null) when the attribute is absent.
            String link = item.select("a").attr("href").trim();
            SubTitle subTitle = new SubTitle();
            subTitle.setSubTitleLink(link.isEmpty() ? strUrl : link);
            subTitle.setSubTitle(title);
            htmlSubTitles.add(subTitle);
        }
        showList = htmlSubTitles;
    }

    /**
     * Prepares the connection through the base class and downloads the page
     * with a GET request. On failure the stack trace is printed and
     * {@link #jsoupDocument} is left {@code null}.
     *
     * @param urlParam URL of the page to download
     */
    @Override
    public void doGetHtmlJsoupDocument(String urlParam) {
        // Drop the previous page first so a failed fetch cannot leave a stale
        // document that would later be parsed under the new URL.
        jsoupDocument = null;
        super.doGetHtmlJsoupDocument(urlParam);
        if (jsoupConnection == null) {
            return;
        }
        try {
            // A POST request may be needed instead for some pages.
            jsoupDocument = jsoupConnection.get();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses the downloaded document. Does nothing when no document is loaded.
     * This override does not call the base implementation, so it never throws
     * for a negative value.
     *
     * @param param 0 parses the titles, 1 parses the book types; any other
     *              value is ignored
     */
    @Override
    public synchronized void doParseHtmlDocument(int param) {
        if (jsoupDocument == null) {
            return;
        }
        switch (param) {
            case 0:
                handleHtmlTitle();
                break;
            case 1:
                handleHtmlBookTypes();
                break;
            default:
                break;
        }
    }

    /**
     * Hook for handling raw HTML content; intentionally empty.
     *
     * @param htmlContent HTML content handed in by the caller
     */
    @Override
    public void doGetHtmlContent(String htmlContent) {
        // No behaviour.
    }

    /** One book type entry: count text, display name and link. */
    public class RSXBookTypes {
        public String typeBookCount;
        public String typeBookName;
        public String typeBookLink;
    }

    /** A book type entry that additionally carries a picture link. */
    public class AllShowBook extends RSXBookTypes {
        public String picLink;
    }
}
以上是具体实现类 RongShuXiaHtmlFramework 的完整内容。
相关DEMO下载:
http://download.csdn.net/detail/tangzhide/9647865