1.libs中导入jar包:
compile files('libs/jsoup-1.7.2.jar')
2.规则类:用于指定查询的url、请求方式(method)和参数(params),各字段需提供getter和setter
/**
 * Describes one extraction rule: the URL to request, the query parameters and
 * HTTP method to use, and how the returned HTML should be filtered first.
 */
public class Rule {
    /** Request method: HTTP GET. */
    public static final int GET = 0;
    /** Request method: HTTP POST. */
    public static final int POST = 1;

    /** Filter type: {@code resultTagName} is a CSS class name. */
    public static final int CLASS = 0;
    /** Filter type: {@code resultTagName} is an element id. */
    public static final int ID = 1;
    /** Filter type: {@code resultTagName} is a selector expression. */
    public static final int SELECTION = 2;

    /** Link (URL) to request. */
    private String url;

    /** Names of the request parameters. */
    private String[] params;

    /** Values of the request parameters, index-aligned with {@link #params}. */
    private String[] values;

    /**
     * Tag/name used for the first filtering pass over the returned HTML.
     * Set {@link #type} first so the name is interpreted correctly.
     */
    private String resultTagName;

    /** How {@link #resultTagName} is interpreted: CLASS / ID / SELECTION. Defaults to ID. */
    private int type = ID;

    /**
     * Request type: GET / POST. Defaults to GET.
     * The misspelled name is kept because ExtractService calls getRequestMoethod().
     */
    private int requestMoethod = GET;

    /** Creates a rule with the defaults (type ID, method GET) and no URL. */
    public Rule() {
    }

    /**
     * Creates a fully specified rule.
     *
     * @param url            link to request
     * @param params         parameter names, may be null
     * @param values         parameter values, index-aligned with {@code params}
     * @param resultTagName  name used for the first filtering pass
     * @param type           one of {@link #CLASS}, {@link #ID}, {@link #SELECTION}
     * @param requestMoethod one of {@link #GET}, {@link #POST}
     */
    public Rule(String url, String[] params, String[] values,
            String resultTagName, int type, int requestMoethod) {
        this.url = url;
        this.params = params;
        this.values = values;
        this.resultTagName = resultTagName;
        this.type = type;
        this.requestMoethod = requestMoethod;
    }

    /** @return the link to request */
    public String getUrl() {
        return url;
    }

    /** @param url the link to request */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the parameter names, possibly null */
    public String[] getParams() {
        return params;
    }

    /** @param params the parameter names */
    public void setParams(String[] params) {
        this.params = params;
    }

    /** @return the parameter values, possibly null */
    public String[] getValues() {
        return values;
    }

    /** @param values the parameter values, index-aligned with the parameter names */
    public void setValues(String[] values) {
        this.values = values;
    }

    /** @return the name used for the first filtering pass */
    public String getResultTagName() {
        return resultTagName;
    }

    /** @param resultTagName the name used for the first filtering pass */
    public void setResultTagName(String resultTagName) {
        this.resultTagName = resultTagName;
    }

    /** @return the filter type: CLASS, ID or SELECTION */
    public int getType() {
        return type;
    }

    /** @param type the filter type: CLASS, ID or SELECTION */
    public void setType(int type) {
        this.type = type;
    }

    /** @return the request method: GET or POST */
    public int getRequestMoethod() {
        return requestMoethod;
    }

    /** @param requestMoethod the request method: GET or POST */
    public void setRequestMoethod(int requestMoethod) {
        this.requestMoethod = requestMoethod;
    }
}
3.数据对象
/**
 * Plain data holder for one extracted link.
 * ExtractService fills in the href and text; the remaining fields are left
 * at their defaults unless a caller sets them.
 */
public class LinkTypeData {
    /** Numeric identifier of the entry. */
    private int id;

    /** Address (href) of the link. */
    private String linkHref;

    /** Title (visible text) of the link. */
    private String linkText;

    /** Summary. */
    private String summary;

    /** Content. */
    private String content;

    /** @return the identifier */
    public int getId() {
        return id;
    }

    /** @param id the identifier */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the address of the link */
    public String getLinkHref() {
        return linkHref;
    }

    /** @param linkHref the address of the link */
    public void setLinkHref(String linkHref) {
        this.linkHref = linkHref;
    }

    /** @return the title of the link */
    public String getLinkText() {
        return linkText;
    }

    /** @param linkText the title of the link */
    public void setLinkText(String linkText) {
        this.linkText = linkText;
    }

    /** @return the summary */
    public String getSummary() {
        return summary;
    }

    /** @param summary the summary */
    public void setSummary(String summary) {
        this.summary = summary;
    }

    /** @return the content */
    public String getContent() {
        return content;
    }

    /** @param content the content */
    public void setContent(String content) {
        this.content = content;
    }
}
4.查询类
/**
 * Fetches the page described by a {@link Rule} with jsoup and collects the
 * links ({@code <a>} elements) found inside the filtered part of the document.
 */
public class ExtractService {
    /** Log tag used for result dumps and error reports. */
    private static final String TAG = "CJY";

    /** Timeout handed to the jsoup connection, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 100000;

    /**
     * Result of the most recently finished {@link #getJsoup(Rule)} run.
     * Still assigned for existing callers; the callback itself no longer reads
     * it, because overlapping runs would overwrite each other's result.
     */
    public static List<LinkTypeData> e;

    /**
     * Runs {@link #extract(Rule)} on a worker thread and then logs the result
     * through a Handler created on the calling thread.
     * NOTE(review): a no-arg Handler is bound to the calling thread's Looper,
     * so this is expected to be called from the main thread - confirm.
     *
     * @param rule the rule to execute
     * @throws RuleException if the rule is invalid; checked before the worker
     *         starts so the caller can catch it
     */
    public static void getJsoup(final Rule rule) {
        // Fail fast here: an exception thrown inside the worker thread cannot be caught by the caller.
        validateRule(rule);
        final Handler handler = new Handler();
        new Thread() {
            @Override
            public void run() {
                // Network request.
                final List<LinkTypeData> result = extract(rule);
                e = result;
                // Hand this run's own result back to the Handler's thread.
                handler.post(new Runnable() {
                    @Override
                    public void run() {
                        printf(result);
                    }
                });
            }
        }.start();
    }

    /**
     * Dumps every entry to the log.
     *
     * @param datas entries to print; null is ignored
     */
    public static void printf(List<LinkTypeData> datas) {
        if (datas == null) {
            return;
        }
        for (LinkTypeData data : datas) {
            Log.e(TAG, "A=" + data.getLinkText());
            Log.e(TAG, "B=" + data.getLinkHref());
            Log.e(TAG, "C=" + data.getId());
            Log.e(TAG, "D=" + data.getContent());
            Log.e(TAG, "E=" + data.getSummary());
            Log.e(TAG, "F=CJYCJYCJYCJYCJYCJY");
        }
    }

    /**
     * Requests the rule's URL and returns one entry per {@code <a>} element
     * found inside the elements selected by the rule.
     * Performs blocking network I/O on the calling thread.
     *
     * @param rule the rule to execute
     * @return the extracted links; empty (never null) when nothing matched or
     *         when the request failed with an IOException
     * @throws RuleException if the rule is invalid
     */
    public static List<LinkTypeData> extract(Rule rule) {
        validateRule(rule);
        List<LinkTypeData> datas = new ArrayList<LinkTypeData>();
        try {
            Connection conn = Jsoup.connect(rule.getUrl());
            // Query parameters; validateRule guarantees values matches params here.
            String[] params = rule.getParams();
            String[] values = rule.getValues();
            if (params != null) {
                for (int i = 0; i < params.length; i++) {
                    conn.data(params[i], values[i]);
                }
            }
            conn.timeout(TIMEOUT_MILLIS);

            // Unknown request types fall back to GET, the default declared on Rule.
            Document doc;
            switch (rule.getRequestMoethod()) {
            case Rule.POST:
                doc = conn.post();
                break;
            case Rule.GET:
            default:
                doc = conn.get();
                break;
            }

            Elements results = selectResults(doc, rule.getType(), rule.getResultTagName());
            for (Element result : results) {
                for (Element link : result.getElementsByTag("a")) {
                    LinkTypeData data = new LinkTypeData();
                    data.setLinkHref(link.attr("href"));
                    data.setLinkText(link.text());
                    datas.add(data);
                }
            }
        } catch (IOException ex) {
            Log.e(TAG, "extract failed: " + rule.getUrl(), ex);
        }
        return datas;
    }

    /** Applies the rule's first-pass filter to the document. */
    private static Elements selectResults(Document doc, int type, String resultTagName) {
        // Without a name there is nothing to filter on, so use the whole body.
        if (TextUtils.isEmpty(resultTagName)) {
            return doc.getElementsByTag("body");
        }
        Elements results = new Elements();
        switch (type) {
        case Rule.CLASS:
            return doc.getElementsByClass(resultTagName);
        case Rule.ID:
            // getElementById returns null when the id is absent; skip it so the caller's loop stays null-free.
            Element byId = doc.getElementById(resultTagName);
            if (byId != null) {
                results.add(byId);
            }
            return results;
        case Rule.SELECTION:
            return doc.select(resultTagName);
        default:
            return results;
        }
    }

    /**
     * Performs the necessary checks on the rule.
     *
     * @throws RuleException if the rule is null, the URL is empty or not
     *         http/https, or the parameter names and values do not pair up
     */
    private static void validateRule(Rule rule) {
        if (rule == null) {
            throw new RuleException("rule不能为空!");
        }
        String url = rule.getUrl();
        if (TextUtils.isEmpty(url)) {
            throw new RuleException("url不能为空!");
        }
        if (!url.startsWith("http://") && !url.startsWith("https://")) {
            throw new RuleException("url的格式不正确!");
        }
        String[] params = rule.getParams();
        if (params != null) {
            String[] values = rule.getValues();
            // A missing values array counts as zero values, so {params, null} is rejected instead of crashing later.
            int valueCount = values == null ? 0 : values.length;
            if (params.length != valueCount) {
                throw new RuleException("参数的键值对个数不匹配!");
            }
        }
    }
}
5.异常类
/**
 * Unchecked exception raised when a Rule fails validation.
 * Each constructor simply forwards its arguments to {@link RuntimeException}.
 */
public class RuleException extends RuntimeException {

    /** Creates an exception with neither message nor cause. */
    public RuleException() {
    }

    /**
     * Creates an exception with a message.
     *
     * @param message description of the problem
     */
    public RuleException(String message) {
        super(message);
    }

    /**
     * Creates an exception wrapping another throwable.
     *
     * @param cause the underlying throwable
     */
    public RuleException(Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception with both a message and a cause.
     *
     * @param message description of the problem
     * @param cause   the underlying throwable
     */
    public RuleException(String message, Throwable cause) {
        super(message, cause);
    }
}
6.使用方法:
// Example: collect the links inside everything matched by the selector below,
// using a GET request without query parameters.
String pageUrl = "http://gold.cnfol.com/mingjiadianjin/#";
String[] noParams = new String[]{};
String[] noValues = new String[]{};
String listItemSelector = "ul.ColumnList li";
Rule rule = new Rule(pageUrl, noParams, noValues,
        listItemSelector, Rule.SELECTION, Rule.GET);
// Runs the request on a background thread and logs the extracted links.
ExtractService.getJsoup(rule);