package com.ninemax.reptile;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Utility that downloads a web page as text and extracts the {@code src} and
 * {@code alt} attributes of every {@code img} tag in it using jsoup.
 */
public class JsoupUtil {

    /** Maximum time to wait while establishing the connection, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Maximum time to wait for data while reading the response, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 30000;

    /**
     * Demo entry point: fetches a fixed page decoded as GBK and prints one map
     * per {@code img} tag found in it.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String html = getHtml("http://tech.qq.com/", "GBK");
        for (Map<String, Object> image : parseHtml(html)) {
            System.out.println(image);
        }
    }

    /**
     * Downloads the content behind {@code url} and returns it as text.
     *
     * <p>Each line is trimmed of leading/trailing whitespace and terminated with
     * a single {@code '\n'}. Failures are reported on the console and are not
     * propagated: the method returns whatever was read before the failure,
     * which is the empty string when nothing could be read.
     *
     * @param url      address of the page to fetch
     * @param encoding name of the charset used to decode the response bytes
     * @return the page text collected so far; never {@code null}
     */
    public static String getHtml(String url, String encoding) {
        // Holds the page text; a local, so no synchronized buffer is needed.
        StringBuilder page = new StringBuilder();
        InputStream in = null;
        BufferedReader reader = null;
        try {
            URLConnection conn = new URL(url).openConnection();
            // Without timeouts a stalled server would block this call forever.
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(READ_TIMEOUT_MS);
            // Keep a handle on the raw stream so it can still be closed if
            // creating the decoding reader fails (e.g. unsupported charset).
            in = conn.getInputStream();
            reader = new BufferedReader(new InputStreamReader(in, encoding));
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line.trim()).append('\n');
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
            System.out.println("网络连接失败!");
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("网络连接打开失败!");
        } finally {
            // Closing the reader also closes the stream it wraps; fall back to
            // the raw stream when the reader was never created.
            if (reader != null) {
                closeQuietly(reader);
            } else {
                closeQuietly(in);
            }
        }
        return page.toString();
    }

    /**
     * Parses {@code html} and collects the attributes of every {@code img} tag.
     *
     * <p>Each result map has exactly two keys, {@code "src"} and {@code "alt"},
     * holding the trimmed attribute values of one tag, in document order.
     *
     * @param html page source to parse; {@code null} yields an empty list
     * @return one map per {@code img} tag; never {@code null}
     */
    public static List<Map<String, Object>> parseHtml(String html) {
        List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
        if (html == null) {
            // Guard so a missing page gives an empty result instead of an
            // exception from the parser.
            return result;
        }
        Document document = Jsoup.parse(html);
        Elements images = document.getElementsByTag("img");
        for (Element image : images) {
            // A fresh map per tag, so entries never share state.
            Map<String, Object> attributes = new HashMap<String, Object>();
            attributes.put("src", image.attr("src").trim());
            attributes.put("alt", image.attr("alt").trim());
            result.add(attributes);
        }
        return result;
    }

    /** Closes {@code resource} if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails; the data was already read.
        }
    }
}
// Run result: (not included in this file)
// Required JAR: jsoup-1.10.2.jar