使用 dom4j 读取通过 HTTP 获取的 XML 文件,并结合 XPath 提取数据
使用httpClient、dom4j包
maven依赖
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.0.1</version>
</dependency>
<dependency>
    <groupId>dom4j</groupId>
    <artifactId>dom4j</artifactId>
    <version>1.6</version>
</dependency>
<dependency>
    <groupId>jaxen</groupId>
    <artifactId>jaxen</artifactId>
    <version>1.1.1</version>
    <exclusions>
        <exclusion>
            <groupId>com.ibm.icu</groupId>
            <artifactId>icu4j</artifactId>
        </exclusion>
    </exclusions>
</dependency>
示例代码:
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
/**
 * Example: download an XML document over HTTP with Apache HttpClient, parse
 * it with dom4j, and print the text of every node matched by an XPath
 * expression.
 */
public class HttpXML {

    /** Address of the XML (RSS) document that the example downloads. */
    private static final String FEED_URL = "http://bbs.btwuji.com/rss.php";

    /** XPath matching every {@code title} element that is a child of an {@code item}. */
    private static final String TITLE_XPATH = "//item/title";

    /**
     * Runs the example.
     *
     * @param args ignored
     * @throws Exception if the download or the XML parsing fails
     */
    public static void main(String[] args) throws Exception {
        test();
    }

    /**
     * Fetches {@link #FEED_URL}, parses the response body as XML and prints
     * the text of each element selected by {@link #TITLE_XPATH} to standard
     * output, one per line. Prints nothing when the response has no body.
     *
     * @throws IllegalStateException if the server answers with a non-2xx status
     * @throws Exception if the request, the parser configuration or the
     *         XML parsing fails
     */
    public static void test() throws Exception {
        SAXReader reader = new SAXReader();
        // The document comes from a remote server, so do not let the parser
        // resolve external entities declared in it (XXE protection).
        reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
        reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);

        HttpClient client = new DefaultHttpClient();
        try {
            HttpResponse response = client.execute(new HttpGet(FEED_URL));

            // Fail with a clear message instead of feeding an error page to the XML parser.
            int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300) {
                throw new IllegalStateException(
                        "Unexpected HTTP status " + status + " for " + FEED_URL);
            }

            HttpEntity entity = response.getEntity();
            if (entity == null) {
                return;
            }

            InputStream is = entity.getContent();
            try {
                Document doc = reader.read(is);
                List<?> titles = doc.selectNodes(TITLE_XPATH);
                for (Object node : titles) {
                    System.out.println(((Element) node).getText());
                }
            } finally {
                // Closing the content stream releases the underlying connection.
                is.close();
            }
        } finally {
            // Release all connections held by this client, even on failure.
            client.getConnectionManager().shutdown();
        }
    }
}
参考资料:
XPATH:http://www.w3school.com.cn/xpath/
dom4j:http://www.ibm.com/developerworks/cn/xml/x-dom4j.html