commons-httpclient和htmlparser应用之博客搬家

把以前在百度空间收集的文章搬到javaeye了,主要用到的lib就是commons-httpclient和htmlparser,在此记录下一些关键的代码片段。

 

jar包清单

commons-codec-1.3.jar
commons-httpclient-3.1.jar
commons-lang.jar
commons-logging-1.1.jar
htmlparser.jar
log4j-1.2.15.jar
slf4j-api-1.5.8.jar
slf4j-log4j12-1.5.8.jar

 

扩展 org.apache.commons.httpclient.HttpClient,覆盖其executeMethod方法处理cookie

package util;

import java.io.IOException;

import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpState;

public class HttpClientEx extends HttpClient {

    private HttpState httpState = new HttpState(); // http状态对象,主要保存cookie
    private String cookie = "";

    public int executeMethod(HttpMethod httpMethod) throws IOException, HttpException {
        String cookie = this.getCookie();
        String uri = httpMethod.getURI().getHost();
        httpState.addCookie(new Cookie(uri, "cookie", cookie, "/", null, false));
        this.setState(httpState);

        int statues = super.executeMethod(httpMethod);

        Header[] headerArray = httpMethod.getResponseHeaders();
        for (Header h : headerArray) {
            if (h.getName().trim().equalsIgnoreCase("Set-Cookie")) {
                if (!this.getCookie().equals("")) { // 如果值不为空
                    this.setCookie(this.getCookie() + ";" + h.getValue());
                } else {
                    this.setCookie(h.getValue());
                }
            }
        }
        return statues;
    }

    public String getCookie() {
        return cookie;
    }

    public void setCookie(String cookie) {
        this.cookie = cookie;
    }

}

 

 get url

// Fetch the blog list page with a GET request and decode it using the charset
// reported by the response.
String url = HTTP_HI_BAIDU_COM + USER_ID + "/blog";
HttpClient client = new HttpClientEx();
GetMethod getMethod = new GetMethod(url);
String body;
try {
    client.executeMethod(getMethod);
    body = new String(getMethod.getResponseBody(), getMethod.getResponseCharSet());
} finally {
    // Always give the connection back, even when the request or the body read throws.
    getMethod.releaseConnection();
}
logger.debug("日志列表页面\n{}", body);

 

分析html页面中的div元素

// Parse the fetched page, keep only the <div> tags, and log the one whose id is "m_blog".
Parser parser = Parser.createParser(body, getMethod.getResponseCharSet());
NodeFilter filter = new TagNameFilter("div");
NodeList nodeList = parser.parse(filter);
for (int i = 0, total = nodeList.size(); i < total; i++) {
   Div div = (Div) nodeList.elementAt(i);
   String id = div.getAttribute("id");
   if (id == null || !id.equals("m_blog")) {
     continue; // not the div we are looking for
   }
   logger.debug("id为m_blog的div内容\n{}", div.toHtml());
}

 

查找含有特定文字的节点集合

// Collect the nodes under div that contain the text "类别" ("category").
NodeList searchFor = div.searchFor("类别");

 

设置User-Agent和post数据字符编码

// User-Agent header value sent with requests.
private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; iCafeMedia; InfoPath.2)";
// Character encoding used for request content (e.g. POST data).
private static final String CHARSET = "UTF-8";

// Build the host-level parameters and install them on the client's host configuration.
HostParams params = new HostParams();
params.setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, CHARSET);
params.setParameter(HttpMethodParams.USER_AGENT, USER_AGENT);
client.getHostConfiguration().setParams(params);

 

post url

// Submit the login form with a POST request.
String url = HOST + "/login";
PostMethod postMethod = new PostMethod(url);
postMethod.setParameter("name", "fangwei");
postMethod.setParameter("password", "******");
try {
    client.executeMethod(postMethod);
    // Read anything needed from the response here, before the connection is released.
} finally {
    // Always give the connection back, even when the request throws.
    postMethod.releaseConnection();
}

 

 

转载于:https://my.oschina.net/fangwei/blog/717262

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值