使用 HttpClient 和 HtmlParser 获取指定网址的 title


package com.xinhuanet.cloudDesk.controller;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpConnectionManager;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
import org.htmlparser.Parser;
import org.htmlparser.visitors.HtmlPage;

/**
 * Command-line demo: downloads a web page with Commons HttpClient 3.x,
 * works out which character set the page is encoded in, and prints the
 * page source followed by its {@code <title>} as extracted by HTMLParser.
 */
public class R {

    /** Page fetched when no URL is given on the command line. */
    private static final String DEFAULT_URL = "http://www.poetry4cn.com";

    /** HTTP proxy every request is routed through. */
    private static final String PROXY_HOST = "202.84.17.41";
    private static final int PROXY_PORT = 8080;

    /**
     * Socket and connect timeout in milliseconds. The previous value of
     * 20,000,000 ms (about 5.5 hours) let a stalled request hang almost
     * indefinitely.
     */
    private static final int TIMEOUT_MILLIS = 20000;

    /**
     * Charset used to sniff the raw bytes and as the last-resort decoding.
     * ISO-8859-1 maps every byte to exactly one char, so the ASCII text of a
     * {@code <meta>} tag survives whatever the real page encoding is.
     */
    private static final String FALLBACK_CHARSET = "ISO-8859-1";

    /**
     * Matches a {@code charset=...} declaration inside a single
     * {@code <meta ...>} tag. {@code [^>]} keeps the match inside the tag and
     * also spans line breaks; the match is case-insensitive because HTML tag
     * and attribute names are.
     */
    private static final Pattern META_CHARSET = Pattern.compile(
            "<meta[^>]+?charset\\s*=\\s*[\"']?([-\\w]+)",
            Pattern.CASE_INSENSITIVE);

    /**
     * Fetches a page and prints its detected charset, its source and its title.
     *
     * @param args optional; {@code args[0]} is the URL to fetch. When absent,
     *             {@link #DEFAULT_URL} is used.
     * @throws Exception if the request, the decoding or the HTML parsing fails
     */
    public static void main(String[] args) throws Exception {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;

        HttpClient httpClient = new HttpClient();
        httpClient.getHostConfiguration().setProxy(PROXY_HOST, PROXY_PORT);

        HttpConnectionManager httpConnManager = httpClient
                .getHttpConnectionManager();
        if (httpConnManager != null) {
            HttpConnectionManagerParams mgrParams = new HttpConnectionManagerParams();
            mgrParams.setSoTimeout(TIMEOUT_MILLIS);
            mgrParams.setTcpNoDelay(true);
            mgrParams.setConnectionTimeout(TIMEOUT_MILLIS);
            mgrParams.setLinger(0);
            mgrParams.setStaleCheckingEnabled(false);
            httpConnManager.setParams(mgrParams);
        }

        GetMethod methodGet = new GetMethod(url);
        byte[] body;
        String headerCharset;
        try {
            httpClient.executeMethod(methodGet);
            body = methodGet.getResponseBody();
            // Charset announced by the Content-Type response header.
            headerCharset = methodGet.getResponseCharSet();
        } finally {
            // Always hand the connection back, even when the request fails.
            methodGet.releaseConnection();
        }
        if (body == null) {
            body = new byte[0];
        }

        // Sniff with a fixed single-byte charset instead of the platform
        // default so the result does not depend on the machine running this.
        String sniffed = getCharSet(new String(body, FALLBACK_CHARSET));

        // Prefer the charset declared in the page, then the one from the HTTP
        // header, then the fallback. A missing or unknown name used to crash
        // the decoding below.
        String charset;
        if (isSupportedCharset(sniffed)) {
            charset = sniffed;
        } else if (isSupportedCharset(headerCharset)) {
            charset = headerCharset;
        } else {
            charset = FALLBACK_CHARSET;
        }
        System.out.println("getCharSet:" + charset);

        String responseGet = new String(body, charset);
        System.out.println(responseGet);

        Parser myParser = Parser.createParser(responseGet, charset);
        HtmlPage visitor = new HtmlPage(myParser);
        myParser.visitAllNodesWith(visitor);
        String textInPage = visitor.getTitle();
        System.out.println("title:" + textInPage);
    }

    /**
     * Extracts the charset name declared in the first
     * {@code <meta ... charset=...>} tag of an HTML document.
     *
     * @param content HTML source to scan; may be {@code null}
     * @return the declared charset name exactly as written in the page, or
     *         {@code null} when {@code content} is {@code null} or contains no
     *         such declaration
     */
    public static String getCharSet(String content) {
        if (content == null) {
            return null;
        }
        Matcher matcher = META_CHARSET.matcher(content);
        return matcher.find() ? matcher.group(1) : null;
    }

    /** Tells whether {@code name} is a charset this JVM can decode with. */
    private static boolean isSupportedCharset(String name) {
        if (name == null) {
            return false;
        }
        try {
            return java.nio.charset.Charset.isSupported(name);
        } catch (IllegalArgumentException e) {
            // Thrown for syntactically illegal names, e.g. one starting with '-'.
            return false;
        }
    }

}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值