HttpClient 模拟登录并解析网页数据


/**
 * POST method example: simulates a site login with HttpClient and then
 * downloads the page the login redirects to.
 *
 * <p>Flow of {@link #main(String[])}: GET the login page, POST the login form,
 * compare the {@code location} response header with {@link #INDEX}, and on a
 * match GET that page and hand its body to {@code Util.writerFile}.
 */
public class PostExample {

    /** Value passed to {@code HostConfiguration.setHost} as the site host. */
    private static final String WEB_SITE = "http://www.xxx.cn";

    /** Login page; fetched with GET first and then used as the POST target. */
    private static final String LOGIN = "http://www.xxx.cn/login.html";

    /** Redirect target that is treated as proof of a successful login. */
    private static final String INDEX = "http://www.xxx.cn/index.html";

    private static final int WEB_PORT = 80;

    /** Value sent in the {@code email} form field. */
    private static final String USER_NAME = "username";

    /** Value sent in the {@code pass} form field. */
    private static final String PASSWORD = "password";

    /** Client shared by every request issued from this class. */
    private static final HttpClient client;

    /** Snapshot of the client's cookies taken once, right after the client is created. */
    private static final Cookie[] cookies;

    static {
        client = new HttpClient();
        // 15 second socket-read and connect timeouts on the connection manager.
        client.getHttpConnectionManager().getParams().setSoTimeout(15000);
        client.getHttpConnectionManager().getParams().setConnectionTimeout(15000);
        cookies = client.getState().getCookies();
    }

    /**
     * Runs the login scenario and, on success, writes the index page to a file.
     *
     * <p>Login counts as successful only when the POST response carries a
     * {@code location} header equal to {@link #INDEX}; a missing header is
     * reported as a failed login instead of raising a NullPointerException.
     */
    private static void testLogin() {
        // NOTE(review): setHost is given a full URL rather than a bare host name.
        // Every request below uses an absolute URL, so this may be harmless — confirm.
        client.getHostConfiguration().setHost(WEB_SITE, WEB_PORT);

        // Request the login page before submitting the form.
        GetMethod get = new GetMethod(LOGIN);
        GetExample.processGet(client, get, cookies, false, false);

        PostMethod post = new PostMethod(LOGIN);
        NameValuePair[] params = new NameValuePair[] {
            new NameValuePair("email", USER_NAME),
            new NameValuePair("pass", PASSWORD),
            new NameValuePair("remember", "1"),
            new NameValuePair("goto", "/index.html")
        };
        processPost(client, post, params, cookies, false, false);

        // No location header (no redirect, or the request itself failed) means no login.
        Header header = post.getResponseHeader("location");
        String url = (header == null) ? null : header.getValue();
        if (!INDEX.equals(url)) {
            System.out.println("登录失败,请检查请求参数以及url是否正确...");
            return;
        }
        System.out.println("登录成功!");

        get = new GetMethod(url);
        String result = GetExample.processGet(client, get, cookies, false, true);
        if (result == null) {
            // Nothing to persist; avoid passing null on to the file writer.
            System.out.println("获取首页内容失败,无法写入文件");
            return;
        }

        // Write the fetched page to a file.
        String filePath = Util.writerFile(result);

        // Next step (disabled in the original): parse the saved HTML.
        //ParserExample.parserHtml(filePath);
    }

    /**
     * Executes a POST request and optionally returns the response body.
     *
     * <p>The connection is always released afterwards, the client-level
     * parameters are cleared, and the calling thread then sleeps for 3 seconds.
     *
     * @param client            client used to execute the request
     * @param post              the POST method to configure and execute
     * @param params            form fields for the request body; ignored when null or empty
     * @param cookies           cookies to send in an explicit {@code cookie} header;
     *                          no header is added when null or empty
     * @param needAppendCookies when true, re-adds the client state's current cookies
     *                          to that same state after the request
     * @param needResponse      when true, the response body is returned
     * @return the response body when {@code needResponse} is true and the request
     *         succeeded; otherwise {@code null}
     */
    public static String processPost(HttpClient client, PostMethod post, NameValuePair[] params, Cookie[] cookies,
            boolean needAppendCookies, boolean needResponse) {
        try {
            post.setRequestHeader("User-Agent",
                    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.10) Gecko/20100914 Firefox/3.6.10");

            if (params != null && params.length > 0) {
                post.setRequestBody(params);
            }

            // Array.toString() would send the array's identity string, so the
            // header value is built from the individual name/value pairs.
            String cookieHeader = formatCookieHeader(cookies);
            if (cookieHeader != null) {
                post.setRequestHeader("cookie", cookieHeader);
            }

            // Socket timeout of 10 seconds for this POST.
            post.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, 10000);

            client.executeMethod(post);

            if (needAppendCookies) {
                // The parameter is reassigned locally only, so the caller's array is not
                // updated; the state's own cookies are added back to the same state.
                cookies = client.getState().getCookies();
                client.getState().addCookies(cookies);
            }

            if (needResponse) {
                return post.getResponseBodyAsString();
            }
        } catch (HttpException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // NOTE(review): this clears the client-level parameters after every request — confirm intent.
            client.getParams().clear();
            post.releaseConnection();
            try {
                // Fixed 3 second pause after each request (presumably throttling — confirm).
                Thread.sleep(3000);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers can still observe the interruption.
                Thread.currentThread().interrupt();
            }
        }
        return null;
    }

    /**
     * Joins cookies into a {@code name=value; name=value} header value.
     *
     * @return the header value, or {@code null} when there are no cookies to send
     */
    private static String formatCookieHeader(Cookie[] cookies) {
        if (cookies == null || cookies.length == 0) {
            return null;
        }
        StringBuilder header = new StringBuilder();
        for (Cookie cookie : cookies) {
            if (cookie == null) {
                continue;
            }
            if (header.length() > 0) {
                header.append("; ");
            }
            header.append(cookie.getName()).append('=').append(cookie.getValue());
        }
        return header.length() == 0 ? null : header.toString();
    }

    /** Entry point: runs the login example. */
    public static void main(String[] args) {
        testLogin();
    }

}



/**
 * File helper that saves the result of a GET/POST response to disk.
 */
public class Util {

    /**
     * Writes {@code result} to a new file on drive {@code d:} whose name is a
     * random UUID followed by {@code .html}.
     *
     * <p>The text is encoded with the platform default charset, exactly as
     * before; this is kept because the saved file is later read back with a
     * fixed encoding elsewhere in this example.
     *
     * @param result the text to store; may be null
     * @return the absolute path of the written file, or {@code null} when
     *         {@code result} is null or the file could not be written
     */
    public static String writerFile(String result) {
        if (result == null) {
            // Nothing to write; report it the same way as an I/O failure.
            return null;
        }

        File file = new File("d:\\" + UUID.randomUUID() + ".html");
        byte[] bytes = result.getBytes();

        // try-with-resources closes the stream even when write/flush throws.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            fos.write(bytes, 0, bytes.length);
            fos.flush();
        } catch (IOException e) {
            // Also covers FileNotFoundException raised while opening the file.
            e.printStackTrace();
            return null;
        }
        return file.getAbsolutePath();
    }
}


/**
 * Extracts data from the site in two ways: an HTML parser for the member name
 * and a regular expression for the member count.
 */
public class ParserExample {

    /** Captures the value assigned in a {@code var value=...;} statement. */
    private static final Pattern MEMBER_COUNT_PATTERN = Pattern.compile("var value=(.*?);");

    /** Message printed whenever the member name cannot be located in the page. */
    private static final String NAME_PARSE_ERROR = "解析姓名出错,请检查网页结构是否发生变化";

    /**
     * Parses the member name from the saved page and then prints the member count.
     *
     * @param filePath location of the saved HTML page handed to the parser
     */
    public static void parserHtml(String filePath) {
        parserName(filePath);
        parserMemberCount();
    }

    /**
     * Prints the text of the first {@code <a>} tag whose {@code href} is the
     * member index URL.
     *
     * <p>If no such tag exists, or it has no child node, the parse-error message
     * is printed instead of failing with an exception.
     */
    private static void parserName(String filePath) {
        try {
            Parser parser = new Parser(filePath);
            parser.setEncoding("gbk");

            NodeFilter filter = new AndFilter(new TagNameFilter("a"),
                    new HasAttributeFilter("href", "http://www.xxx.cn/member/index.html"));
            NodeList nodeList = parser.extractAllNodesThatMatch(filter);

            // An empty result means the anchor is missing; do not index into it.
            if (nodeList == null || nodeList.size() == 0) {
                System.out.println(NAME_PARSE_ERROR);
                return;
            }

            LinkTag node = (LinkTag) nodeList.elementAt(0);
            NodeList children = node.getChildren();
            String name = null;
            if (children != null && children.size() > 0) {
                name = children.elementAt(0).toPlainTextString();
            }

            if (name == null) {
                System.out.println(NAME_PARSE_ERROR);
            } else {
                System.out.println("姓名:" + name);
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
    }

    /**
     * Fetches the member-count page with a fresh client and prints the value
     * captured by {@link #MEMBER_COUNT_PATTERN}; prints nothing when the
     * pattern does not match.
     */
    private static void parserMemberCount() {
        HttpClient client = new HttpClient();
        Cookie[] cookies = client.getState().getCookies();

        // NOTE(review): setHost is given a full URL rather than a bare host name;
        // the request below uses an absolute URL, so this may be harmless — confirm.
        client.getHostConfiguration().setHost("http://www.xxx.cn", 80);

        GetMethod get = new GetMethod("http://www.xxx.cn/ajax/memberCount.html");
        String result = GetExample.processGet(client, get, cookies, false, true);
        if (result == null) {
            // processGet is defined elsewhere; presumably null signals a failed request — confirm.
            System.out.println("获取会员数失败");
            return;
        }

        Matcher matcher = MEMBER_COUNT_PATTERN.matcher(result);
        if (matcher.find()) {
            String memberCount = matcher.group(1);
            System.out.println("会员:" + memberCount);
        }
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值