HttpURLConnection

try

{

url = new URL(urlStr);

httpConn = (HttpURLConnection) url.openConnection();

HttpURLConnection.setFollowRedirects(true);

// logger.info(httpConn.getResponseMessage());

in = httpConn.getInputStream();

out = new FileOutputStream(new File(outPath));

chByte = in.read();

while (chByte != -1)

{

out.write(chByte);

chByte = in.read();

}

}

catch (MalformedURLException e)

{

}

}

经过一段时间的研究和查找资料,发现问题是由于上面的代码缺少一些必要的请求信息导致的,需要为连接增加更详细的请求属性:

// Declare the HTTP method explicitly.
httpConn.setRequestMethod("GET");

// Send a browser-style User-Agent header with the request.
httpConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)");

完整代码如下:

/**
 * Downloads the resource at {@code urlStr} over HTTP and writes the response body to
 * the file {@code outPath}.
 *
 * <p>The request is a GET carrying a browser-style User-Agent header. A malformed URL
 * or an I/O failure is reported with {@code printStackTrace()} and the method returns
 * normally; in that case the output file may be missing or incomplete.
 *
 * <p>The URL must resolve to an HTTP(S) connection, because the connection is cast to
 * {@link HttpURLConnection}.
 *
 * @param urlStr  address of the page to download
 * @param outPath path of the file that receives the downloaded bytes
 */
public static void DownLoadPages(String urlStr, String outPath)
{
    HttpURLConnection httpConn = null;
    InputStream in = null;
    FileOutputStream out = null;

    try
    {
        URL url = new URL(urlStr);
        httpConn = (HttpURLConnection) url.openConnection();

        // Configure redirects on this connection only; the static
        // setFollowRedirects would change the setting for the whole JVM.
        httpConn.setInstanceFollowRedirects(true);
        httpConn.setRequestMethod("GET");
        httpConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)");

        in = httpConn.getInputStream();
        out = new FileOutputStream(new File(outPath));

        // Copy in chunks instead of one byte per read()/write() call.
        byte[] buffer = new byte[8192];
        int count = in.read(buffer);
        while (count != -1)
        {
            out.write(buffer, 0, count);
            count = in.read(buffer);
        }
        out.flush();
    }
    catch (MalformedURLException e)
    {
        e.printStackTrace();
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
    finally
    {
        // Release each resource independently and only if it was actually opened,
        // so a failure (or a null) on one does not skip the others.
        if (out != null)
        {
            try
            {
                out.close();
            }
            catch (IOException ex)
            {
                ex.printStackTrace();
            }
        }
        if (in != null)
        {
            try
            {
                in.close();
            }
            catch (IOException ex)
            {
                ex.printStackTrace();
            }
        }
        if (httpConn != null)
        {
            httpConn.disconnect();
        }
    }
}

此外,还有第二种方法可以访问Google的网站,就是用apache的一个工具HttpClient 模仿一个浏览器来访问Google

// Fetch the page with Apache HttpClient and parse the response body into a DOM.
// `document` stays null when the response status is not 200 OK.
Document document = null;

HttpClient httpClient = new HttpClient();
GetMethod getMethod = new GetMethod(url);
getMethod.setFollowRedirects(true);

try
{
    int statusCode = httpClient.executeMethod(getMethod);

    if (statusCode == HttpStatus.SC_OK)
    {
        InputStream in = getMethod.getResponseBodyAsStream();
        InputSource is = new InputSource(in);

        DOMParser domParser = new DOMParser();    // NekoHTML: converts the fetched page into a DOM
        domParser.parse(is);
        document = domParser.getDocument();

        System.out.println(getMethod.getURI());
    }
}
finally
{
    // Always hand the connection back to HttpClient, even when the request
    // or the parsing fails; the DOM has been fully built by this point.
    getMethod.releaseConnection();
}

return document;

推荐使用第一种方式:HttpURLConnection 比较轻量级,速度也比第二种方式(HttpClient)快。

转载一些代码,使用 HttpURLConnection 来模拟 IE 表单(form)登录 web:

关于java模拟ie form登陆web的问题

// Simulate a browser form login: POST the form data together with the current cookie.
HttpURLConnection urlConn=(HttpURLConnection)(new URL(url).openConnection());

urlConn.addRequestProperty("Cookie",cookie);
urlConn.setRequestMethod("POST");
urlConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)");

// setFollowRedirects is static and applies JVM-wide; configure only this connection.
urlConn.setInstanceFollowRedirects(true);

urlConn.setDoOutput(true); // the form data is written to the server
urlConn.setDoInput(true); // the response is read back afterwards
urlConn.setUseCaches(false); // always get the server's latest data
urlConn.setAllowUserInteraction(false);

urlConn.setRequestProperty("Content-Type","application/x-www-form-urlencoded");
urlConn.setRequestProperty("Content-Language","en-US" );
// NOTE(review): the length is a char count; it equals the byte count written below
// only if `data` is plain ASCII (already URL-encoded) — confirm against the caller.
urlConn.setRequestProperty("Content-Length", ""+data.length());

DataOutputStream outStream = new DataOutputStream(urlConn.getOutputStream());
try
{
    // writeBytes sends only the low-order byte of each char.
    outStream.writeBytes(data);
    outStream.flush();
}
finally
{
    // Close the request body stream even if writing fails.
    outStream.close();
}

// Remember the cookie issued by the server for subsequent requests.
cookie=urlConn.getHeaderField("Set-Cookie");

// The response body is decoded as gb2312.
BufferedReader br=new BufferedReader(new InputStreamReader(urlConn.getInputStream(),"gb2312"));
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值