1、纯粹Java实现
package com.leixinhui.test;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Downloads a single resource over HTTP and saves it to a local file, using
 * only JDK classes ({@link HttpURLConnection}).
 */
public class Test {

    /** Size, in bytes, of the buffer used while copying the response body to disk. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Fetches a hard-coded page and writes it to a hard-coded output file.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the page cannot be fetched or the file cannot be written
     */
    public static void main(String[] args) throws Exception {
        String strUrl = "http://www.zhaopin.com/";
        String strOutFile = "D:\\My Project\\files\\智联招聘.html";
        new Test().getHtmlPage(strUrl, strOutFile);
    }

    /**
     * Fetches a page using a pure-Java implementation and stores the raw
     * response bytes in a file.
     *
     * <p>The connection is cast to {@link HttpURLConnection}, so the URL must
     * use a protocol whose connection is of that type (http/https).
     *
     * @param strUrl URL of the page to fetch
     * @param strOutFile path of the output file
     * @throws Exception if the URL is malformed, the request fails, or the
     *         file cannot be written
     */
    private void getHtmlPage(String strUrl, String strOutFile) throws Exception {
        URL url = new URL(strUrl);
        HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();
        // try-with-resources closes both streams even when one close() throws;
        // the previous hand-written finally skipped the file stream and the
        // disconnect whenever inputStream.close() failed.
        // The output file is opened only after the response stream is obtained,
        // so a failed request does not leave an empty file behind.
        try (InputStream inputStream = httpURLConnection.getInputStream();
             FileOutputStream fileOutputStream = new FileOutputStream(strOutFile)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int n;
            while ((n = inputStream.read(buffer)) != -1) {
                fileOutputStream.write(buffer, 0, n);
            }
        } finally {
            // Exceptions are propagated to the caller as-is; they are no longer
            // printed here as well, which used to report each failure twice.
            httpURLConnection.disconnect();
        }
    }
}
2、使用Apache HttpClient实现
package com.leixinhui.test;
import java.io.FileOutputStream;
import java.io.InputStream;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
/**
 * Downloads a single resource over HTTP and saves it to a local file, using
 * Apache HttpClient.
 */
public class Test {

    /** Size, in bytes, of the buffer used while copying the response body to disk. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Fetches a hard-coded image and writes it to a hard-coded output file.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the resource cannot be fetched or the file cannot be written
     */
    public static void main(String[] args) throws Exception {
        String strUrl = "http://i2.img.969g.com/down/imgx2012/11/01/206_094829_e1ca3.jpg";
        String strOutFile = "D:\\My Project\\files\\test2.jpg";
        new Test().getHtmlPage(strUrl, strOutFile);
    }

    /**
     * Fetches a page using Apache HttpClient and stores the raw response
     * bytes in a file.
     *
     * <p>The body is saved only when the server answers with status 200. Any
     * other status, or a 200 response without a body, is reported as an
     * {@link java.io.IOException} instead of returning silently with no file
     * written.
     *
     * @param strUrl URL of the page to fetch
     * @param strOutFile path of the output file
     * @throws Exception if the request fails, the status is not 200, the
     *         response has no body, or the file cannot be written
     */
    private void getHtmlPage(String strUrl, String strOutFile) throws Exception {
        HttpGet request = new HttpGet(strUrl);
        HttpClient client = new DefaultHttpClient();
        try {
            HttpResponse response = client.execute(request);
            int statusCode = response.getStatusLine().getStatusCode();
            if (HttpStatus.SC_OK != statusCode) {
                throw new java.io.IOException(
                        "Unexpected HTTP status " + statusCode + " for " + strUrl);
            }
            HttpEntity entity = response.getEntity();
            if (null == entity) {
                // Previously this case surfaced as a bare NullPointerException.
                throw new java.io.IOException("Response for " + strUrl + " has no body");
            }
            // try-with-resources closes both streams even when one close() throws;
            // the previous hand-written finally skipped the file stream and the
            // connection release whenever inputStream.close() failed.
            try (InputStream inputStream = entity.getContent();
                 FileOutputStream outputStream = new FileOutputStream(strOutFile)) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int n;
                while ((n = inputStream.read(buffer)) != -1) {
                    outputStream.write(buffer, 0, n);
                }
            }
        } finally {
            try {
                request.releaseConnection();
            } finally {
                // The client is created per call, so its connection manager
                // is shut down here rather than left open.
                client.getConnectionManager().shutdown();
            }
        }
    }
}
备注:使用Apache HttpClient 4.2.6