最近使用HttpClient爬取网页,下载图片,发现很多都有问题,图像只能显示一半。后来上网搜,找到了解决办法,如下所述。
下载部分的逻辑写成下面这样就可以了:
/**
 * Downloads the resource at {@code url} and stores it on disk.
 *
 * <p>The body is first written to a {@code .tmp} file under the {@code images}
 * directory and only moved to the {@code simplecd_images} directory once every
 * byte has been received, so a partially transferred image never shows up under
 * its final name. The file name is the last {@code /}-separated segment of the URL.
 *
 * <p>A non-200 response is logged and nothing is written. The HTTP connection is
 * always released, and both streams are always closed, even when the transfer fails.
 *
 * @param url absolute URL of the image to fetch
 * @throws IOException if the request fails, the 200 response has no body, or the
 *         body cannot be read or written (a truncated temporary file is removed)
 */
public void download(String url) throws HttpException, ClientProtocolException, IOException
{
    // NOTE(review): httpClient is declared but never assigned anywhere in this
    // snippet. It has to be given a real client instance (or be replaced by a
    // field of the enclosing class) before this method can compile and run.
    HttpClient httpClient;
    HttpGet getMethod = new HttpGet(url);
    try {
        HttpResponse response = httpClient.execute(getMethod);
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode != HttpStatus.SC_OK) {
            logger.error("Something wrong and the code is " + statusCode);
            logger.error("And the wrong page is " + url);
            return;
        }
        if (response.getEntity() == null) {
            throw new IOException("Response for " + url + " has no body");
        }

        // The last path segment of the URL is used as the file name.
        String[] segments = url.split("/");
        String name = segments[segments.length - 1];

        // Write to a temporary file first; it is moved into place only after
        // the whole body has been received.
        File storeFile = new File("E:\\JAVA_programm\\网页文件\\images\\" + name + ".tmp");
        try (InputStream inputStream = response.getEntity().getContent();
             OutputStream outputStream = new FileOutputStream(storeFile)) {
            byte[] buffer = new byte[32 * 1024];
            int read;
            while ((read = inputStream.read(buffer)) != -1) {
                outputStream.write(buffer, 0, read);
            }
        } catch (IOException e) {
            // Both streams are already closed here; drop the truncated file
            // (best effort) and let the caller see the original failure.
            storeFile.delete();
            throw e;
        }

        File target = new File("E:\\JAVA_programm\\网页文件\\simplecd_images\\" + name);
        // renameTo reports failure through its return value, not an exception.
        if (storeFile.renameTo(target)) {
            logger.info("图片-" + name + "-下载完成!!");
        } else {
            logger.error("Could not move " + storeFile + " to " + target);
        }
    } finally {
        // Hand the connection back on every path, including failures.
        getMethod.releaseConnection();
    }
}