最近使用HttpClient爬取网页,下载图片,发现很多都有问题,图像只能显示一半。后来上网搜,找到了解决办法,如下所述。
下载部分的逻辑写成下面这样就可以了:先把数据写入临时文件,等全部下载完成后再改回原来的文件名。
/**
 * Downloads the resource at {@code url} and stores it on disk.
 *
 * <p>The body is first written to a {@code .tmp} file and only renamed to its
 * final name once every byte has been copied, so a partially downloaded image
 * never appears under its real name. The file name is the last
 * {@code /}-separated segment of the URL.
 *
 * <p>When the server answers with anything other than 200 OK, the status code
 * and the URL are logged as errors and nothing is written.
 *
 * @param url absolute URL of the image to fetch
 * @throws HttpException           declared for callers; propagated from the HTTP layer
 * @throws ClientProtocolException if the HTTP exchange violates the protocol
 * @throws IOException             if the transfer fails, the response carries no
 *                                 body, or the temporary file cannot be renamed
 */
public void download(String url)
        throws HttpException, ClientProtocolException, IOException {
    // The client used to be declared without ever being assigned, which does
    // not compile. Fully qualified so that no additional import is required.
    HttpClient httpClient = new org.apache.http.impl.client.DefaultHttpClient();
    HttpGet getMethod = new HttpGet(url);
    try {
        HttpResponse response = httpClient.execute(getMethod);
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode != HttpStatus.SC_OK) {
            logger.error("Something wrong and the code is " + statusCode);
            logger.error("And the wrong page is " + url);
            return;
        }
        if (response.getEntity() == null) {
            throw new IOException("Response for " + url + " has no body");
        }

        // Use the last path segment of the image link as the file name.
        String[] segments = url.split("/");
        String name = segments[segments.length - 1];

        // Save under a temporary name first; rename once the download is complete.
        File storeFile = new File("E:\\JAVA_programm\\网页文件\\images\\" + name + ".tmp");
        // NOTE(review): the final directory differs from the temporary one
        // ("simplecd_images" vs "images") - confirm this is intended.
        File finalFile = new File("E:\\JAVA_programm\\网页文件\\simplecd_images\\" + name);

        InputStream inputStream = response.getEntity().getContent();
        try {
            FileOutputStream outputStream = new FileOutputStream(storeFile);
            try {
                byte[] buffer = new byte[32 * 1024];
                int count;
                while ((count = inputStream.read(buffer)) != -1) {
                    outputStream.write(buffer, 0, count);
                }
            } finally {
                // Must be closed before the rename below, and also on failure.
                outputStream.close();
            }
        } finally {
            inputStream.close();
        }

        // renameTo reports failure through its return value only; do not claim
        // success when the file never reached its final name.
        if (!storeFile.renameTo(finalFile)) {
            throw new IOException("Could not rename " + storeFile + " to " + finalFile);
        }
        logger.info("图片-" + name + "-下载完成!!");
    } finally {
        // Always hand the connection back and dispose of the per-call client,
        // including when an exception is propagating.
        getMethod.releaseConnection();
        httpClient.getConnectionManager().shutdown();
    }
}