/*
*2015/4/14
*/
/**
 * Small image crawler built on jsoup.
 *
 * <p>Downloads the {@code .jpg}, {@code .gif} and {@code .png} images referenced by
 * {@code <img>} tags on a start page and on the pages that start page links to
 * (one level deep). Images of 10 KiB or less are fetched but not saved.
 *
 * <p>The counters are plain static fields, so the class is not safe for concurrent use.
 */
public class DownImages {
    /** Image extensions to collect, in the order they are processed for each page. */
    private static final String[] IMAGE_EXTENSIONS = {".jpg", ".gif", ".png"};

    /** Timeout, in milliseconds, for fetching an HTML page through jsoup. */
    private static final int PAGE_TIMEOUT_MILLIS = 1000000;

    /** Connect and read timeout, in milliseconds, for downloading a single image. */
    private static final int IMAGE_TIMEOUT_MILLIS = 6 * 10000;

    /** Images whose size in bytes does not exceed this value are not written to disk. */
    private static final int MIN_IMAGE_BYTES = 1024 * 10;

    /** Number of image links visited so far; also used to number the saved files. */
    private static int visitedCount = 0;

    /** Number of images actually written to disk. */
    private static int downloadedCount = 0;

    /**
     * Crawls {@code urlPath} and the pages it links to, saving every matching image.
     *
     * <p>Only links whose {@code href} is longer than four characters and starts with
     * {@code http} or {@code /} are followed. A linked page that cannot be fetched, or an
     * image that cannot be downloaded, is reported on stderr and skipped so that one bad
     * link does not end the whole crawl.
     *
     * @param urlPath URL of the start page
     * @param saveDz  file-name prefix (directory plus leading name part); a running number
     *                and the image extension are appended to it for every image
     * @throws Exception if the start page itself cannot be fetched or parsed
     */
    public static void jsoupHTML(String urlPath, String saveDz) throws Exception {
        Document doc = Jsoup.connect(urlPath).timeout(PAGE_TIMEOUT_MILLIS).get();

        // Images on the start page.
        downloadPageImages(doc, urlPath, saveDz);

        // Images on every page the start page links to.
        Elements anchors = doc.select("a[href]");
        for (Element anchor : anchors) {
            String href = anchor.attr("href");
            if (href.length() <= 4) {
                continue;
            }
            if (!href.startsWith("http") && !href.startsWith("/")) {
                continue;
            }

            String pageUrl;
            try {
                pageUrl = toAbsoluteUrl(urlPath, href).toString();
            } catch (java.net.MalformedURLException e) {
                System.err.println("Skipping malformed link " + href + ": " + e);
                continue;
            }
            if (!pageUrl.startsWith("http")) {
                continue;
            }

            Document linkedPage;
            try {
                linkedPage = Jsoup.connect(pageUrl).timeout(PAGE_TIMEOUT_MILLIS).get();
            } catch (java.io.IOException e) {
                // Dead links, error statuses and non-HTML targets must not abort the crawl.
                System.err.println("Skipping page " + pageUrl + ": " + e);
                continue;
            }
            // Relative image paths belong to the linked page, not to the start page.
            downloadPageImages(linkedPage, pageUrl, saveDz);
        }
    }

    /**
     * Downloads every jpg, gif and png image referenced by {@code <img>} tags of one page.
     * All jpg images are handled first, then gif, then png.
     */
    private static void downloadPageImages(Document page, String pageUrl, String saveDz) {
        for (String extension : IMAGE_EXTENSIONS) {
            Elements images = page.select("img[src$=" + extension + "]");
            for (Element image : images) {
                // Only the link itself is needed, not the surrounding tag.
                String imagesPath = image.attr("src");
                System.out.println("当前访问路径:" + imagesPath);

                // Always increment before building the name so that no two images
                // ever share a file name.
                visitedCount++;
                String fileName = saveDz + visitedCount + extension;
                try {
                    getImages(pageUrl, imagesPath, fileName);
                } catch (Exception e) {
                    // Crawl boundary: one broken image must not stop the remaining ones.
                    System.err.println("Skipping image " + imagesPath + ": " + e);
                }
            }
        }
    }

    /**
     * Resolves {@code reference} to an absolute URL. References starting with {@code http}
     * are taken as they are; everything else is resolved against {@code baseUrl}.
     */
    private static URL toAbsoluteUrl(String baseUrl, String reference)
            throws java.net.MalformedURLException {
        if (reference.startsWith("http")) {
            return new URL(reference);
        }
        return new URL(new URL(baseUrl), reference);
    }

    /**
     * Downloads one image and stores it as {@code fileName}.
     *
     * <p>Nothing is written when the server does not answer with HTTP 200 or when the
     * image is 10 KiB or smaller.
     *
     * @param urlPath   URL of the page the image was found on; used to resolve
     *                  {@code imagePath} when it is not an absolute URL
     * @param imagePath value of the image's {@code src} attribute
     * @param fileName  path of the file to write
     * @throws Exception if the URL is malformed or an I/O error occurs
     */
    public static void getImages(String urlPath, String imagePath, String fileName) throws Exception {
        URL url = toAbsoluteUrl(urlPath, imagePath);
        java.net.URLConnection rawConnection = url.openConnection();
        if (!(rawConnection instanceof HttpURLConnection)) {
            throw new java.io.IOException("Not an HTTP(S) URL: " + url);
        }

        HttpURLConnection conn = (HttpURLConnection) rawConnection;
        try {
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(IMAGE_TIMEOUT_MILLIS);
            conn.setReadTimeout(IMAGE_TIMEOUT_MILLIS);

            if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                return;
            }

            byte[] data = readStream(conn.getInputStream());
            if (data.length > MIN_IMAGE_BYTES) {
                FileOutputStream outputStream = new FileOutputStream(fileName);
                try {
                    outputStream.write(data);
                } finally {
                    outputStream.close();
                }
                downloadedCount++;
                // Progress is reported on stderr, as before.
                System.err.println("第" + downloadedCount + "图片下载成功");
            }
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Reads a stream to its end and returns its content as bytes.
     * The stream is closed before this method returns, also when reading fails.
     *
     * @param inputStream stream to drain
     * @return every byte read from the stream
     * @throws Exception if reading or closing the stream fails
     */
    public static byte[] readStream(InputStream inputStream) throws Exception {
        try {
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] buffer = new byte[1024];
            int len;
            while ((len = inputStream.read(buffer)) != -1) {
                collected.write(buffer, 0, len);
            }
            return collected.toByteArray();
        } finally {
            inputStream.close();
        }
    }

    /**
     * Builds a file-name part from the current time.
     *
     * @return the current local time formatted as {@code yyyyMMddHHmmss}
     */
    public static String rename() {
        // A new formatter per call: SimpleDateFormat must not be shared between threads.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
        return sdf.format(new Date());
    }

    /**
     * Crawls a fixed start page into a fixed output directory and prints a summary.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String saveDz = "F://images1//";
        File file = new File(saveDz);
        // mkdirs also creates missing parent directories; stop early when it fails
        // because every later file write would fail as well.
        if (!file.exists() && !file.mkdirs()) {
            System.err.println("Could not create output directory: " + saveDz);
            return;
        }
        try {
            String urlPath = "http://www.521auto.com";
            // Pass saveDz + rename() instead to get a timestamp-based file-name prefix.
            jsoupHTML(urlPath, saveDz + "0000");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            System.out.println("共访问" + visitedCount + "张图片,其中下载" + downloadedCount + "张图片");
        }
    }
}