packagemain;importjava.io.ByteArrayOutputStream;importjava.io.File;importjava.io.FileOutputStream;importjava.io.IOException;importjava.io.InputStream;importjava.net.HttpURLConnection;importjava.net.URL;importjava.util.ArrayList;importjava.util.List;importjava.io.DataInputStream;importjava.io.DataOutputStream;importorg.jsoup.Jsoup;importorg.jsoup.nodes.Document;importorg.jsoup.select.Elements;public classWrom {//定义爬取网站,方便获取url所以定义两个
static String baseurl = "http://www.acgwallpaper.com";static String geturl = "http://www.acgwallpaper.com/";static String filepath = "D:\\testwrom";//定义图片存放的文件夹
static int j = 0;//计数用的变量
public static voidmain(String[] args) {
System.out.println("初始下载页面:" +geturl);
String html= getHtml(geturl); //从url解析得到html的body
List href_list = getNextUrl(html); //将href和图片所在网址构成新的url存在链表list
for (int i = 0; i < href_list.size(); i++) {
System.out.println("图片下载界面:" +href_list.get(i));
String newhtml= getHtml(href_list.get(i));//解析得到由href和网址构成的新的html的body
List photo_list = getImgSrcListFromHtml(newhtml);//查找src路径
downloadImg(photo_list, filepath); //下载图片
}
System.out.println("下载完毕");
}public staticString getHtml(String url){
String html= "";try{
html= Jsoup.connect(url).timeout(5000).execute().body();//测试发现有时会断开连接,故设置连接时间为5s
} catch(IOException e) {
e.printStackTrace();
}returnhtml;
}public static ListgetImgSrcListFromHtml(String html) {
List list = new ArrayList<>();//存放图片超链接
Document doc = Jsoup.parse(html);//解析html页面
Elements elements = doc.select("img[src$=.jpg]");//获取目标尾缀是.jpg的图像
System.out.println("此页面图像个数:" +elements.size());for(int i = 0; i < elements.size(); i++) {
list.add(elements.get(i).attr("src"));//将图片超链接放入String链表内,方便下载
}returnlist;
}public static ListgetNextUrl(String html){
String forget="/";
List list = new ArrayList<>();
Document document= Jsoup.parse(html);//解析成html页面
Elements elements = document.select("div [class=artwork-block col-xs-6 col-sm-4 col-md-3]").select("a");//获取目标
System.out.println("页面抓取个数" +elements.size());for (int i = 0;i < elements.size(); i++) {
String url= baseurl + elements.get(i).attr("href")+ forget; //图片所在的网页url
list.add(url);
}returnlist;
}public static void downloadImg(List list, String filepath){
URL newUrl= null;
HttpURLConnection hconnection= null;try{for(int i = 0; i < list.size(); i++, j++) {
String newlist= baseurl +list.get(i);
System.out.println(newlist);
String filename= "/img" + j + ".jpg";//文件命名,不能重名
System.out.println(filename);
newUrl= newURL(newlist);
hconnection= (HttpURLConnection) newUrl.openConnection(); //打开连接
DataInputStream in = new DataInputStream(hconnection.getInputStream());//获取输入流对象
DataOutputStream out = new DataOutputStream(new FileOutputStream(filepath + filename));//输出流
byte[] buffer = new byte[4096];int count = 0;//将输入流以字节的形式读取并写入buffer中
while ((count = in.read(buffer)) > 0) {
out.write(buffer,0, count);
}//关闭该关的东西
out.close();
in.close();
hconnection.disconnect();
}
}catch(Exception e) {
System.out.println("失误");
}
}
}