1.新建Maven工程,在pom.xml中导入以下依赖
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
2.编写java代码,图片网站一般页面代码都是很规律的
import java.io.File;
import java.io.InputStream;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;
import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Simple wallpaper crawler.
 *
 * <p>Walks the paginated list pages under {@link #baseUrl}, opens every album linked from a list
 * page, and downloads each image of the album into {@code path + <album title>} using a fixed
 * pool of 20 worker threads.
 */
public class WallPaper {
    /** Root directory; one sub-directory per album title is created below it. */
    public static final String path="F:\\image\\";
    /** URL of the first list page; further pages are {@code baseUrl + i + ".html"}. */
    public static final String baseUrl="http://desk.zol.com.cn/pc/";
    /** Site prefix prepended to the (site-relative) album links found on list pages. */
    public static final String wallPaperUrl="http://desk.zol.com.cn";
    /** Exclusive upper bound of the list-page index iterated by {@link #main}. */
    public static int maxPage=6837;

    /** Characters that are removed from album titles before they are used as directory names. */
    private static final String ILLEGAL_TITLE_CHARS = "\\/:*\"?<>|";

    private static final ThreadPoolExecutor pool=(ThreadPoolExecutor) Executors.newFixedThreadPool(20);

    /**
     * Crawls list pages {@code 0 .. maxPage-1} in order.
     *
     * @param args unused
     * @throws Exception if fetching or parsing a list/album page fails; the crawl stops there
     */
    public static void main(String[] args) throws Exception {
        try {
            for (int i = 0; i < maxPage; i++) {
                if (i == 0) {
                    part(baseUrl);
                } else {
                    part(baseUrl + i + ".html");
                }
            }
        } finally {
            // The pool's worker threads are non-daemon: without shutdown() the JVM would never
            // exit. shutdown() still lets already-queued downloads finish.
            pool.shutdown();
        }
    }

    /**
     * Processes one list page: for every album link whose target directory does not exist yet,
     * delegates to {@link #article(String, String)}.
     *
     * @param url absolute URL of the list page
     * @throws Exception if the page cannot be fetched or an album page fails
     */
    public static void part(String url) throws Exception {
        System.out.println("当前访问壁纸列表URL:"+url);
        Document doc = Jsoup.parse(fetchHtml(url));
        // Each album on the list page is an <a> wrapping a thumbnail <img>.
        Elements articles = doc.select("ul.pic-list2 li a");
        for (Element article : articles) {
            Element thumbnail = article.select("img").first();
            if (thumbnail == null) {
                // Anchor without a thumbnail: no title to name the directory after.
                continue;
            }
            String title = sanitizeTitle(thumbnail.attr("title"));
            if (title.isEmpty()) {
                // An empty name would resolve to the download root itself.
                continue;
            }
            File dir = new File(path + title);
            if (dir.exists()) {
                System.out.println("已存在->壁纸列表:"+url+",title:"+title);
                continue;
            }
            article(wallPaperUrl + article.attr("href"), title);
        }
    }

    /**
     * Processes one album page: schedules a download task for every image found under
     * {@code #showImg}.
     *
     * @param url   absolute URL of the album page
     * @param title sanitized album title, used as the target directory name
     * @throws Exception if the page cannot be fetched
     */
    public static void article(String url, String title) throws Exception {
        System.out.println("当前访问article:"+url+",title:"+title);
        Document doc = Jsoup.parse(fetchHtml(url));
        for (Element element : doc.select("#showImg img")) {
            // The image address is in "src", or in "srcs" when "src" is empty.
            String src = element.attr("src");
            if (src.isEmpty()) {
                src = element.attr("srcs");
            }
            if (src.isEmpty()) {
                continue; // nothing to download for this element
            }
            // The page only lists thumbnails; the larger image URL differs solely in this
            // size segment.
            src = src.replace("t_s144x90c5", "t_s960x600c5");
            pool.execute(new DownLoadTask(src, title));
        }
    }

    /**
     * Downloads one image into {@code path + title}. Failures are logged and swallowed so that a
     * single bad image does not affect other downloads.
     *
     * @param url   absolute image URL
     * @param title sanitized album title (target directory name)
     */
    public static void save(String url, String title) {
        try {
            HttpGet httpget = new HttpGet(url);
            RequestConfig config = RequestConfig.custom()
                    .setConnectTimeout(3000) // connect timeout in milliseconds
                    .setSocketTimeout(3000)  // read timeout in milliseconds
                    .build();
            httpget.setConfig(config);
            // try-with-resources releases the connection even when the download fails midway.
            try (CloseableHttpClient httpClient = HttpClients.createDefault();
                 CloseableHttpResponse response = httpClient.execute(httpget)) {
                int status = response.getStatusLine().getStatusCode();
                if (status != 200) {
                    // Do not store an error page under an image file name.
                    throw new IllegalStateException("unexpected HTTP status " + status);
                }
                HttpEntity entity = response.getEntity();
                if (entity == null) {
                    throw new IllegalStateException("response has no body");
                }
                String filename = url.substring(url.lastIndexOf("/") + 1);
                int query = filename.indexOf('?');
                if (query >= 0) {
                    // A query string is not part of the file name ('?' is invalid on Windows).
                    filename = filename.substring(0, query);
                }
                InputStream content = entity.getContent();
                FileUtils.copyToFile(content, new File(path+title+"\\"+filename));
            }
        } catch (Exception e) {
            System.out.println("异常的url:"+url+",title"+title);
            e.printStackTrace();
        }
    }

    /** Fetches {@code url} and returns the response body decoded as gb2312. */
    private static String fetchHtml(String url) throws Exception {
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
            return EntityUtils.toString(response.getEntity(), "gb2312");
        }
    }

    /** Removes the characters that cannot appear in a directory name from {@code title}. */
    private static String sanitizeTitle(String title) {
        StringBuilder cleaned = new StringBuilder(title.length());
        for (int i = 0; i < title.length(); i++) {
            char c = title.charAt(i);
            if (ILLEGAL_TITLE_CHARS.indexOf(c) < 0) {
                cleaned.append(c);
            }
        }
        return cleaned.toString();
    }

    /** Pool task that downloads a single image via {@link WallPaper#save(String, String)}. */
    static class DownLoadTask implements Runnable {
        private final String url;
        private final String title;

        public DownLoadTask(String url, String title) {
            this.url = url;
            this.title = title;
        }

        @Override
        public void run() {
            // Progress hint: printed whenever no further downloads are waiting in the queue.
            if (pool.getQueue().isEmpty()) {
                System.out.println("当前无排队线程!!!");
            }
            save(url, title);
        }
    }
}
3.实验效果
4.说明
下载的壁纸还不够清晰,实现思路如上,具体爬什么网站图片看你们自己了!