1 添加所需要的依赖
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<dependency>
<!-- jsoup HTML parser library @ https://jsoup.org/ -->
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
1.1 先简单介绍一下 HttpClient 和 jsoup 分别是做什么的。
1.1.1 HttpClient 是 Apache HttpComponents 项目(org.apache.http 包)提供的 HTTP 客户端库,用于发起网络请求,可以发送 GET、POST 等常见请求。下面给出两个简单的示例。
发送GET请求
/**
 * Sends a GET request with a single query parameter to the local demo endpoint and
 * prints the HTTP status code followed by the response body.
 *
 * <p>The client and the response are managed by try-with-resources so that both are
 * closed even when executing the request or reading the body throws.
 *
 * @throws Exception if the URI cannot be built, the request fails, or the body cannot be read
 */
@Test
public void doGetWithParam() throws Exception {
    // Build the request URI with one query parameter.
    URIBuilder uriBuilder = new URIBuilder("http://localhost:8080/httpclient/get");
    uriBuilder.addParameter("query", "三国演义");
    HttpGet get = new HttpGet(uriBuilder.build());

    // Resources are closed in reverse order: the response first, then the client.
    try (CloseableHttpClient httpClient = HttpClients.createDefault();
         CloseableHttpResponse response = httpClient.execute(get)) {
        // Print the status code of the response.
        int statusCode = response.getStatusLine().getStatusCode();
        System.out.println(statusCode);

        // Decode the body, using UTF-8 as the charset passed to EntityUtils.
        HttpEntity entity = response.getEntity();
        String string = EntityUtils.toString(entity, "utf-8");
        System.out.println(string);
    }
}
发送POST请求
/**
 * Sends a form-encoded POST request (username/password) to the local demo endpoint
 * and prints the response body.
 *
 * <p>The client and the response are managed by try-with-resources so that both are
 * closed even when executing the request or reading the body throws.
 *
 * @throws Exception if the form entity cannot be encoded, the request fails,
 *                   or the body cannot be read
 */
@Test
public void doPostWithParam() throws Exception {
    HttpPost post = new HttpPost("http://localhost:8080/httpclient/post");

    // Simulate an HTML form: each pair becomes one form field.
    List<NameValuePair> kvList = new ArrayList<>();
    kvList.add(new BasicNameValuePair("username", "zhangsan"));
    kvList.add(new BasicNameValuePair("password", "123"));

    // Wrap the fields into a URL-encoded entity and attach it as the request body.
    StringEntity entity = new UrlEncodedFormEntity(kvList, "utf-8");
    post.setEntity(entity);

    // Resources are closed in reverse order: the response first, then the client.
    try (CloseableHttpClient httpClient = HttpClients.createDefault();
         CloseableHttpResponse response = httpClient.execute(post)) {
        // Pass "utf-8" explicitly, consistent with the GET example, instead of
        // relying on the library's built-in fallback charset.
        String string = EntityUtils.toString(response.getEntity(), "utf-8");
        System.out.println(string);
    }
}
1.1.2 jsoup 是一款Java 的HTML解析器,可直接解析某个URL地址、HTML文本内容。它提供了一套非常省力的API,可通过DOM,CSS以及类似于jQuery的操作方法来取出和操作数据。
案例就直接上抓取网页图片的代码:
/**
 * Crawls the wallpaper site starting from its root page and downloads every wallpaper
 * that is offered in the 2880x1800 resolution.
 *
 * <p>Navigation, as performed by the code below:
 * <ol>
 *   <li>fetch the root page and select the elements matched by
 *       {@code getElementsByClass("pic-list2 clearfix")};</li>
 *   <li>for each of them, follow the link inside its child at index 2 to a detail page;</li>
 *   <li>on the detail page, follow every link under the element with id {@code showImg};</li>
 *   <li>on each of those pages, look under the element with id {@code tagfbl} for a link
 *       with id {@code 2880x1800} and follow it;</li>
 *   <li>on that last page, take the {@code src} of the first {@code img} that has one and
 *       hand it to {@code downImage}.</li>
 * </ol>
 *
 * <p>The HTTP client and every response are closed via try-with-resources. Pages that do
 * not have the expected structure are skipped instead of aborting the whole crawl.
 * Any exception is printed and ends the crawl.
 */
@Test
public void ClientAndJsoup() {
    // Site root; every href found on the pages is appended to this prefix.
    // NOTE(review): this assumes all hrefs are site-relative — confirm against the site.
    String url = "https://desk.zol.com.cn/";

    try (CloseableHttpClient aDefault = HttpClients.createDefault()) {
        Document document;
        try (CloseableHttpResponse response = aDefault.execute(new HttpGet(url))) {
            if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
                // Non-200 on the entry page: report it together with the body and stop.
                System.out.println("返回状态不是200");
                System.out.println(EntityUtils.toString(response.getEntity(), "utf-8"));
                return;
            }
            String html = EntityUtils.toString(response.getEntity(), "utf-8");
            document = Jsoup.parse(html);
        }

        // NOTE(review): the argument contains a space; presumably this relies on the
        // element's class attribute being exactly this string — confirm with the jsoup
        // version in use before changing it.
        Elements elementsByClass = document.getElementsByClass("pic-list2 clearfix");
        for (Element byClass : elementsByClass) {
            // child(2) throws when there are fewer than three children; skip such lists.
            // NOTE(review): index 2 is the third child (zero-based) — confirm this is the
            // intended one.
            if (byClass.children().size() <= 2) {
                continue;
            }
            String href = byClass.child(2).select("a").attr("href");

            // Detail page of one wallpaper set.
            Document newDocument = fetchDocument(aDefault, url + href);
            Element showImg = newDocument.getElementById("showImg");
            if (showImg == null) {
                continue;
            }
            // The page title is used as the prefix of the downloaded file names.
            String title = newDocument.select("title").html();

            for (Element newElement : showImg.select("a")) {
                // Page of a single picture inside the set.
                Document new2Document = fetchDocument(aDefault, url + newElement.attr("href"));
                Element resolutionList = new2Document.getElementById("tagfbl");
                if (resolutionList == null) {
                    continue;
                }

                for (Element new2Element : resolutionList.select("a")) {
                    // Only the 2880x1800 variant is downloaded; change the id to pick
                    // another resolution.
                    if (new2Element.getElementById("2880x1800") == null) {
                        continue;
                    }
                    // Page that shows the full-size 2880x1800 picture.
                    String lastUrl = url + new2Element.select("a").attr("href");
                    Document lastDocument = fetchDocument(aDefault, lastUrl);

                    // src of the first img element that carries one; used as the download URL.
                    String img = lastDocument.getElementsByTag("img").attr("src");
                    // A random UUID suffix keeps file names unique within one set.
                    downImage(img, title + UUID.randomUUID());
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Fetches {@code pageUrl} with a GET request and parses the body (decoded with "utf-8"
 * passed to EntityUtils) into a jsoup document. The response is always closed.
 *
 * @param client  the HTTP client used to execute the request
 * @param pageUrl the address of the page to fetch
 * @return the parsed document
 * @throws IOException if the request fails or the body cannot be read
 */
private static Document fetchDocument(CloseableHttpClient client, String pageUrl) throws IOException {
    try (CloseableHttpResponse pageResponse = client.execute(new HttpGet(pageUrl))) {
        String pageHtml = EntityUtils.toString(pageResponse.getEntity(), "utf-8");
        return Jsoup.parse(pageHtml);
    }
}
下载方法
/**
 * Downloads the resource at {@code imgurl} and stores it as
 * {@code D://image/<fileName>.jpg}, creating the target directory when it is missing.
 *
 * <p>Characters in {@code fileName} that act as path separators or are reserved in
 * Windows file names are replaced with {@code '_'}, because callers in this file pass a
 * scraped page title as the name. I/O failures are printed and otherwise ignored; no
 * exception is propagated for them.
 *
 * @param imgurl   address of the image to download (an http/https URL)
 * @param fileName file name without extension; ".jpg" is appended
 */
public static void downImage(String imgurl, String fileName) {
    String file = "D://image"; // target directory for downloads
    // Create the target directory if it does not exist yet.
    File files = new File(file);
    if (!files.exists()) {
        files.mkdirs();
    }

    // Neutralise path separators and reserved characters so the name cannot escape the
    // target directory or make the file creation fail.
    String safeName = fileName.replaceAll("[\\\\/:*?\"<>|]", "_");

    HttpURLConnection connection = null;
    try {
        URL url = new URL(imgurl);
        connection = (HttpURLConnection) url.openConnection();
        File fileofImg = new File(file + "/" + safeName + ".jpg");

        // The input stream is opened first, so no empty file is created when the
        // connection fails. Both streams are closed even if the copy throws.
        try (InputStream is = connection.getInputStream();
             FileOutputStream out = new FileOutputStream(fileofImg)) {
            // Copy in chunks instead of one byte per read/write call.
            byte[] buffer = new byte[8192];
            int read;
            while ((read = is.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    } catch (IOException e) {
        // MalformedURLException and FileNotFoundException are IOExceptions as well,
        // so this single handler covers all three original catch blocks.
        e.printStackTrace();
    } finally {
        if (connection != null) {
            connection.disconnect();
        }
    }
}
最后,下载的高清图就保存在对应的文件夹(D://image)下面。
![在这里插入图片描述](https://img-blog.csdnimg.cn/2c4a82f2822949bb8279f9139411cd25.png)