// Downloads a network image to a local file, given the image URL.
package DYBZ;
import java.io.BufferedInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads images over HTTP(S) and stores them as local files.
 *
 * <p>{@link #downloadImage(String, String)} fetches one image.
 * {@link #download_Html_IMG(String)} scans an HTML string for {@code <img>} tags whose
 * {@code src} ends in {@code .png} and downloads each of them.
 */
public class ImageDownloader {

    /** Image fetched by {@link #main(String[])} when no URL argument is given. */
    private static final String DEFAULT_IMAGE_URL =
            "https://tse4-mm.cn.bing.net/th/id/OIP-C.jb-OE259cWU7Y_29TRf7bAHaEK?w=329&h=185&c=7&r=0&o=5&pid=1.7";

    /** File written by {@link #main(String[])} when no destination argument is given. */
    private static final String DEFAULT_DESTINATION_FILE =
            "E:\\MDS\\JavaWork\\Book2\\src\\main\\resources\\image.png";

    /** Directory used by the single-argument {@link #download_Html_IMG(String)}. */
    private static final String DEFAULT_IMAGE_DIR =
            "E:\\MDS\\JavaWork\\Book2\\src\\main\\resources\\image\\";

    // NOTE(review): this looks like a captured browser session cookie. It is sent with every
    // request regardless of the target host and will presumably expire - consider making it
    // configurable.
    private static final String COOKIE_HEADER =
            "PHPSESSID=jv5tbv9kid1i099l4r12qikls1; "
            + "Hm_lvt_862e8e4f50ca4af123854e8434f8698a=1715242922; "
            + "Hm_lvt_a0b498a32fffa7c376b36f470e5b5efa=1715242922; "
            + "Hm_lpvt_862e8e4f50ca4af123854e8434f8698a=1715313174; "
            + "Hm_lpvt_a0b498a32fffa7c376b36f470e5b5efa=1715313174; "
            + "UNCLICKPOP_11_zzlm=22; "
            + "cf_clearance=QfPUubAZCnQrv1cDkKmxdX_3QvW37VIrheOBBg2kPMk-1715390374-1.0.1.1-n2o4TEFDEpgkr6tAB2QXhQKJpMIAZjjWfykqOagGGOoXlWaNdrNiS1mo1fHKC0y9soAMrK56g6wmN0BdMjkbCw";

    private static final String USER_AGENT_HEADER =
            "Mozilla/5.0 (Linux; Android 11; MI 6 Build/RQ3A.222001.001) AppleWebKit/558.32 (KHTML, like Gecko) Version/4.0 Chrome/114.0.5172.87 Mobile Safari/547.24";

    /** Upper bound on establishing the connection, so an unreachable host cannot hang the caller. */
    private static final int CONNECT_TIMEOUT_MS = 15000;

    /** Upper bound on each blocking read, so a stalled server cannot hang the caller. */
    private static final int READ_TIMEOUT_MS = 30000;

    private static final int BUFFER_SIZE = 4096;

    /**
     * Matches an {@code <img>} tag and captures its double-quoted {@code src} value when that
     * value ends in {@code .png}. This may need adjusting for other HTML structures.
     */
    private static final Pattern PNG_IMG_SRC =
            Pattern.compile("<img[^>]+src=\"([^\"]+\\.png)\"[^>]*>", Pattern.CASE_INSENSITIVE);

    /** Characters that are not allowed in Windows file names. */
    private static final Pattern UNSAFE_FILE_NAME_CHARS = Pattern.compile("[\\\\/:*?\"<>|]");

    /**
     * Downloads a single image.
     *
     * @param args optional: {@code args[0]} is the image URL and {@code args[1]} the destination
     *             file; built-in defaults are used for whichever is missing
     */
    public static void main(String[] args) {
        String imageUrl = (args != null && args.length > 0) ? args[0] : DEFAULT_IMAGE_URL;
        String destinationFilePath =
                (args != null && args.length > 1) ? args[1] : DEFAULT_DESTINATION_FILE;
        downloadImage(imageUrl, destinationFilePath);
    }

    /**
     * Downloads the image at {@code imageUrl} with an HTTP GET and writes it to
     * {@code destinationFilePath}. The file is only written when the server answers 200.
     *
     * <p>This method never throws: failures are reported on the console and through the
     * return value, so callers can keep iterating over a list of images.
     *
     * @param imageUrl            absolute {@code http} or {@code https} URL of the image
     * @param destinationFilePath path of the file to create or overwrite; a missing parent
     *                            directory is created
     * @return the HTTP response code, or {@code 0} if no response was obtained (bad URL,
     *         unsupported protocol, {@code null} argument, connection failure)
     */
    public static int downloadImage(String imageUrl, String destinationFilePath) {
        System.out.println("Downloading image from: " + imageUrl);
        System.out.println("Destination file path: " + destinationFilePath);
        int responseCode = 0;
        if (imageUrl == null || destinationFilePath == null) {
            System.err.println("Image URL and destination path must not be null.");
            return responseCode;
        }
        HttpURLConnection httpConn = null;
        try {
            URL url = new URL(imageUrl);
            String protocol = url.getProtocol();
            if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
                // Only http/https connections can be cast to HttpURLConnection.
                System.err.println("Unsupported protocol: " + protocol);
                return responseCode;
            }
            httpConn = (HttpURLConnection) url.openConnection();
            httpConn.setRequestMethod("GET");
            httpConn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            httpConn.setReadTimeout(READ_TIMEOUT_MS);
            httpConn.setRequestProperty("Cookie", COOKIE_HEADER);
            httpConn.setRequestProperty("User-Agent", USER_AGENT_HEADER);
            httpConn.connect();
            responseCode = httpConn.getResponseCode();
            if (responseCode == HttpURLConnection.HTTP_OK) {
                saveResponseBody(httpConn, destinationFilePath);
                System.out.println("Image downloaded successfully.");
            } else {
                System.out.println("GET request not worked");
            }
        } catch (IOException | RuntimeException e) {
            // Deliberately broad: the contract is "report and return", never throw.
            System.err.println("Failed to download " + imageUrl + ": " + e);
            e.printStackTrace();
        } finally {
            if (httpConn != null) {
                httpConn.disconnect();
            }
        }
        return responseCode;
    }

    /**
     * Downloads every {@code .png} image referenced by an {@code <img>} tag in {@code html}
     * into the default image directory. The {@code src} values are used as URLs unchanged,
     * so relative ones will fail; use
     * {@link #download_Html_IMG(String, String, String)} to supply a base URL.
     *
     * @param html HTML text to scan; {@code null} is treated as containing no images
     * @throws Exception declared for backward compatibility with existing callers
     */
    public void download_Html_IMG(String html) throws Exception {
        download_Html_IMG(html, null, DEFAULT_IMAGE_DIR);
    }

    /**
     * Downloads every {@code .png} image referenced by an {@code <img>} tag in {@code html}.
     *
     * <p>Each file is stored under {@code destinationDir} using the last path segment of its
     * {@code src}; images that share a file name overwrite one another. After each attempt
     * the {@code src} and resulting status code are printed.
     *
     * @param html           HTML text to scan; {@code null} is treated as containing no images
     * @param baseUrl        URL of the page the HTML came from, used to resolve relative
     *                       {@code src} values; {@code null} or empty to use them unchanged
     * @param destinationDir directory that receives the files; {@code null} means the
     *                       current working directory
     */
    public void download_Html_IMG(String html, String baseUrl, String destinationDir) {
        for (String src : extractPngSources(html)) {
            String imageUrl = resolveUrl(baseUrl, src);
            String destinationFilePath =
                    new java.io.File(destinationDir, fileNameOf(src)).getPath();
            int code = downloadImage(imageUrl, destinationFilePath);
            System.out.print(src + ":" + code + "\t");
        }
        System.out.println("\n");
    }

    /** Streams the response body of {@code httpConn} into the given file, closing both ends. */
    private static void saveResponseBody(HttpURLConnection httpConn, String destinationFilePath)
            throws IOException {
        java.io.File parent = new java.io.File(destinationFilePath).getAbsoluteFile().getParentFile();
        if (parent != null) {
            // Result ignored on purpose: if the directory cannot be created, opening the
            // output stream below fails with a descriptive exception.
            parent.mkdirs();
        }
        try (InputStream inputStream = new BufferedInputStream(httpConn.getInputStream());
             FileOutputStream outputStream = new FileOutputStream(destinationFilePath)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int bytesRead;
            while ((bytesRead = inputStream.read(buffer)) != -1) {
                outputStream.write(buffer, 0, bytesRead);
            }
        }
    }

    /** Returns the {@code src} values of all {@code .png} {@code <img>} tags, in document order. */
    private static List<String> extractPngSources(String html) {
        List<String> sources = new ArrayList<>();
        if (html == null) {
            return sources;
        }
        Matcher matcher = PNG_IMG_SRC.matcher(html);
        while (matcher.find()) {
            sources.add(matcher.group(1));
        }
        return sources;
    }

    /** Resolves {@code src} against {@code baseUrl}; returns {@code src} unchanged if that is not possible. */
    private static String resolveUrl(String baseUrl, String src) {
        if (baseUrl == null || baseUrl.isEmpty()) {
            return src;
        }
        try {
            return new URL(new URL(baseUrl), src).toString();
        } catch (IOException e) {
            // Malformed base or src: fall back to the raw value and let downloadImage report it.
            return src;
        }
    }

    /** Returns the last path segment of {@code src}, with characters illegal in file names replaced by {@code _}. */
    private static String fileNameOf(String src) {
        int lastSeparator = Math.max(src.lastIndexOf('/'), src.lastIndexOf('\\'));
        String name = src.substring(lastSeparator + 1);
        return UNSAFE_FILE_NAME_CHARS.matcher(name).replaceAll("_");
    }
}