package cn.itcast.httpserver;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads every image referenced by the {@code <img>} tags of a single HTML page.
 *
 * <p>The page is fetched over HTTP, its {@code <img>} tags are located with regular
 * expressions, each {@code src} value is resolved against the page URL, and every image is
 * saved into the download directory by an {@link ImageDownloadTask} running on a worker thread.
 */
public class DownloadImage {

    /** Local directory that receives the downloaded images. */
    private static final String DOWNLOAD_DIR = "d:\\image\\download";

    /** Upper bound on simultaneously running downloads, so a large page cannot spawn hundreds of threads. */
    private static final int MAX_CONCURRENT_DOWNLOADS = 8;

    /** Maximum time to wait for a TCP connection, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MILLIS = 10000;

    /** Maximum time to wait for data on an open connection, in milliseconds. */
    private static final int READ_TIMEOUT_MILLIS = 30000;

    /** Size of the copy buffer used while saving an image. */
    private static final int BUFFER_SIZE = 1024;

    /** Extension used when the image URL does not end in a recognised image extension. */
    private static final String DEFAULT_EXTENSION = ".gif";

    /** Extensions (lower case, without the dot) that are preserved in the saved file name. */
    private static final List<String> IMAGE_EXTENSIONS =
            Arrays.asList("gif", "jpg", "jpeg", "png", "bmp", "webp", "svg", "ico");

    /** Matches one complete img tag, self-closing or not, in any letter case. */
    private static final Pattern IMG_TAG_PATTERN =
            Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);

    /**
     * Matches a quoted src attribute inside a tag. The leading whitespace keeps attributes
     * such as data-src from matching; group 2 is the attribute value.
     */
    private static final Pattern SRC_ATTRIBUTE_PATTERN =
            Pattern.compile("\\ssrc\\s*=\\s*([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE);

    /** Address of the HTML page whose images are downloaded. */
    private final String url;

    /**
     * @param url address of the HTML page to scan for images
     */
    public DownloadImage(String url) {
        this.url = url;
    }

    /**
     * Fetches the page, extracts the {@code src} of every {@code <img>} tag and schedules one
     * download per image. The method returns once all downloads are scheduled; the worker
     * threads finish the transfers in the background.
     */
    public void run() {
        String html = loadHtml(this.url);
        List<String> imgPaths = parseImagePath(html);
        if (imgPaths.isEmpty()) {
            return;
        }

        ExecutorService pool = Executors.newFixedThreadPool(
                Math.min(imgPaths.size(), MAX_CONCURRENT_DOWNLOADS));
        try {
            for (String imgPath : imgPaths) {
                // Resolve against the page URL so relative, root-relative and absolute
                // references all end up pointing at the right host and path.
                String imageUrl = resolveImageUrl(this.url, imgPath);
                if (imageUrl == null) {
                    System.err.println("Skipping unsupported image reference: " + imgPath);
                    continue;
                }
                pool.execute(new ImageDownloadTask(imageUrl, DOWNLOAD_DIR));
            }
        } finally {
            // Already submitted downloads still run to completion; the pool threads then exit.
            pool.shutdown();
        }
    }

    /**
     * Extracts the {@code src} attribute value of every {@code <img>} tag in the given HTML.
     *
     * @param html HTML source; {@code null} is treated as an empty document
     * @return the non-blank src values in document order, possibly empty, never {@code null}
     */
    private List<String> parseImagePath(String html) {
        List<String> imagePaths = new ArrayList<String>();
        if (html == null) {
            return imagePaths;
        }
        Matcher tagMatcher = IMG_TAG_PATTERN.matcher(html);
        while (tagMatcher.find()) {
            Matcher srcMatcher = SRC_ATTRIBUTE_PATTERN.matcher(tagMatcher.group());
            if (srcMatcher.find()) {
                String path = srcMatcher.group(2).trim();
                // An empty src would resolve to the page itself, which is not an image.
                if (path.length() > 0) {
                    imagePaths.add(path);
                }
            }
        }
        return imagePaths;
    }

    /**
     * Resolves an image reference found in a page against that page's URL.
     *
     * @param pageUrl URL of the page containing the reference
     * @param imgPath raw src value (relative, root-relative or absolute)
     * @return the absolute http/https URL, or {@code null} when the reference cannot be
     *         resolved or uses another scheme (for example {@code data:} or {@code ftp:})
     */
    private static String resolveImageUrl(String pageUrl, String imgPath) {
        try {
            URL resolved = new URL(new URL(pageUrl), imgPath);
            String protocol = resolved.getProtocol();
            if ("http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol)) {
                return resolved.toString();
            }
            return null;
        } catch (MalformedURLException e) {
            return null;
        }
    }

    /**
     * Chooses the file extension for an image from the path part of its URL.
     *
     * @param urlPath path component of the image URL, without query string
     * @return the recognised image extension in lower case including the dot, or
     *         {@code ".gif"} when the path has no recognised image extension
     */
    private static String imageExtension(String urlPath) {
        if (urlPath == null) {
            return DEFAULT_EXTENSION;
        }
        String fileName = urlPath.substring(urlPath.lastIndexOf('/') + 1);
        int dot = fileName.lastIndexOf('.');
        if (dot < 0) {
            return DEFAULT_EXTENSION;
        }
        String extension = fileName.substring(dot + 1).toLowerCase(Locale.ENGLISH);
        return IMAGE_EXTENSIONS.contains(extension) ? "." + extension : DEFAULT_EXTENSION;
    }

    /**
     * Opens (without yet connecting) an HTTP connection with connect and read timeouts set.
     *
     * @param urlString address to open
     * @return the configured connection
     * @throws IOException if the URL is malformed or is not an HTTP(S) URL
     */
    private static HttpURLConnection openHttpConnection(String urlString) throws IOException {
        URLConnection connection = new URL(urlString).openConnection();
        if (!(connection instanceof HttpURLConnection)) {
            throw new IOException("Not an HTTP(S) URL: " + urlString);
        }
        HttpURLConnection http = (HttpURLConnection) connection;
        http.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
        http.setReadTimeout(READ_TIMEOUT_MILLIS);
        return http;
    }

    /** Closes a resource, reporting (but not propagating) a failure to close it. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.err.println("Failed to close resource: " + e);
        }
    }

    /**
     * Loads the HTML source of a page over HTTP.
     *
     * @param urlString address of the page
     * @return the page source with lines separated by {@code '\n'}, or an empty string when
     *         the page cannot be loaded
     */
    private String loadHtml(String urlString) {
        BufferedReader reader = null;
        HttpURLConnection connection = null;
        try {
            connection = openHttpConnection(urlString);
            // NOTE(review): the body is decoded with the platform default charset, as before;
            // confirm whether the served pages need an explicit charset.
            reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // Keep a separator so a tag split across lines is not fused into one token.
                html.append(line).append('\n');
            }
            return html.toString();
        } catch (IOException e) {
            System.err.println("Failed to load " + urlString + ": " + e);
            e.printStackTrace();
            return "";
        } finally {
            closeQuietly(reader);
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Downloads one image over HTTP and stores it under a random, unique file name.
     * Failures are reported on standard error and never propagate out of {@link #run()}.
     */
    public static class ImageDownloadTask implements Runnable {
        private final String urlString;
        private final String baseDir;

        /**
         * @param url     address of the image to download
         * @param baseDir local directory the image is saved into (created when missing)
         */
        public ImageDownloadTask(String url, String baseDir) {
            this.urlString = url;
            this.baseDir = baseDir;
        }

        /**
         * Copies the image to {@code baseDir/<random UUID><extension>}. A partially written
         * file is removed when the transfer fails.
         */
        @Override
        public void run() {
            HttpURLConnection connection = null;
            InputStream input = null;
            OutputStream output = null;
            File target = null;
            boolean completed = false;
            try {
                connection = openHttpConnection(this.urlString);
                // Open the stream first so nothing is created on disk for a failing request.
                input = new BufferedInputStream(connection.getInputStream());

                String extension = imageExtension(connection.getURL().getPath());
                target = new File(this.baseDir, UUID.randomUUID().toString() + extension);
                File directory = target.getParentFile();
                // mkdirs() also returns false when another worker created the directory first,
                // so only treat it as an error when the directory is still missing.
                if (directory != null && !directory.mkdirs() && !directory.isDirectory()) {
                    throw new IOException("Cannot create directory " + directory);
                }

                output = new BufferedOutputStream(new FileOutputStream(target));
                byte[] buffer = new byte[BUFFER_SIZE];
                int read;
                while ((read = input.read(buffer)) != -1) {
                    output.write(buffer, 0, read);
                }
                output.flush();
                completed = true;
            } catch (IOException e) {
                System.err.println("Failed to download " + this.urlString + ": " + e);
                e.printStackTrace();
            } finally {
                closeQuietly(input);
                closeQuietly(output);
                // The connection is still null when the URL itself was rejected.
                if (connection != null) {
                    connection.disconnect();
                }
                if (!completed && target != null && target.exists() && !target.delete()) {
                    System.err.println("Could not remove partial file " + target);
                }
            }
        }
    }

    /** Downloads all images of the sample page served on localhost. */
    public static void main(String[] args) {
        DownloadImage downloadImage = new DownloadImage("http://127.0.0.1/index.html");
        downloadImage.run();
    }
}
// java - Downloading images (combined use of regular expressions, HttpURLConnection, streams and sockets)
// Most recently recommended article published on 2021-02-16 13:11:56