Java 网络编程：爬取网站图片

 复制以下代码即可直接运行（图片默认保存到 F:\beauty\ 目录）。

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal image crawler: fetches one web page, extracts the {@code src} of every
 * {@code <img>} tag in it and saves each referenced image into a local directory.
 */
public class Url {
	// Page whose <img> tags are scanned; also the base for resolving relative image links.
	private static final String WEB_SITE = "https://www.enterdesk.com/special/dongmantupian/dmtpbz/";

	// Directory the downloaded images are written to.
	private static final String SAVE_DIR = "F:\\beauty\\";

	// Matches the src attribute of a single <img ...> tag. The attribute must be preceded by
	// whitespace so that "data-src" is not mistaken for "src", and "[^>]" keeps the match
	// inside one tag. The value is captured in group 1 (double-quoted), group 2
	// (single-quoted) or group 3 (unquoted).
	private static final Pattern IMG_SRC = Pattern.compile(
			"<img\\b[^>]*?\\ssrc\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))",
			Pattern.CASE_INSENSITIVE);

	/**
	 * Runs the crawler: download the page, collect the image links, download the images.
	 *
	 * @param args unused
	 * @throws IOException if the page or any image cannot be fetched or written
	 */
	public static void main(String[] args) throws IOException  {
		// 1. Fetch the page source.
		String htmlInfo = getHtml();
		// 2. Extract the image URLs.
		List<String> imageSrc = getImageSrc(htmlInfo);
		// 3. Download the images.
		downloadImage(imageSrc);
	}

	/**
	 * Downloads {@code WEB_SITE} and returns its content decoded as UTF-8.
	 * Each line read is terminated with {@code '\n'} so that tokens on adjacent
	 * lines are not glued together.
	 *
	 * @return the page source
	 * @throws IOException if the connection cannot be opened or read
	 */
	public static String getHtml() throws IOException{
		URL url = new URL(WEB_SITE);
		URLConnection uc = url.openConnection();
		StringBuilder sb = new StringBuilder();
		// try-with-resources closes the reader even when reading fails midway.
		try (BufferedReader br = new BufferedReader(new InputStreamReader(uc.getInputStream(),"UTF-8"))) {
			String line;
			while ((line = br.readLine()) != null) {
				sb.append(line).append('\n');
			}
		}
		return sb.toString();
	}

	/**
	 * Extracts the {@code src} attribute value of every {@code <img>} tag in the given HTML.
	 * Only {@code <img>} tags are considered; quoted (single or double) and unquoted
	 * attribute values are supported and returned without their quotes. Empty values
	 * are skipped.
	 *
	 * @param htmlInfo HTML source to scan; {@code null} yields an empty list
	 * @return the image links in document order (possibly relative, possibly repeated)
	 */
	public static List<String> getImageSrc(String htmlInfo){
		List<String> pics = new ArrayList<>();
		if (htmlInfo == null) {
			return pics;
		}
		Matcher m = IMG_SRC.matcher(htmlInfo);
		while (m.find()) {
			// Exactly one of the three alternatives took part in the match.
			String src = m.group(1);
			if (src == null) {
				src = m.group(2);
			}
			if (src == null) {
				src = m.group(3);
			}
			src = src.trim();
			if (!src.isEmpty()) {
				pics.add(src);
			}
		}
		return pics;
	}

	/**
	 * Downloads every image link into {@code SAVE_DIR}, creating the directory if needed.
	 * Links are resolved against {@code WEB_SITE}, so relative and protocol-relative
	 * links are downloaded too. Links that cannot be parsed as a URL, or that do not
	 * yield a usable file name, are skipped.
	 *
	 * @param imageSrc image links to download; {@code null} or empty does nothing
	 * @throws IOException if the target directory cannot be created, or an image
	 *                     cannot be read or written
	 */
	public static void downloadImage(List<String> imageSrc) throws IOException {
		if (imageSrc == null || imageSrc.isEmpty()) {
			return;
		}
		File saveDir = new File(SAVE_DIR);
		// Re-check isDirectory() after mkdirs() in case something else created it meanwhile.
		if (!saveDir.isDirectory() && !saveDir.mkdirs() && !saveDir.isDirectory()) {
			throw new IOException("Cannot create download directory: " + saveDir);
		}
		URL base = new URL(WEB_SITE);
		for (String src : imageSrc) {
			if (src == null || src.trim().isEmpty()) {
				continue;
			}
			URL url;
			try {
				url = new URL(base, src.trim());
			} catch (IOException ignored) {
				// Not a URL this JVM can open (e.g. a "data:" URI): deliberately skipped.
				continue;
			}
			// Name the file after the path only, so a query string never ends up in the name.
			String fileName = NetUtil.getStrName(url.getPath());
			if (!isSafeFileName(fileName)) {
				continue;
			}
			// Download the resource; both streams are closed even if the copy fails.
			try (DataInputStream dataInputStream = new DataInputStream(url.openStream());
					FileOutputStream fileOutputStream = new FileOutputStream(new File(saveDir, fileName))) {
				byte[] bytes = new byte[1024];
				int length;
				while ((length = dataInputStream.read(bytes)) != -1) {
					fileOutputStream.write(bytes, 0, length);
					System.out.println("下载中....");
				}
			}
			System.out.println("下载完成...");
		}
	}

	// Rejects names that are empty or could address something outside SAVE_DIR
	// (the names come from an untrusted web page).
	private static boolean isSafeFileName(String name) {
		return name != null
				&& !name.isEmpty()
				&& !name.equals(".")
				&& !name.equals("..")
				&& name.indexOf('\\') < 0
				&& name.indexOf(':') < 0;
	}
}
class NetUtil{
	 /**
	  * Returns the image file name of a URL, i.e. the last non-empty path segment.
	  * A query string ({@code ?...}) or fragment ({@code #...}) is not part of the name
	  * and is cut off before the path is split.
     * @param url the link to take the name from; may be {@code null}
     * @return the last path segment, or an empty string when {@code url} is
     *         {@code null} or contains no segment (for example {@code "/"})
     */
    public static String getStrName(String url) {
        if (url == null) {
            return "";
        }
        // Cut at the first '?' or '#', whichever comes first.
        int end = url.length();
        int query = url.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = url.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        String[] segments = url.substring(0, end).split("/");
        // split() drops trailing empty strings, so input such as "/" yields an empty array.
        if (segments.length == 0) {
            return "";
        }
        return segments[segments.length - 1];
    }

}

运行效果：

效果图哟~

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值