import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal image scraper: fetches one HTML page, extracts the {@code <img>} tags,
 * pulls the absolute URLs out of those tags and saves every image into the
 * current working directory as {@code 0.JPG}, {@code 1.JPG}, ...
 *
 * <p>The three static fields are intentionally left non-final so that existing
 * code which assigns them before calling the methods keeps working.
 */
public class CatchImg {

    /** Page to crawl when no URL is passed on the command line (placeholder text, replace it). */
    static String url = "你想爬取得网址";

    /**
     * Regex matching one complete {@code <img ... src=... >} tag (case-insensitive).
     * {@code [^>]*} keeps each match inside a single tag.
     */
    static String url_reg = "(?i)<img\\b[^>]*\\bsrc\\s*=[^>]*>";

    /**
     * Regex matching an absolute URL ({@code scheme://...}) inside a tag. The match
     * stops at whitespace, quotes and angle brackets, so the surrounding attribute
     * quote is never part of the result.
     */
    static String src_reg = "[a-zA-Z]+://[^\\s\"'<>]+";

    /** Size of the copy buffer used for network and file transfers. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Runs the whole pipeline: download page, find image tags, extract URLs, save images.
     *
     * @param args optional; {@code args[0]} overrides the {@link #url} field
     * @throws Exception if the page itself cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        String target = (args != null && args.length > 0) ? args[0] : url;
        // Fetch the HTML content.
        String html = getHtml(target);
        // Collect the <img> tags.
        List<String> listUrl = getUrl(html);
        // Extract the image addresses from the tags.
        List<String> listSrc = getSrc(listUrl);
        // Save the images locally.
        Download(listSrc);
    }

    /**
     * Downloads the resource at {@code url} and returns it as text.
     *
     * <p>All bytes are collected before decoding so that a multi-byte character
     * split across two reads is not corrupted. The charset is taken from the
     * response's {@code Content-Type} header when present, otherwise UTF-8.
     *
     * @param url address of the page to fetch
     * @return the decoded response body
     * @throws Exception if the URL is malformed or the transfer fails
     */
    public static String getHtml(String url) throws Exception {
        URLConnection connection = new URL(url).openConnection();
        ByteArrayOutputStream body = new ByteArrayOutputStream();
        Charset charset;
        try (InputStream in = connection.getInputStream()) {
            charset = charsetOf(connection.getContentType());
            copy(in, body);
        }
        return new String(body.toByteArray(), charset);
    }

    /**
     * Finds every substring of {@code html} that matches {@link #url_reg}.
     *
     * @param html page source to scan
     * @return the matched tags in document order; empty if there are none
     */
    public static List<String> getUrl(String html) {
        List<String> listUrl = new ArrayList<String>();
        Matcher matcher = Pattern.compile(url_reg).matcher(html);
        while (matcher.find()) {
            listUrl.add(matcher.group());
        }
        return listUrl;
    }

    /**
     * Extracts every substring matching {@link #src_reg} from the given tags.
     *
     * @param listUrl tags as returned by {@link #getUrl(String)}
     * @return the extracted addresses in encounter order; empty if there are none
     */
    public static List<String> getSrc(List<String> listUrl) {
        List<String> listSrc = new ArrayList<String>();
        // Compile once instead of once per tag.
        Pattern srcPattern = Pattern.compile(src_reg);
        for (String tag : listUrl) {
            Matcher matcher = srcPattern.matcher(tag);
            while (matcher.find()) {
                // No trimming needed: the pattern itself excludes the closing quote.
                listSrc.add(matcher.group());
            }
        }
        return listSrc;
    }

    /**
     * Saves each address to {@code <n>.JPG} in the working directory, where
     * {@code n} counts the successful downloads starting at 0.
     *
     * <p>A failing address is reported and skipped; it does not stop the
     * remaining downloads.
     *
     * @param listImgSrc image addresses to download
     */
    private static void Download(List<String> listImgSrc) {
        int count = 0;
        for (String url : listImgSrc) {
            String imageName = count + ".JPG";
            System.out.println("开始下载:" + url);
            try (InputStream in = new URL(url).openStream();
                 FileOutputStream fo = new FileOutputStream(new File(imageName))) {
                copy(in, fo);
            } catch (IOException | RuntimeException e) {
                // Scraped addresses are untrusted; URL handlers may also reject them
                // with unchecked exceptions, so both kinds are treated as a failed download.
                System.out.println("下载失败: " + url + " (" + e + ")");
                continue;
            }
            System.out.println(imageName + "下载完成");
            count++;
        }
    }

    /** Copies all remaining bytes of {@code in} to {@code out}; closes neither stream. */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] buf = new byte[BUFFER_SIZE];
        int len;
        while ((len = in.read(buf)) != -1) {
            out.write(buf, 0, len);
        }
    }

    /** Returns the charset named in a Content-Type value, or UTF-8 if absent or unusable. */
    private static Charset charsetOf(String contentType) {
        if (contentType != null) {
            String key = "charset=";
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (param.regionMatches(true, 0, key, 0, key.length())) {
                    String name = param.substring(key.length()).replace("\"", "").trim();
                    try {
                        return Charset.forName(name);
                    } catch (IllegalArgumentException ignored) {
                        // Unknown or malformed charset name: fall back to UTF-8 below.
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }
}