import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Finds image references in a web page and downloads them into a local directory.
 *
 * <p>Images are located by applying two regular expressions to the raw HTML text
 * (no HTML parsing is done): one for site-relative paths and one for absolute
 * {@code http://} URLs. Only {@code src} and {@code background} attributes whose
 * value ends in {@code jpg}, {@code png} or {@code gif} (all-lowercase or
 * all-uppercase) are recognised.
 *
 * @author 方小洲
 *
 * Created 2013-8-5, 8:46:54 AM
 */
public class ImageParse {

    /** Number of bytes copied per read when saving an image to disk. */
    private static final int COPY_BUFFER_SIZE = 8192;

    /** Matches image attributes holding a path without scheme/host; group 3 is the path minus any leading slash. */
    private static final Pattern RELATIVE_IMAGE = Pattern.compile(
            "(?x)(src|SRC|background|BACKGROUND)=('|\")/?(([\\w-]+/)*([\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))('|\")");

    /** Matches image attributes holding a full {@code http://} URL; group 3 is the complete URL. */
    private static final Pattern ABSOLUTE_IMAGE = Pattern.compile(
            "(?x)(src|SRC|background|BACKGROUND)=('|\")(http://([\\w-]+\\.)+[\\w-]+(:[0-9]+)*(/[\\w-]+)*(/[\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))('|\")");

    /**
     * Fetches the page at {@code url} and returns the image addresses found in it.
     *
     * @param url address of the page to scan
     * @return image URLs in the order described by {@link #parseImagePaths(String, String)}
     * @throws Exception if the URL is malformed or the page cannot be read
     */
    public List<String> getImagesPath(String url) throws Exception {
        return parseImagePaths(url, getHtmlCode(url));
    }

    /**
     * Extracts image addresses from already-downloaded HTML.
     *
     * <p>Relative references are listed first and are turned into URLs by appending
     * them to {@code url} (every relative path is treated as relative to {@code url}
     * itself, whether or not it started with a slash). Absolute {@code http://}
     * references follow, unchanged. Duplicates are not removed.
     *
     * @param url      address the HTML was loaded from; trailing slashes are ignored
     *                 so that the joined result never contains {@code "//"} after it
     * @param htmlCode HTML text to scan
     * @return the image URLs found, never {@code null}
     */
    public List<String> parseImagePaths(String url, String htmlCode) {
        List<String> imagePaths = new ArrayList<String>();
        String base = stripTrailingSlashes(url);

        // References without scheme/host: prefix them with the page address.
        Matcher matcher = RELATIVE_IMAGE.matcher(htmlCode);
        while (matcher.find()) {
            imagePaths.add(base + "/" + matcher.group(3));
        }

        // References that are already complete URLs: take them as they are.
        matcher = ABSOLUTE_IMAGE.matcher(htmlCode);
        while (matcher.find()) {
            imagePaths.add(matcher.group(3));
        }
        return imagePaths;
    }

    /**
     * Downloads the text of the page at {@code url}.
     *
     * <p>The response is decoded with the platform default charset and the lines are
     * concatenated without line separators.
     *
     * @param url address of the page
     * @return the page content as a single string
     * @throws Exception if the URL is malformed or reading fails
     */
    public String getHtmlCode(String url) throws Exception {
        StringBuilder html = new StringBuilder();
        URL httpUrl = new URL(url);
        BufferedReader reader = new BufferedReader(new InputStreamReader(httpUrl.openStream()));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line);
            }
        } finally {
            // Always release the underlying connection, even when reading fails.
            reader.close();
        }
        return html.toString();
    }

    /**
     * Downloads every image referenced by the page at {@code targetUrl}.
     *
     * <p>The first image that fails to download aborts the whole run.
     *
     * @param targetUrl  address of the page to scan
     * @param outputPath directory the images are written to
     * @throws Exception if the page or any image cannot be fetched or saved
     */
    public void downLoadImages(String targetUrl, String outputPath) throws Exception {
        for (String imagePath : getImagesPath(targetUrl)) {
            generatorImageBathByUrl(imagePath, outputPath);
        }
    }

    /**
     * Downloads a single image into {@code outputPath}.
     *
     * <p>The file is named after the current time in milliseconds plus the extension
     * of {@code imagePath}; if that name is already taken a numeric suffix is added so
     * that images saved within the same millisecond do not overwrite each other. The
     * output directory is created when it does not exist.
     *
     * @param imagePath  URL of the image
     * @param outputPath directory the image is written to
     * @throws Exception if the URL is malformed, the directory cannot be created, or
     *                   the transfer fails
     */
    public void generatorImageBathByUrl(String imagePath, String outputPath) throws Exception {
        URL imageUrl = new URL(imagePath);

        File directory = new File(outputPath);
        if (!directory.isDirectory() && !directory.mkdirs()) {
            throw new IOException("Cannot create output directory: " + outputPath);
        }

        BufferedInputStream in = new BufferedInputStream(imageUrl.openStream());
        try {
            File target = newTargetFile(directory, extensionOf(imagePath));
            FileOutputStream out = new FileOutputStream(target);
            try {
                // Copy in chunks; a byte-at-a-time write hits the file system on every byte.
                byte[] buffer = new byte[COPY_BUFFER_SIZE];
                int count;
                while ((count = in.read(buffer)) != -1) {
                    out.write(buffer, 0, count);
                }
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /** Returns {@code url} with any trailing {@code '/'} characters removed. */
    private static String stripTrailingSlashes(String url) {
        int end = url.length();
        while (end > 0 && url.charAt(end - 1) == '/') {
            end--;
        }
        return url.substring(0, end);
    }

    /** Returns the extension (including the dot) of the last path segment, or {@code ""} if it has none. */
    private static String extensionOf(String imagePath) {
        int dot = imagePath.lastIndexOf('.');
        // A dot that precedes the last slash belongs to the host or a directory, not the file name.
        return dot > imagePath.lastIndexOf('/') ? imagePath.substring(dot) : "";
    }

    /** Picks a timestamp-based file name in {@code directory} that is not in use yet. */
    private static File newTargetFile(File directory, String extension) {
        long stamp = System.currentTimeMillis();
        File candidate = new File(directory, stamp + extension);
        for (int suffix = 1; candidate.exists(); suffix++) {
            candidate = new File(directory, stamp + "_" + suffix + extension);
        }
        return candidate;
    }

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the page URL and {@code args[1]} the
     *             output directory; the original built-in values are used when omitted
     * @throws Exception if the download fails
     */
    public static void main(String[] args) throws Exception {
        String targetUrl = args.length > 0 ? args[0] : "http://www.fjboda.cn";
        String outputDir = args.length > 1 ? args[1] : "d:\\image";
        new ImageParse().downLoadImages(targetUrl, outputDir);
    }
}