import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Iterator;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Command-line scraper: collects image URLs from the listing pages of
 * www.jiukuaiyou.com and downloads every image into a local directory.
 *
 * <p>Flow: {@link #main} fetches the site root, {@link #getMaxPage} reads the
 * highest listing-page number from its pagination links, {@link #getImgHref}
 * fetches each listing page and extracts the {@code data-original} attribute
 * values, and {@link #getAllImages} strips the size suffix from each of them.
 *
 * <p>All methods are static and keep no state between calls.
 */
public class Capture {

    /** Root page; fetched first to discover how many listing pages exist. */
    private static final String SITE_URL = "http://www.jiukuaiyou.com";

    /** Listing pages are addressed as this prefix followed by a 1-based page number. */
    private static final String LISTING_URL_PREFIX = "http://www.jiukuaiyou.com/jiu/all/whole/";

    /** Directory the downloaded images are written to. */
    private static final String DOWNLOAD_DIR = "F:\\imgs";

    /**
     * Suffix removed from every extracted URL.
     * NOTE(review): presumably a thumbnail-size marker whose removal yields the
     * full-size image URL -- confirm against the site.
     */
    private static final String SIZE_SUFFIX = "_290x190.jpg";

    /** Connect timeout for image downloads, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MILLIS = 1000;

    /** Size of the copy buffer used while downloading. */
    private static final int BUFFER_SIZE = 8 * 1024;

    /** Captures the value of a {@code data-original="..."} attribute. */
    private static final Pattern IMG_SRC_PATTERN =
            Pattern.compile("data-original=\\\"(.*?)\\\"");

    /** Captures the page number of a pagination link into the listing. */
    private static final Pattern PAGE_LINK_PATTERN =
            Pattern.compile("<a href=\\\"/jiu/all/whole/(\\d+)\\\"");

    /**
     * Downloads every image found on the site into {@link #DOWNLOAD_DIR} and
     * prints the elapsed download time in seconds.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ArrayList<String> imageUrls = getAllImages(SITE_URL);
        if (imageUrls.isEmpty()) {
            return;
        }

        // Create the target directory once instead of re-checking it per image.
        File targetDir = new File(DOWNLOAD_DIR);
        if (!targetDir.isDirectory() && !targetDir.mkdirs()) {
            System.err.println("Cannot create download directory " + targetDir);
            return;
        }

        long startTime = System.currentTimeMillis();
        int index = 0;
        for (String imageUrl : imageUrls) {
            index++;
            // The running index keeps names unique even when several downloads
            // finish within the same millisecond.
            String fileName = System.currentTimeMillis() + "_" + index + ".jpg";
            try {
                download(imageUrl, new File(targetDir, fileName));
            } catch (IOException e) {
                // Also covers MalformedURLException. One bad image must not
                // abort the remaining downloads.
                System.err.println("Failed to download " + imageUrl);
                e.printStackTrace();
            }
        }
        long endTime = System.currentTimeMillis();
        System.out.println((float) (endTime - startTime) / 1000);
    }

    /**
     * Collects the image URLs of all listing pages, with {@link #SIZE_SUFFIX}
     * removed from each.
     *
     * @param url address of the page whose pagination links determine how many
     *            listing pages are scanned
     * @return the image URLs; empty (never {@code null}) when the page could
     *         not be fetched or no images were found
     */
    public static ArrayList<String> getAllImages(String url) {
        ArrayList<String> images = new ArrayList<String>();
        String content = getContent(url);
        if (content.length() == 0) {
            return images;
        }
        for (String href : getImgHref(content)) {
            images.add(href.replace(SIZE_SUFFIX, ""));
        }
        return images;
    }

    /**
     * Fetches the resource at the given address and returns its text, decoded
     * as UTF-8, with all lines concatenated (line terminators are dropped).
     *
     * @param str the URL to fetch
     * @return the text read; empty when the URL is malformed or cannot be
     *         opened, and possibly partial when reading fails midway
     */
    public static String getContent(String str) {
        StringBuilder text = new StringBuilder();
        BufferedReader reader = null;
        try {
            URL url = new URL(str);
            reader = new BufferedReader(new InputStreamReader(url.openStream(), "utf-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line);
            }
        } catch (IOException e) {
            // Also covers MalformedURLException; callers treat "" as "no content".
            System.err.println("Failed to fetch " + str);
            e.printStackTrace();
        } finally {
            closeQuietly(reader);
        }
        return text.toString();
    }

    /**
     * Fetches every listing page from 1 up to the highest page number linked
     * in {@code content} and extracts all {@code data-original} attribute
     * values from those pages.
     *
     * @param content text of a page containing the pagination links
     * @return the extracted attribute values in page order; empty (never
     *         {@code null}) when no pagination link is present
     */
    public static ArrayList<String> getImgHref(String content) {
        ArrayList<String> hrefs = new ArrayList<String>();
        int maxPage = getMaxPage(content);
        for (int page = 1; page <= maxPage; page++) {
            String pageHtml = getContent(LISTING_URL_PREFIX + page);
            if (pageHtml.length() == 0) {
                continue;
            }
            // Scan the listing page that was just fetched, not the root page.
            Matcher matcher = IMG_SRC_PATTERN.matcher(pageHtml);
            boolean matched = false;
            while (matcher.find()) {
                matched = true;
                hrefs.add(matcher.group(1));
            }
            if (!matched) {
                // Message text (Chinese): "no match".
                System.out.println("\u5339\u914d\u4e0d\u4e0a");
            }
        }
        return hrefs;
    }

    /**
     * Returns the highest page number among the pagination links found in
     * {@code content}.
     *
     * @param content page text to scan; {@code null} is treated as empty
     * @return the largest linked page number, or 0 when there is none
     */
    public static int getMaxPage(String content) {
        int maxPage = 0;
        boolean matched = false;
        Matcher matcher = PAGE_LINK_PATTERN.matcher(content == null ? "" : content);
        // A single find() loop, so the first match is counted as well.
        while (matcher.find()) {
            matched = true;
            try {
                maxPage = Math.max(maxPage, Integer.parseInt(matcher.group(1)));
            } catch (NumberFormatException ignored) {
                // Digit run too long for an int: not a usable page number.
            }
        }
        if (!matched) {
            // Message text (Chinese): "nothing matched".
            System.out.println("\u6ca1\u6709\u5339\u914d\u5230");
        }
        return maxPage;
    }

    /**
     * Prints the string form of the given object to standard output.
     *
     * @param s the object to print; {@code null} prints {@code "null"}
     */
    public static void test(Object s) {
        System.out.println(String.valueOf(s));
    }

    /**
     * Copies the resource at {@code imageUrl} into {@code target}.
     *
     * @throws IOException if the URL is malformed, the connection fails, or
     *         the file cannot be written
     */
    private static void download(String imageUrl, File target) throws IOException {
        URLConnection conn = new URL(imageUrl).openConnection();
        conn.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
        InputStream in = null;
        FileOutputStream out = null;
        try {
            in = conn.getInputStream();
            // Opened only after the connection succeeded, so a failed request
            // does not leave an empty file behind.
            out = new FileOutputStream(target);
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        } finally {
            closeQuietly(out);
            closeQuietly(in);
        }
    }

    /** Closes the resource if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done about a failed close here.
        }
    }
}
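// java采集网页图片 (Java: scraping images from a web page)
// 最新推荐文章于 2024-01-08 15:51:25 发布 (latest recommended article published 2024-01-08 15:51:25)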