突然发现爬虫挺有意思的,于是开始研究Java的爬虫框架。作为简单的入门,觉得jsoup比较适合初学者,下面就是一个爬取天堂图片网(ivsky.com)图片的例子。
参考:jsoup 的中文文档 http://www.open-open.com/jsoup/
1、根据页面源码提取图片的url
2、根据url下载图片
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Minimal jsoup crawler example: visits a fixed range of gallery pages, reads the
 * {@code src} of the element with id {@code imgis} on each page, and saves the
 * referenced image to a local directory.
 */
public class JsoupImgDown {

    /** Common prefix of the gallery page URLs; the page number and ".html" are appended. */
    private static final String PAGE_URL_PREFIX = "http://www.ivsky.com/tupian/nvsheng_v39311/pic_6340";

    /** First page number that is fetched. */
    private static final int FIRST_PAGE = 80;

    /** Number of consecutive pages fetched starting at {@link #FIRST_PAGE}. */
    private static final int PAGE_COUNT = 9;

    /** Directory prefix (including trailing separator) the images are written to. */
    private static final String OUTPUT_DIR = "E:\\temp\\";

    /** User-Agent header sent with every image request. */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)";

    /** Upper bounds so that a stalled server cannot hang the program forever. */
    private static final int CONNECT_TIMEOUT_MS = 15000;
    private static final int READ_TIMEOUT_MS = 30000;

    /**
     * Fetches each gallery page in turn and downloads its main image.
     *
     * <p>A failure on one page is reported on standard error and the loop continues
     * with the next page instead of aborting the whole run.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        for (int k = FIRST_PAGE; k < FIRST_PAGE + PAGE_COUNT; k++) {
            String pageUrl = PAGE_URL_PREFIX + k + ".html";
            try {
                Document doc = Jsoup.connect(pageUrl).get();
                // select(...).attr(...) yields "" when no element matches, which avoids the
                // NullPointerException that getElementById(...).attr(...) raises for a missing
                // element. The "abs:" prefix resolves relative and protocol-relative src
                // values against the page URL so that new URL(path) can parse them.
                String path = doc.select("#imgis").attr("abs:src");
                if (path.isEmpty()) {
                    System.err.println("No image with id \"imgis\" found on " + pageUrl);
                    continue;
                }
                System.out.println(path);
                downPic(path, k + "b");
            } catch (IOException e) {
                // Covers HTTP errors, malformed URLs and file-system failures for this page only.
                System.err.println("Failed to process " + pageUrl);
                e.printStackTrace();
            }
        }
    }

    /**
     * Downloads the resource at {@code path} and stores it as
     * {@code E:\temp\<filename>.jpg}.
     *
     * @param path     absolute URL of the image to download
     * @param filename base name of the target file, without directory or extension
     * @throws IOException if the URL is malformed, the connection fails or times out,
     *                     or the target file cannot be written
     */
    public static void downPic(String path, String filename) throws IOException {
        URL url = new URL(path);
        URLConnection uc = url.openConnection();
        uc.setRequestProperty("User-Agent", USER_AGENT);
        uc.setConnectTimeout(CONNECT_TIMEOUT_MS);
        uc.setReadTimeout(READ_TIMEOUT_MS);

        byte[] buffer = new byte[1024];
        // try-with-resources closes both streams even when read/write throws; the input
        // stream is opened first so no output file is created if the request fails.
        try (InputStream in = uc.getInputStream();
             OutputStream out = new FileOutputStream(OUTPUT_DIR + filename + ".jpg")) {
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    }
}