/**
 * Command-line scraper built on jsoup that saves the images referenced by two
 * web pages: one pixivision article and the pixiv Google+ page.
 *
 * <p>jsoup API reference: http://www.open-open.com/jsoup/
 */
public class Jsoupee {

    /**
     * Downloads every illustration of the pixivision article into
     * {@code D:/downloadimg/} and then fetches the images of the pixiv Google+ page.
     *
     * @param args
     *            unused
     * @throws Exception
     *             if a page cannot be fetched, the download directory cannot be
     *             created, or an image download fails
     */
    public static void main(String[] args) throws Exception {
        Document doc = Jsoup.connect("http://www.pixivision.net/zh/a/1846").get();
        // The class name must not carry a trailing blank: jsoup compares it with the
        // whitespace-separated tokens of the class attribute, so "am__work__illust "
        // could at best match an attribute that is literally that string.
        Elements imgs = doc.getElementsByClass("am__work__illust");

        // Create the target directory up front; FileOutputStream does not create
        // missing parent directories.
        File targetDir = new File("D:/downloadimg/");
        if (!targetDir.isDirectory() && !targetDir.mkdirs()) {
            throw new IOException("Cannot create download directory " + targetDir);
        }

        int m = 0;
        for (Element img : imgs) {
            String url = img.absUrl("src");
            System.out.println(url);
            if (url.isEmpty()) {
                // absUrl yields "" when the element has no resolvable src attribute;
                // skip it instead of letting the download fail on a malformed URL.
                System.out.println("没有从该链接获得内容");
                continue;
            }
            String fileName = "google" + m + ".jpg";
            m++;
            StoreFromNet2 net2 = new StoreFromNet2();
            net2.download(url, new File(targetDir, fileName));
        }
        System.out.println(imgs.size());
        System.out.println("抓去结束");

        // Then fetch the images from the pixiv Google+ page.
        downloadFromGooglePlus();
    }

    /**
     * Fetches the images shown on the pixiv Google+ page and hands each one to
     * {@code StoreFromNet} for writing to disk.
     *
     * <p>A jsoup {@code Document} can also be obtained from an HTML string
     * ({@code Jsoup.parse(html)}) or from a local file
     * ({@code Jsoup.parse(file, "UTF-8", baseUri)}); this method loads it from
     * the live URL.
     *
     * @throws IOException
     *             if the page cannot be fetched
     */
    private static void downloadFromGooglePlus() throws IOException {
        // NOTE(review): Google+ has been discontinued for consumer accounts, so this
        // URL may no longer serve the expected page - confirm before relying on it.
        Document doc = Jsoup.connect("https://plus.google.com/+pixiv").get();
        Elements imgs = doc.getElementsByClass("JZUAbb");
        int m = 0;
        for (Element img : imgs) {
            String url = img.absUrl("src");
            System.out.println(url);
            if (url.isEmpty()) {
                // No resolvable src attribute, so there is nothing to request.
                System.out.println("没有从该链接获得内容");
                continue;
            }
            StoreFromNet png = new StoreFromNet();
            byte[] btImg = png.getImageFromNetByUrl(url);
            if (btImg != null && btImg.length > 0) {
                System.out.println("读取到:" + btImg.length + "字节");
                // The counter only advances for images that were actually received,
                // so the saved files are numbered without gaps.
                String fileName = "google" + m + ".jpg";
                m++;
                png.writeImageToDisk(btImg, fileName);
            } else {
                System.out.println("没有从该链接获得内容");
            }
        }
        System.out.println(imgs.size());
        System.out.println("抓去结束");
    }
}
// Output stream that download(String, File) opens on its target file; the field
// is reassigned on every call.
// NOTE(review): only download's use is visible here - confirm no other member
// reads this field before turning it into a local variable.
private OutputStream os;
/**
 * Downloads the resource at {@code imgurl} and stores it in {@code f}.
 *
 * <p>The request carries a {@code Host} header derived from the URL and a
 * {@code Referer} header set to the URL itself. A response whose
 * {@code Content-Encoding} is {@code gzip} is decompressed while it is written.
 * Both streams are closed before the method returns, whether or not the copy
 * succeeds.
 *
 * @param imgurl
 *            download link
 * @param f
 *            file the downloaded image is written to
 * @return {@code f}; a failure is reported by an exception, never by {@code null}
 * @throws Exception
 *             if the URL is malformed, the connection fails, or the file
 *             cannot be written
 */
public File download(String imgurl, File f) throws Exception {
    URL url = new URL(imgurl);
    URLConnection con = url.openConnection();

    // Take the Host value from the parsed URL. Slicing the string at fixed offsets
    // is only right for "http://" URLs with a long enough host and yields e.g.
    // "/example.com" for "https://example.com/...".
    int port = url.getPort();
    String host = port == -1 ? url.getHost() : url.getHost() + ":" + port;
    con.setRequestProperty("Host", host);
    con.setRequestProperty("Referer", imgurl);

    // Resources are opened in this order on purpose: the target file is only
    // created or truncated once the connection has delivered a response stream.
    try (InputStream raw = con.getInputStream();
            InputStream in = "gzip".equalsIgnoreCase(con.getContentEncoding())
                    ? new GZIPInputStream(raw)
                    : raw;
            OutputStream out = new FileOutputStream(f)) {
        // Keep the field pointing at the most recent output stream, as before.
        os = out;
        byte[] bs = new byte[1024];
        int len;
        while ((len = in.read(bs)) != -1) {
            out.write(bs, 0, len);
        }
        out.flush();
    }
    return f;
}
}