利用Jsoup爬取一组图片

最新推荐文章于 2021-06-29 22:28:34 发布

shb19891

最新推荐文章于 2021-06-29 22:28:34 发布

阅读量1.5k

点赞数 1

分类专栏：JAVA爬虫 | 文章标签：图片源码

本文链接：https://blog.csdn.net/shb19891/article/details/19807487

版权

JAVA爬虫专栏收录该内容

4 篇文章 0 订阅

订阅专栏

package mySource;

import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Downloads every image referenced by an {@code <img>} tag on a web page.
 *
 * <p>The page is fetched and parsed with Jsoup; each image is then copied to a
 * file under {@link #SAVE_DIR}. Errors are reported with
 * {@code printStackTrace()} and never propagated to the caller.
 */
public class dSomeImg {

    /** Directory the downloaded images are written to (must already exist). */
    private static final String SAVE_DIR = "D:/image/";

    /** Size in bytes of the buffer used when copying an image to disk. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Fetches the page at {@code url}, prints all of its {@code <img>} elements
     * to standard output and downloads each referenced image.
     *
     * <p>Every image is saved as
     * {@code <SAVE_DIR><yyyyMMddHHmmssSSS>_<sequence>.jpg}. The {@code .jpg}
     * extension is used regardless of the actual image format.
     *
     * @param url address of the page whose images should be downloaded
     */
    public void parseUrl(String url) {
        try {
            // Jsoup can fetch the page source directly. Per the original author's
            // note, sites that require login need a separate HTTP client to obtain
            // the HTML instead.
            Document doc = Jsoup.connect(url).get();

            // Select every <img> element and print the selection. A narrower
            // selector such as "td#prodImageCell>a>img" can be used to target a
            // single image instead.
            Elements eles = doc.select("img");
            System.out.println(eles);

            SimpleDateFormat stamp = new SimpleDateFormat("yyyyMMddHHmmssSSS");
            int sequence = 0;
            for (Element ele : eles) {
                // absUrl resolves relative and protocol-relative sources against
                // the page's base URI; it yields "" when no absolute URL can be
                // built (for example when the src attribute is missing).
                String imgUrl = ele.absUrl("src");
                if (imgUrl.isEmpty()) {
                    continue;
                }

                // The sequence number keeps names unique even when several images
                // are processed within the same millisecond.
                String path = SAVE_DIR + stamp.format(new Date()) + "_" + sequence + ".jpg";
                sequence++;
                downloadImg(imgUrl, path);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads the resource at {@code imgUrl} and writes it to the file
     * {@code path}.
     *
     * <p>Failures (malformed URL, connection error, file that cannot be created
     * or written) are printed with {@code printStackTrace()} and the method
     * returns normally. Both streams are always closed.
     *
     * @param imgUrl absolute URL of the image to download
     * @param path   path of the file to write the image to
     */
    public static void downloadImg(String imgUrl, String path) {
        URL url;
        try {
            url = new URL(imgUrl);
        } catch (MalformedURLException e) {
            e.printStackTrace();
            return;
        }

        // The input stream is opened first so that no output file is created
        // when the connection cannot be established. try-with-resources closes
        // both streams on every exit path.
        try (InputStream is = url.openStream();
             OutputStream os = new FileOutputStream(path)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int bytesRead;
            while ((bytesRead = is.read(buffer, 0, buffer.length)) != -1) {
                os.write(buffer, 0, bytesRead);
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException raised when the target file
            // cannot be created.
            e.printStackTrace();
        }
    }

    /**
     * Entry point: downloads all images from a fixed sample page.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) {
        String url = "http://tieba.baidu.com/tb/picture/index.html";
        dSomeImg dsi = new dSomeImg();
        dsi.parseUrl(url);
    }

}