package com.dx.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class CrawlerUtil {
public static void main(String[] args) {
    // Search-results page that is scanned for images.
    final String searchPage = "http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=美女&f=3&oq=meinv&rsp=0#z=0&pn=&ic=0&st=-1&face=0&s=0&lm=-1";
    // Collect the image addresses found on that page and download each of them.
    downImg(getImgUrls(searchPage));
}
/**
 * Fetches the HTML page at {@code url} and collects the addresses of its JPEG images.
 *
 * <p>The {@code src} of every {@code <img>} element is resolved to an absolute URL
 * before it is filtered, so relative and protocol-relative sources can be opened
 * directly by {@code new URL(...)}. Only addresses ending in {@code .jpg} are kept.
 *
 * <p>This method is best effort: any failure (malformed address, network error, ...)
 * is printed to standard error and the addresses collected so far are returned.
 *
 * @param url address of the page to scan
 * @return the distinct {@code .jpg} image addresses found; possibly empty, never {@code null}
 */
public static Set<String> getImgUrls(String url){
    Set<String> imageUrls = new HashSet<String>();
    try {
        // Download and parse the page; the second argument is the timeout in milliseconds.
        Document page = Jsoup.parse(new URL(url), 500000);
        // Every <img> element of the document.
        Elements images = page.getElementsByTag("img");
        System.out.println(images.size());
        for (Element image : images) {
            // absUrl resolves the attribute against the document's base URI and
            // yields "" when no absolute URL can be built, which the filter below rejects.
            String imageUrl = image.absUrl("src");
            // Keep only addresses that end in .jpg.
            if (imageUrl.endsWith(".jpg")) {
                imageUrls.add(imageUrl);
            }
        }
    } catch (Exception e) {
        // Deliberately broad: callers rely on getting a (possibly empty) set back
        // instead of an exception.
        e.printStackTrace();
    }
    return imageUrls;
}
/**
 * Downloads every address in {@code set} and stores it as a {@code .jpg} file
 * under {@code D:\pic}.
 *
 * <p>Each download is independent: its streams are closed as soon as it finishes,
 * and a failure (malformed address, network or disk error) is printed to standard
 * error without stopping the remaining downloads. Files are named after the
 * current time in milliseconds; an existing file is never overwritten.
 *
 * @param set image addresses to download; {@code null} or empty means nothing to do
 */
public static void downImg(Set<String>set){
    if (set == null || set.isEmpty()) {
        return;
    }
    // Create the target directory once, before the first download.
    File directory = new File("D:\\pic");
    if (!directory.exists() && !directory.mkdirs()) {
        System.err.println("Could not create directory " + directory);
    }
    byte[] buffer = new byte[2048];
    for (String address : set) {
        InputStream inputStream = null;
        OutputStream outputStream = null;
        try {
            // Open the connection first so that an unreachable address does not
            // leave an empty file behind.
            inputStream = new URL(address).openConnection().getInputStream();
            outputStream = new FileOutputStream(uniqueImageFile());
            // Copy the response body to the file.
            int len;
            while ((len = inputStream.read(buffer)) != -1) {
                outputStream.write(buffer, 0, len);
            }
        } catch (Exception e) {
            // One bad address must not abort the rest of the batch.
            e.printStackTrace();
        } finally {
            // Close both streams of this download, even if one close fails.
            closeStream(outputStream);
            closeStream(inputStream);
        }
    }
}

/** Returns a not-yet-existing {@code D:\pic\<millis>.jpg} file, bumping the number on a clash. */
private static File uniqueImageFile() {
    long stamp = System.currentTimeMillis();
    File target = new File("D:\\pic\\" + stamp + ".jpg");
    while (target.exists()) {
        stamp++;
        target = new File("D:\\pic\\" + stamp + ".jpg");
    }
    return target;
}

/** Closes {@code stream} if it is non-null, reporting (not propagating) a close failure. */
private static void closeStream(java.io.Closeable stream) {
    if (stream == null) {
        return;
    }
    try {
        stream.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Small crawler utility: scans an HTML page for {@code .jpg} images and downloads
 * them into {@code D:\pic}.
 */
public class CrawlerUtil {

    public static void main(String[] args) {
        // Search-results page that is scanned for images.
        String url2 = "http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=美女&f=3&oq=meinv&rsp=0#z=0&pn=&ic=0&st=-1&face=0&s=0&lm=-1";
        Set<String> set = getImgUrls(url2);
        downImg(set);
    }

    /**
     * Fetches the HTML page at {@code url} and collects the addresses of its JPEG images.
     *
     * <p>The {@code src} of every {@code <img>} element is resolved to an absolute URL
     * before it is filtered, so relative and protocol-relative sources can be opened
     * by {@link #downImg(Set)}. Only addresses ending in {@code .jpg} are kept.
     *
     * <p>This method is best effort: any failure (malformed address, network error, ...)
     * is printed to standard error and the addresses collected so far are returned.
     *
     * @param url address of the page to scan
     * @return the distinct {@code .jpg} image addresses found; possibly empty, never {@code null}
     */
    public static Set<String> getImgUrls(String url){
        Set<String> imageUrls = new HashSet<String>();
        try {
            // Download and parse the page; the second argument is the timeout in milliseconds.
            Document page = Jsoup.parse(new URL(url), 500000);
            // Every <img> element of the document.
            Elements images = page.getElementsByTag("img");
            System.out.println(images.size());
            for (Element image : images) {
                // absUrl resolves the attribute against the document's base URI and
                // yields "" when no absolute URL can be built, which the filter below rejects.
                String imageUrl = image.absUrl("src");
                // Keep only addresses that end in .jpg.
                if (imageUrl.endsWith(".jpg")) {
                    imageUrls.add(imageUrl);
                }
            }
        } catch (Exception e) {
            // Deliberately broad: callers rely on getting a (possibly empty) set back
            // instead of an exception.
            e.printStackTrace();
        }
        return imageUrls;
    }

    /**
     * Downloads every address in {@code set} and stores it as a {@code .jpg} file
     * under {@code D:\pic}.
     *
     * <p>Each download is independent: its streams are closed as soon as it finishes,
     * and a failure (malformed address, network or disk error) is printed to standard
     * error without stopping the remaining downloads. Files are named after the
     * current time in milliseconds; an existing file is never overwritten.
     *
     * @param set image addresses to download; {@code null} or empty means nothing to do
     */
    public static void downImg(Set<String>set){
        if (set == null || set.isEmpty()) {
            return;
        }
        // Create the target directory once, before the first download.
        File directory = new File("D:\\pic");
        if (!directory.exists() && !directory.mkdirs()) {
            System.err.println("Could not create directory " + directory);
        }
        byte[] buffer = new byte[2048];
        for (String address : set) {
            InputStream inputStream = null;
            OutputStream outputStream = null;
            try {
                // Open the connection first so that an unreachable address does not
                // leave an empty file behind.
                inputStream = new URL(address).openConnection().getInputStream();
                outputStream = new FileOutputStream(uniqueImageFile());
                // Copy the response body to the file.
                int len;
                while ((len = inputStream.read(buffer)) != -1) {
                    outputStream.write(buffer, 0, len);
                }
            } catch (Exception e) {
                // One bad address must not abort the rest of the batch.
                e.printStackTrace();
            } finally {
                // Close both streams of this download, even if one close fails.
                closeStream(outputStream);
                closeStream(inputStream);
            }
        }
    }

    /** Returns a not-yet-existing {@code D:\pic\<millis>.jpg} file, bumping the number on a clash. */
    private static File uniqueImageFile() {
        long stamp = System.currentTimeMillis();
        File target = new File("D:\\pic\\" + stamp + ".jpg");
        while (target.exists()) {
            stamp++;
            target = new File("D:\\pic\\" + stamp + ".jpg");
        }
        return target;
    }

    /** Closes {@code stream} if it is non-null, reporting (not propagating) a close failure. */
    private static void closeStream(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}