/**
 * Image scraper for the gallery index at http://www.tupianzj.com/meinv/.
 *
 * <p>For every {@code .meinv970 dl} entry on the index page this method:
 * <ol>
 *   <li>reads the gallery title from {@code dt h2 a};</li>
 *   <li>saves each thumbnail ({@code dd li a img}) via {@code saveImg} with the tag "小";</li>
 *   <li>follows each detail link ({@code dd li .moxflashtext a}), saves the large image
 *       ({@code .pictu900 img}) with the tag "大", and then does the same for every
 *       pagination link ({@code .pages li a}) found on that detail page.</li>
 * </ol>
 *
 * <p>A failure while loading the index page is printed and ends the crawl. A failure while
 * loading a single detail or pagination page is printed and only that page is skipped.
 *
 * @time 2014-9-5 11:10:25 AM
 */
public static void search2() {
    String httpUrl = "http://www.tupianzj.com/meinv/";
    try {
        Document doc = Jsoup.connect(httpUrl).get();
        Elements items = doc.select(".meinv970 dl");
        System.out.println(items);
        for (Element item : items) {
            Elements childItemsDT = item.select("dt h2 a");
            String title = childItemsDT.html();
            System.out.println(title);

            // Thumbnails shown directly on the index page.
            Elements childItemsDD1 = item.select("dd li a img");
            for (Element childItemdd1 : childItemsDD1) {
                String picUrl0 = childItemdd1.attr("src");
                // attr() yields "" when the attribute is missing; there is nothing to download then.
                if (!picUrl0.isEmpty()) {
                    saveImg("小", title, picUrl0);
                }
                System.out.println(picUrl0);
            }

            // Links to the detail pages that carry the full-size images.
            Elements childItemsDD2 = item.select("dd li .moxflashtext a");
            System.out.println(childItemsDD2);
            for (Element childItem : childItemsDD2) {
                String secondUrl = "http://www.tupianzj.com" + childItem.attr("href");
                Document childDoc = fetchAndSaveLargeImage(secondUrl, title);
                if (childDoc == null) {
                    continue; // detail page could not be loaded; move on to the next one
                }
                // Pagination hrefs are resolved against the directory of the detail page.
                String pageBase = secondUrl.substring(0, secondUrl.lastIndexOf("/") + 1);
                Elements thirdChilds = childDoc.select(".pages li a");
                for (Element thirdChild : thirdChilds) {
                    String isHave = thirdChild.attr("href");
                    if (isFollowablePageLink(isHave)) {
                        fetchAndSaveLargeImage(pageBase + isHave, title);
                    }
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/**
 * Loads one detail page and saves its {@code .pictu900 img} image with the tag "大".
 *
 * @param pageUrl URL of the detail page
 * @param title   gallery title, forwarded to {@code saveImg} as the directory name
 * @return the parsed page, or {@code null} when the page could not be loaded
 */
private static Document fetchAndSaveLargeImage(String pageUrl, String title) {
    try {
        Document page = Jsoup.connect(pageUrl).get();
        String picUrl = page.select(".pictu900 img").attr("src");
        System.out.println(picUrl);
        if (!picUrl.isEmpty()) {
            saveImg("大", title, picUrl);
        }
        return page;
    } catch (IOException e) {
        // One broken page must not abort the remaining downloads.
        e.printStackTrace();
        return null;
    }
}

/**
 * Tells whether a pagination {@code href} points to another page worth fetching.
 * Empty hrefs, fragment-only hrefs ("#...") and {@code javascript:} pseudo-links are rejected.
 */
private static boolean isFollowablePageLink(String href) {
    if (href == null || href.isEmpty() || href.startsWith("#")) {
        return false;
    }
    String scriptScheme = "javascript:";
    return !href.regionMatches(true, 0, scriptScheme, 0, scriptScheme.length());
}
/**
 * Downloads the image at {@code picUrl} into the directory {@code D:/pic/<name>/}.
 *
 * <p>The file is named {@code tag + System.currentTimeMillis() + ".jpg"}. Download errors are
 * printed and otherwise ignored, so the generated file name is returned whether or not the
 * image was actually written.
 *
 * @param tag    prefix of the generated file name
 * @param name   sub-directory of {@code D:/pic/}; path separators and characters that are not
 *               allowed in Windows file names are replaced with {@code _}
 * @param picUrl URL of the image; when {@code null} or empty nothing is downloaded
 * @return the generated file name (without its directory)
 * @time 2014-9-5 11:10:25 AM
 */
public static String saveImg(String tag, String name, String picUrl) {
    String fileName = tag + System.currentTimeMillis() + ".jpg";
    if (picUrl == null || picUrl.isEmpty()) {
        // Nothing to fetch: avoid creating an empty directory and a guaranteed URL failure.
        return fileName;
    }
    File realDirectory = new File("D:/pic/" + safeDirectoryName(name) + "/");
    if (!realDirectory.exists()) {
        realDirectory.mkdirs();
    }
    try {
        // Build the URL and open the connection.
        URLConnection con = new URL(picUrl).openConnection();
        // try-with-resources closes both streams even when reading or writing fails.
        try (InputStream is = con.getInputStream();
             OutputStream os = new FileOutputStream(new File(realDirectory, fileName))) {
            // 1 KB copy buffer.
            byte[] bs = new byte[1024];
            // Number of bytes obtained by the latest read.
            int len;
            while ((len = is.read(bs)) != -1) {
                os.write(bs, 0, len);
            }
        }
    } catch (Exception e) {
        // Best effort: a failed download is reported but must not stop the caller.
        e.printStackTrace();
    }
    return fileName;
}

/**
 * Turns an arbitrary (scraped) string into a single safe directory name by replacing path
 * separators, characters Windows forbids in file names, and control characters with {@code _}.
 */
private static String safeDirectoryName(String name) {
    String cleaned = String.valueOf(name).replaceAll("[\\\\/:*?\"<>|\\p{Cntrl}]", "_").trim();
    // "", "." and ".." would resolve to the base directory or its parent.
    if (cleaned.isEmpty() || cleaned.matches("\\.+")) {
        return "_";
    }
    return cleaned;
}
// Jsoup data scraping
// Latest recommended article published on 2023-02-02 21:11:40