- /***
- * 美图抓取
- * http://www.tupianzj.com/meinv/
- * @time 2014-9-5上午11:10:25
- */
- public static void search2() {
- String httpUrl = "http://www.tupianzj.com/meinv/";
- try {
- Document doc = Jsoup.connect(httpUrl).get();
- Elements items = doc.select(".meinv970 dl");
- System.out.println(items);
- for(Element item:items){
- Elements childItemsDT = item.select("dt h2 a");
- String title=childItemsDT.html();
- System.out.println(childItemsDT.html());
- Elements childItemsDD1 = item.select("dd li a img");
- for(Element childItemdd1:childItemsDD1){
- String picUrl0=childItemdd1.attr("src");
- saveImg("小",title, picUrl0);
- System.out.println(picUrl0);
- }
- Elements childItemsDD2 = item.select("dd li .moxflashtext a");
- System.out.println(childItemsDD2);
- for(Element childItem:childItemsDD2){
- String secondUrl="http://www.tupianzj.com"+childItem.attr("href");
- Document childDoc = Jsoup.connect(secondUrl).get();
- String picUrl=childDoc.select(".pictu900 img").attr("src");
- System.out.println(picUrl);
- saveImg("大",title, picUrl);
- Elements thirdChilds=childDoc.select(".pages li a");
- for(Element thirdChild:thirdChilds){
- String isHave = thirdChild.attr("href");
- if(!"".endsWith(isHave) && !"javascript:dPlayPre();".endsWith(isHave) && !"#".endsWith(isHave) && !"#".endsWith(isHave)){
- String url=secondUrl.substring(0,secondUrl.lastIndexOf("/")+1)+isHave;
- Document secondChildDoc = Jsoup.connect(url).get();
- String picUrl1=secondChildDoc.select(".pictu900 img").attr("src");
- System.out.println(picUrl1);
- saveImg("大",title, picUrl1);
- }
- }
- }
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- /***
- * 保存图片
- * @time 2014-9-5上午11:10:25
- */
- public static String saveImg(String tag,String name,String picUrl) {
- String fileName = "";
- fileName = tag+System.currentTimeMillis()+".jpg";
- File realDirectory = new File("D:/pic/"+name+"/");
- if (!realDirectory.exists()) {
- realDirectory.mkdirs();
- }
- try {
- // 构造URL
- URL url = new URL(picUrl);
- // 打开连接
- URLConnection con = url.openConnection();
- // 输入流
- InputStream is = con.getInputStream();
- // 1K的数据缓冲
- byte[] bs = new byte[1024];
- // 读取到的数据长度
- int len;
- // 输出的文件流
- OutputStream os = new FileOutputStream("D:/pic/"+name+"/"+fileName);
- // 开始读取
- while ((len = is.read(bs)) != -1) {
- os.write(bs, 0, len);
- }
- // 完毕,关闭所有链接
- os.close();
- is.close();
- } catch (Exception e) {
- e.printStackTrace();
- }
- return fileName;
- }
Jsoup数据抓取
最新推荐文章于 2024-01-28 20:51:24 发布