Jsoup的使用

最新推荐文章于 2023-03-21 09:21:09 发布

Franky_jie

最新推荐文章于 2023-03-21 09:21:09 发布

阅读量929

点赞数 1

分类专栏：学生

本文链接：https://blog.csdn.net/Franky_jie/article/details/54969881

版权

学生专栏收录该内容

16 篇文章 0 订阅

订阅专栏

1、Jsoup jar包的下载与使用
我们日后会用到许许多多的jar包，如果每次都要一个一个地到网上去下载、导入再使用，是非常麻烦的。所以这里推荐一种方式：在eclipse下可以将一个项目转换成Maven项目，转换之后，就可以很方便地通过Maven引入并使用某个jar包。

转换成Maven项目后，我们就可以到下列网站找到我们想要的jar包：

https://mvnrepository.com/

这里我们找到我们想要的Jsoup包，

这里写图片描述

这样子按保存后它就可以帮你自动下载你想要的jar包，其他的jar包都可以这样下载使用。但是要注意的是，如果在这个网站找不到（比如一些比较老的jar包），就需要自己手工下载导入使用。

2、Jsoup的使用实例
Jsoup是一个非常强大的html解析器，使用起来也非常的方便，现在我们来看看它的使用
下面是一个从图片网站下载图片的小程序：

package com.mashensoft.jsoup;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class GetPicDemo {
    /** Path prefix under which downloaded pictures are written. */
    private static final String SAVE_DIR = "E:/图片/";

    /** Text whose presence in a page's HTML is treated as "page not found". */
    private static final String NOT_FOUND_MARKER = "唔，未找到任何页面！！！";

    /**
     * Downloads the picture at the given address and writes it to
     * {@code SAVE_DIR + getPicName(myURL)}.
     *
     * <p>A {@code null} or empty address is skipped silently, because the lookup
     * methods of this class return {@code ""} when no picture address was found.
     * I/O failures are printed to standard error and otherwise ignored.
     *
     * @param myURL address of the picture to download
     */
    public static void downLoadPic(String myURL) {
        if (myURL == null || myURL.isEmpty()) {
            return;
        }
        try {
            URL url = new URL(myURL);
            File target = new File(SAVE_DIR + getPicName(myURL));
            // try-with-resources closes both streams even when read/write throws.
            // The input stream is opened first, so no file is created when the
            // connection itself fails.
            try (BufferedInputStream bis =
                         new BufferedInputStream(url.openConnection().getInputStream());
                 BufferedOutputStream bos =
                         new BufferedOutputStream(new FileOutputStream(target))) {
                byte[] buffer = new byte[1024 * 100];
                int len;
                while ((len = bis.read(buffer)) != -1) {
                    bos.write(buffer, 0, len);
                }
                bos.flush();
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
            System.err.println(myURL);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the file name from a URL: everything after the last {@code '/'}.
     *
     * @param myURL the URL (or path) to take the file name from
     * @return the text after the last {@code '/'}; the whole input when it
     *         contains no {@code '/'}; an empty string when it ends with {@code '/'}
     */
    public static String getPicName(String myURL) {
        // lastIndexOf returns -1 when there is no '/', which makes substring(0)
        // return the input unchanged.
        return myURL.substring(myURL.lastIndexOf("/") + 1);
    }

    /**
     * Looks up the picture address on a picture detail page.
     *
     * <p>The page is fetched once; the address is the {@code src} attribute of
     * the element with id {@code J_worksImg}.
     *
     * @param pageURL address of the picture detail page
     * @return the picture address, or {@code ""} when the page contains the
     *         not-found marker, has no such element, the element has no
     *         {@code src}, or the page could not be fetched
     */
    public static String getDownloadPicURLFromPage(String pageURL) {
        try {
            Document doc = Jsoup.connect(pageURL).get();
            // Reuse the already-fetched document instead of requesting the page again.
            if (doc.html().contains(NOT_FOUND_MARKER)) {
                return "";
            }
            Element img = doc.getElementById("J_worksImg");
            // attr() yields "" for a missing attribute, which is also the
            // "not found" result of this method.
            return img == null ? "" : img.attr("src");
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Same lookup as {@link #getDownloadPicURLFromPage(String)}; kept so existing
     * callers continue to work.
     *
     * @param pageURL address of the picture detail page
     * @return the picture address, or {@code ""} when none was found
     * @deprecated use {@link #getDownloadPicURLFromPage(String)}
     */
    @Deprecated
    public static String test(String pageURL) {
        return getDownloadPicURLFromPage(pageURL);
    }

    /**
     * Collects the picture detail page links found on one listing page.
     *
     * @param PicIntroPage address of the listing page
     * @return the {@code href} value of every matching element, in document
     *         order; empty when the page could not be fetched
     */
    public static List<String> getAllPicIntroFromPage(String PicIntroPage) {
        // NOTE(review): the argument holds three space-separated class names;
        // confirm that getElementsByClass in the jsoup version in use matches
        // this value as intended.
        return collectHrefsByClass(PicIntroPage, "block works-detail hover-none");
    }

    /**
     * Collects the listing page links found on the given page.
     *
     * @param firstPageUrl address of the page that carries the pagination links
     * @return the {@code href} value of every element with class
     *         {@code seo-page-num}, in document order; empty when the page could
     *         not be fetched
     */
    public static List<String> getAllPage(String firstPageUrl) {
        return collectHrefsByClass(firstPageUrl, "seo-page-num");
    }

    /**
     * Walks every listing page reachable from the given page and prints the
     * picture address found on each picture detail page.
     *
     * <p>Nothing is saved to disk: the call to {@link #downLoadPic(String)} is
     * left disabled, as in the original demo.
     *
     * @param PageURL address of the page that carries the pagination links
     */
    public static void downAllPage(String PageURL) {
        for (String listPage : getAllPage(PageURL)) {
            System.out.println();
            System.out.println("----------------------------->" + listPage + "\n\n\n\n");
            for (String picPage : getAllPicIntroFromPage(listPage)) {
                System.out.println("----------------------------->" + picPage);
                String downloadPicURL = getDownloadPicURLFromPage(picPage);
                System.out.println("下载地址：" + downloadPicURL);
                // Call downLoadPic(downloadPicURL) here to actually save the pictures.
            }
        }
    }

    public static void main(String[] args) {
        downAllPage("http://www.nipic.com/topic/show_27036_1.html?ll");
    }

    /** Fetches a page and returns the href of every element selected by the given class value. */
    private static List<String> collectHrefsByClass(String pageUrl, String className) {
        List<String> hrefs = new ArrayList<String>();
        try {
            Document doc = Jsoup.connect(pageUrl).get();
            for (Element element : doc.getElementsByClass(className)) {
                hrefs.add(element.attr("href"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return hrefs;
    }

}