【实例简介】
【实例截图】
【核心代码】
package springboot.jsoup;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
public class JsoupDemo {
private static OutputStream os;
public static void main(String[] args) {
try {
Document doc = Jsoup.connect("https://blog.csdn.net/qq_15260315").get();
//指定文件名及路径
File file = new File("D:\\jsoup\\word\\csdn.txt");
if (!file.exists()) {
file.createNewFile();
}
//获取标题和地址的对象
Elements titles = doc.getElementsByClass("article-item-box");
//写入本地
PrintWriter pw = new PrintWriter("D:\\jsoup\\word\\csdn.txt","UTF-8");
for (Element e:titles) {
Elements a = e.select("h4").select("a");
pw.println(a.attr("href"));
pw.println(a.text());
pw.println("------------------------------------------------------------------------------------------------------------------------------------");
try {
//增加访问量
Document d = Jsoup.connect(a.attr("href")).get();
//输出文章内容
Elements view = d.getElementsByClass("htmledit_views");
System.out.println(view.text());
//如果有图片,获取页面上的图片保存到本地
Elements imgs = view.select("img");
if (!imgs.isEmpty()) {
for (Element img : imgs) {
int j = 0;
//URL u = new URL(img.attr("src"));
URL u = new URL("https://avatar.csdn.net/0/B/B/2_qq_15260315.jpg");
HttpURLConnection connection = (HttpURLConnection) u.openConnection();
//因为服务器的安全设置不接受Java程序作为客户端访问,所以设置客户端的User Agent
connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
//获取数据流
InputStream is = connection.getInputStream();
//写入本地
os = new FileOutputStream(new File("E:\\jsoup\\img", a.text() j ".png"));
byte[] b = new byte[1024];
int i = 0;
while ((i = is.read(b)) != -1) {
os.write(b, 0, i);
}
is.close();
os.close();
j ;
}
}
}catch (Exception ex){
}
}
pw.close(); //关闭输出流
} catch (IOException e) {
e.printStackTrace();
}
}
}