<!-- Third-party libraries used by the scraper demo classes (Demo, TextDemo). -->
<dependencies>
<!-- jsoup: HTML parsing and CSS-selector queries (Jsoup.parse, Document.select). -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.3</version>
</dependency>
<!-- httpcore: core HTTP types used alongside httpclient (HttpEntity, EntityUtils). -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.10</version>
</dependency>
<!-- httpclient: client used to issue the GET requests (CloseableHttpClient, HttpGet). -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.6</version>
</dependency>
<!-- commons-io: file helpers; Demo uses FileUtils.copyToFile to stream a response body to disk. -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.6</version>
</dependency>
<!-- slf4j-log4j12: SLF4J binding for log4j 1.2. Not referenced directly by the demo classes;
     presumably present so that library logging has a backend. TODO confirm a log4j configuration exists. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.25</version>
</dependency>
</dependencies>
package com.regex;
import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
/**
 * Downloads every video referenced on the ibaotu.com video listing page.
 *
 * <p>The listing page is fetched once and parsed with jsoup. The {@code src} attribute of each
 * {@code <video>} element matched by the selector is then requested and saved as
 * {@code E:/video/<index>.mp4}, where the index is the element's position in the match list.
 */
public class Demo {
    /**
     * Fetches the listing page and downloads each matched video.
     *
     * @param args unused
     * @throws IOException if a request or a file write fails
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes the client and the listing response even when a request throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse httpResponse =
                     httpClient.execute(new HttpGet("https://ibaotu.com/shipin/"))) {
            HttpEntity httpEntity = httpResponse.getEntity();
            // Same success criterion as TextDemo: only a 200 response with a body is parsed.
            if (httpResponse.getStatusLine().getStatusCode() != 200 || httpEntity == null) {
                System.err.println("Listing page request failed: " + httpResponse.getStatusLine());
                return;
            }
            String content = EntityUtils.toString(httpEntity, "utf-8");
            Document document = Jsoup.parse(content);
            Elements elements = document.select("div.media-list div.video-play video");
            for (int i = 0; i < elements.size(); i++) {
                Element element = elements.get(i);
                String attr = element.attr("src");
                if (attr.isEmpty()) {
                    // attr() yields "" when the attribute is absent; there is nothing to download.
                    continue;
                }
                // NOTE(review): the "https:" prefix presumes src is scheme-relative ("//host/path");
                // confirm against the live page markup.
                download(httpClient, "https:" + attr, new File("E:/video/" + i + ".mp4"));
            }
        }
    }

    /**
     * Streams the body of a GET request for {@code url} into {@code target}.
     *
     * <p>Non-200 responses and responses without a body are reported on stderr and skipped, so an
     * error page is never saved under a video file name.
     *
     * @param httpClient client used to execute the request
     * @param url absolute URL of the resource to download
     * @param target file that receives the response body
     * @throws IOException if the request or the file write fails
     */
    private static void download(CloseableHttpClient httpClient, String url, File target)
            throws IOException {
        try (CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
            HttpEntity entity = response.getEntity();
            if (response.getStatusLine().getStatusCode() != 200 || entity == null) {
                System.err.println("Skipping " + url + ": " + response.getStatusLine());
                return;
            }
            // copyToFile leaves the stream open, so it is closed here.
            try (InputStream stream = entity.getContent()) {
                FileUtils.copyToFile(stream, target);
            }
        }
    }
}
package com.regex;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.*;
/**
 * Fetches one chinanews.com article, extracts the text of its body paragraphs, prints the text
 * and saves it to a file on the desktop.
 */
public class TextDemo {
    /**
     * Downloads the article page and writes the paragraph text to {@code news.txt}.
     *
     * <p>Nothing is printed or written unless the server answers with status 200 and a body.
     *
     * @param args unused
     * @throws IOException if the request or the file write fails
     */
    public static void main(String[] args) throws IOException {
        // Resources close in reverse order (response, then client), including on exceptions.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse httpResponse = httpClient.execute(
                     new HttpGet("https://www.chinanews.com/gn/2021/06-10/9496792.shtml"))) {
            HttpEntity httpEntity = httpResponse.getEntity();
            if (httpResponse.getStatusLine().getStatusCode() != 200 || httpEntity == null) {
                return;
            }
            String content = EntityUtils.toString(httpEntity, "utf-8");
            Document document = Jsoup.parse(content);
            Elements elements = document.select("div.left_zw p");
            String text = elements.text();
            System.out.println(text);
            // Write with an explicit charset: FileWriter would use the platform default, which
            // can lose characters. try-with-resources flushes and closes the writer.
            try (Writer fw = new OutputStreamWriter(
                    new FileOutputStream("C:\\Users\\ASUS\\Desktop\\news.txt"), "utf-8")) {
                fw.write(text);
            }
            // Printed only after the writer closed without error.
            System.out.println("下载成功!");
        }
    }
}