1. 引入依赖（在 pom.xml 中添加 jsoup）
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.3</version>
</dependency>
2. 工具代码（HolidayUtil.java）
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
@Slf4j
public class HolidayUtil {
public static void main(String[] args) {
getDateFromPage("https://wannianrili.bmcx.com/");
}
public static void getDateFromPage(String url) {
String pageContent = getPageContent(url);
Document parse = Jsoup.parse(pageContent);
Element body = parse.body();
Elements monthDay = body.getElementsByClass("wnrl_k_you");
for (Element day : monthDay) {
String style = day.attr("style");
if( "display: block;".equals(style)) {
Elements dayAttr = day.children();
String dayTitle = dayAttr.get(0).html();
String dateNum = dayAttr.get(1).html();
String dateStr = dayAttr.get(2).html();
log.info("dayTitle:"+dayTitle+", dateNum:"+ dateNum+", dateStr"+dateStr);
}
}
}
public static String getPageContent(String urlStr) {
StringBuilder sb = new StringBuilder();
HttpURLConnection httpUrlConn = null;
InputStream input = null;;
InputStreamReader read = null;
BufferedReader br = null;
try {
URL url = new URL(urlStr);
httpUrlConn = (HttpURLConnection) url.openConnection();
httpUrlConn.setDoInput(true);
httpUrlConn.setRequestMethod("GET");
httpUrlConn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
input = httpUrlConn.getInputStream();
read = new InputStreamReader(input, "utf-8");
br = new BufferedReader(read);
String data = br.readLine();
while(data!=null) {
sb.append(data);
data=br.readLine();
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if(br != null) br.close();
} catch (IOException e) {
e.printStackTrace();
}
try {
if(read !=null) read.close();
} catch (IOException e) {
e.printStackTrace();
}
try {
if(input != null)input.close();
} catch (IOException e) {
e.printStackTrace();
}
if(httpUrlConn != null)httpUrlConn.disconnect();
}
return sb.toString();
}
}