实现方式很简单,例如获取甘肃省教育考试院关于专升本的通知,代码如下:
/**
 * Minimal crawler that downloads the home page of https://www.ganseea.cn/ and
 * prints the title and absolute link of every announcement that mentions the
 * junior-college-to-bachelor upgrade exam.
 *
 * <p>Only lines between the first line containing {@code 通知公告</span>} and the
 * first line containing {@code 院校之窗} are inspected.
 */
public class Crawling {

    /** Scheme and host that the site-relative {@code /html...} links are appended to. */
    private static final String BASE_URL = "https://www.ganseea.cn";

    /**
     * Fetches the home page and prints, for each matching announcement line,
     * the title on one line followed by the absolute URL on the next.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // try-with-resources closes the reader (and the underlying stream) on every path.
        // NOTE(review): the page is assumed to be served as UTF-8 - confirm. Decoding with an
        // explicit charset keeps the Chinese markers below matching regardless of the
        // platform default encoding.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(
                new URL(BASE_URL + "/").openStream(),
                java.nio.charset.StandardCharsets.UTF_8))) {
            boolean inNoticeSection = false;
            String line;
            while ((line = in.readLine()) != null) {
                if (line.contains("通知公告</span>")) {
                    inNoticeSection = true;
                }
                if (line.contains("院校之窗")) {
                    // Stop at the first line carrying this marker; nothing after it is scanned.
                    break;
                }
                if (inNoticeSection) {
                    String[] notice = extractNotice(line);
                    if (notice != null) {
                        System.out.println(notice[0]);
                        System.out.println(notice[1]);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the title and absolute URL from one HTML line if it mentions the
     * upgrade exam and carries a site-relative {@code href="/html...} link
     * followed by {@code " target=} and a {@code title=} attribute terminated by
     * {@code ">}.
     *
     * <p>Package-private so it can be exercised without network access.
     *
     * @param line one line of the page; may be {@code null}
     * @return a two-element array {@code {title, url}}, where the title is the raw
     *         attribute value including its surrounding double quotes; or
     *         {@code null} when the line does not match the keywords or lacks any
     *         of the expected markers
     */
    static String[] extractNotice(String line) {
        if (line == null) {
            return null;
        }
        if (!line.contains("高职(专科)升本科") && !line.contains("专升本")) {
            return null;
        }

        int hrefAt = line.indexOf("href=\"/html");
        if (hrefAt < 0) {
            return null;
        }
        // Skip the six characters of href=" so the path starts at "/html".
        int pathStart = hrefAt + 6;
        // Search after the href so an earlier target attribute cannot produce an end before the start.
        int pathEnd = line.indexOf("\" target=", pathStart);
        if (pathEnd < 0) {
            return null;
        }

        int titleAt = line.indexOf("title=");
        if (titleAt < 0) {
            return null;
        }
        int titleClose = line.indexOf("\">", titleAt);
        if (titleClose < 0) {
            return null;
        }

        // Start on the opening quote and end just past the closing quote: the printed
        // title deliberately keeps its surrounding double quotes.
        String title = line.substring(titleAt + 6, titleClose + 1);
        String url = BASE_URL + line.substring(pathStart, pathEnd);
        return new String[] {title, url};
    }
}