- {
- "CCTV-1高清": { }
- {
- "CCTV-2高清": { }
- {
- "CCTV-3高清": { }
- {
- "CCTV-4高清": { }
package com.example.demo.controller;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
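/**
 * Demo program that downloads a web page with Apache HttpClient and parses it
 * with jsoup, printing the page title and the links found inside the elements
 * carrying the CSS class "2u".
 *
 * <p>Created with IntelliJ IDEA on 2020/08/04 15:49.
 *
 * @author ljt
 * @version 1.0
 */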
public class JsoupTest {

    /** Page that is downloaded; also used as the base URI for resolving links. */
    private static final String BASE_URL = "http://ivi.bupt.edu.cn";

    /**
     * Downloads {@link #BASE_URL}, prints the page title, and then, for every
     * element with the CSS class "2u", prints the combined text of its
     * {@code <p>} descendants followed by one "text,url" line per
     * {@code <a>} descendant.
     *
     * @param args ignored
     * @throws IOException if the HTTP request fails or the response has no body
     */
    public static void main(String[] args) throws IOException {
        String content = fetchPage(BASE_URL);

        // Parse once, with a base URI so relative hrefs can be resolved below.
        Document doc = Jsoup.parse(content, BASE_URL);

        // first() returns null when there is no <title>, instead of throwing
        // IndexOutOfBoundsException as get(0) would.
        Element titleElement = doc.getElementsByTag("title").first();
        String title = titleElement == null ? "" : titleElement.text();
        System.out.println("网页标题:" + title);

        Elements blocks = doc.getElementsByClass("2u");
        for (Element block : blocks) {
            String nameStr = block.getElementsByTag("p").text();
            System.out.println("------------------------------------------");
            System.out.println("电视台:" + nameStr);

            for (Element link : block.getElementsByTag("a")) {
                System.out.println(link.text() + "," + resolveHref(link));
            }
        }
    }

    /**
     * Performs a GET request and returns the response body decoded as UTF-8.
     * Both the client and the response are closed on every path.
     *
     * @param url address to fetch
     * @return the response body as text
     * @throws IOException if the request fails or the response carries no entity
     */
    private static String fetchPage(String url) throws IOException {
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("Response from " + url + " has no body");
            }
            return EntityUtils.toString(entity, "utf-8");
        }
    }

    /**
     * Returns the link target as an absolute URL resolved against the
     * document's base URI. Falls back to the raw {@code href} value when it
     * cannot be resolved (absUrl yields an empty string in that case).
     *
     * @param link an {@code <a>} element
     * @return the absolute URL, or the raw href if resolution is not possible
     */
    private static String resolveHref(Element link) {
        String absolute = link.absUrl("href");
        return absolute.isEmpty() ? link.attr("href") : absolute;
    }
}