第7个httpClient 例子--httpclient+jsoup解析

目标获取博客园的标题与口号:https://www.cnblogs.com/

HTML:代码:

<!DOCTYPE html>
<html lang="zh-cn">
<head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <meta name="referrer" content="always" />
    <title>博客园 - 开发者的网上家园</title>
    <meta name="keywords" content="开发者,博客园,开发者,程序猿,程序媛,极客,编程,代码,开源,IT网站,Developer,Programmer,Coder,Geek,技术社区" />
        <meta name="description" content="博客园是一个面向开发者的知识分享社区。自创建以来,博客园一直致力并专注于为开发者打造一个纯净的技术交流社区,推动并帮助开发者通过互联网分享知识,从而让更多开发者从中受益。博客园的使命是帮助开发者用代码改变世界。" />
    <link rel="shortcut icon" href="//common.cnblogs.com/favicon.ico" type="image/x-icon" />
    <link rel="Stylesheet" type="text/css" href="/bundles/aggsite.css?v=IhfFDNk6saBQuSizNqMno4eFb5L3OoXlsUCqkaSgNvA1" />
    <link id="RSSLink" title="RSS" type="application/rss+xml" rel="alternate" href="http://feed.cnblogs.com/blog/sitehome/rss" />
    <script src="//common.cnblogs.com/script/jquery.js" type="text/javascript"></script>
    <script src="/bundles/aggsite.js?v=vWqa5z-vvnUBiauXGl6S0-ZbtOAq_fbE-A1hKZngtlw1" type="text/javascript"></script>
</head>
<body>
    <div id="wrapper">
        <div id="hd_info">
            <div id="cnts">
                <div id="site_nav_top">代码改变世界</div>
                <div id="login_area"><span id="span_userinfo"></span></div>
                <div class="clear"></div>
            </div>
        </div>
        <div id="header">
            <p class="h_r_3"></p><p class="h_r_2"></p><p class="h_r_1"></p>
            <div id="header_block">
                <div id="logo">
                    <h1>
                        <a href="https://www.cnblogs.com/" title="开发者的网上家园"><img src="/images/logo_small.gif" alt="博客园Logo" width="142" height="55" /></a>
                    </h1>
                </div>

                <div class="clear"></div>
            </div>
            <p class="h_r_1"></p><p class="h_r_2"></p><p class="h_r_3"></p>
        </div>

 

pom.xml

<dependencies>
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
        <version>4.5.2</version>
    </dependency>
    <!-- 添加Jsoup支持 -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.10.2</version>
    </dependency>
</dependencies>

代码:

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class HttpClientjsouDemo01 {
    public static void main(String[] args) throws Exception {
        //创建实例化对象
        CloseableHttpClient httpClient = HttpClients.createDefault();
        //创建实例化对象httpget
        HttpGet httpGet = new HttpGet("http://www.cnblogs.com/");
        //执行get请求
        CloseableHttpResponse response = httpClient.execute(httpGet);
        //返回实体整个网页内容
        HttpEntity entity = response.getEntity();
        String toString = EntityUtils.toString(entity,"utf-8");
        response.close();//关闭请求流释放系统资源
        
        //解析网页,得到文档对象
        Document doc = Jsoup.parse(toString);
        //获取tag 是title的所有dom元素
        Elements elements = doc.getElementsByTag("title");
        //获取第一个元素
        Element element = elements.get(0);
        //返回元素的文本
        String text = element.text();
        System.out.println("网页标题是:"+elements);

        Element element2=doc.getElementById("site_nav_top"); // 获取id=site_nav_top的DOM元素
        String navTop=element2.text(); // 返回元素的文本
        System.out.println("口号:"+navTop);


    }
}

运行结果:

网页标题是:<title>博客园 - 开发者的网上家园</title>
口号:代码改变世界

Process finished with exit code 0

 

转载于:https://my.oschina.net/ch66880/blog/1525495

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值