Jsoup+HttpClient获取新浪新闻数据

package com.test;

import java.io.IOException;  
import java.net.URI;  

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse; 
import org.apache.http.client.ClientProtocolException; 
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder; 
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
  
 
 

/**
 * Demo that fetches the Sina rolling-news channel page with Apache HttpClient and
 * prints the status line, the response headers and the decoded body to stdout.
 *
 * <p>Dependencies: Apache HttpComponents HttpClient 4.3 or later (the httpclient and
 * httpcore jars plus their transitive dependencies). The legacy
 * commons-httpclient-3.1 jar does NOT contain the {@code org.apache.http} classes
 * used here.
 *
 * @author tianjun
 */
public class PostTest {

    /** Charset used to decode the body when the response does not declare one. */
    private static final String FALLBACK_CHARSET = "GBK";

    /**
     * Sends a single GET request to the news list endpoint and dumps the response.
     *
     * @param args ignored
     * @throws Exception if the request URI cannot be built
     */
    public static void main(String[] args) throws Exception {
        HttpGet httpget = new HttpGet(buildUri());
        applyHeaders(httpget);

        System.out.println("executing request " + httpget.getURI());

        // try-with-resources closes the client (and its connection manager) on every
        // path, replacing the deprecated getConnectionManager().shutdown() call.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            HttpResponse response = httpClient.execute(httpget);
            printResponse(response);
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the request URI, equivalent to
     * http://roll.news.sina.com.cn/s/channel.php?col=91&spec=&type=&ch=01&offset_page=0&offset_num=0&num=60&page=1
     *
     * @return the fully assembled URI
     * @throws java.net.URISyntaxException if the components do not form a valid URI
     */
    private static URI buildUri() throws java.net.URISyntaxException {
        // "ch" is set exactly once; setParameter() replaces an existing value, so the
        // duplicate call the code used to make had no effect on the resulting URL.
        return new URIBuilder()
                .setScheme("http")
                .setHost("roll.news.sina.com.cn")
                .setPath("/s/channel.php")
                .setParameter("col", "91")
                .setParameter("spec", "")
                .setParameter("type", "")
                .setParameter("ch", "01")
                .setParameter("offset_page", "0")
                .setParameter("offset_num", "0")
                .setParameter("num", "60")
                .setParameter("page", "1")
                .build();
    }

    /**
     * Adds browser-like request headers to the given request.
     *
     * @param request the request to decorate
     */
    private static void applyHeaders(HttpGet request) {
        // The header name must be spelled "Accept"; a misspelled name is sent as an
        // unknown header and has no content-negotiation effect.
        request.setHeader("Accept", "*/*");
        request.setHeader("Accept-Encoding", "gzip, deflate");
        request.setHeader("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
        request.setHeader("Connection", "keep-alive");
        request.setHeader("Host", "roll.news.sina.com.cn");
        request.setHeader("Referer", "http://roll.news.sina.com.cn/s/channel.php?ch=01");
        request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0");
        // No Content-Type header: a GET request carries no body for it to describe.
    }

    /**
     * Prints the status line, every response header and, when present, the body and
     * its reported content length.
     *
     * @param response the response returned by the client
     * @throws IOException if reading the response body fails
     */
    private static void printResponse(HttpResponse response) throws IOException {
        // Status line of the server response.
        System.out.println(response.getStatusLine());

        // All response headers.
        for (Header header : response.getAllHeaders()) {
            System.out.println(header.getName() + ":" + header.getValue());
        }

        HttpEntity entity = response.getEntity();

        System.out.println("------------------------------------");

        if (entity == null) {
            return;
        }

        // Decode with the charset declared by the response, falling back to GBK only
        // when none is declared. Round-tripping the text through ISO-8859-1 bytes
        // destroys non-Latin characters whenever the server does declare a charset.
        System.out.println(EntityUtils.toString(entity, FALLBACK_CHARSET));

        System.out.println("----------------------------------------");
        // Length as reported by the entity; negative when the length is unknown.
        System.out.println("响应内容长度:" + entity.getContentLength());
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值