使用HTTPclient做post请求的爬虫技术

 

 

 

package com.itheima.spider.httpclient;

 

 

import org.apache.http.Header;

import org.apache.http.HttpEntity;

import org.apache.http.client.entity.UrlEncodedFormEntity;

import org.apache.http.client.methods.CloseableHttpResponse;

import org.apache.http.client.methods.HttpGet;

import org.apache.http.client.methods.HttpPost;

import org.apache.http.impl.client.CloseableHttpClient;

import org.apache.http.impl.client.HttpClients;

import org.apache.http.message.BasicNameValuePair;

import org.apache.http.util.EntityUtils;

 

 

import java.io.IOException;

import java.util.ArrayList;

 

 

public class HttpClientPost {

    public static void main(String[] args) throws IOException {

        //1.确定URL

        String indexUrl = "http://www.itcast.cn";

        //2 发送请求,获得数据

        //2.1 创建httpclient对象

        CloseableHttpClient httpClient = HttpClients.createDefault();

        //2.2创建httppost对象--通过URL得到

        HttpPost httpPost = new HttpPost(indexUrl);

        //2.2.1 设置请求头

        httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400");

        //2.2.2 设置请求参数

        //先获得请求参数的键值对 list集合

        //一,先建立泛型为 键值对的 list集合

        ArrayList<BasicNameValuePair> basicNameValuePairs = new ArrayList<BasicNameValuePair>;

        //二,给集合中增加数据

        basicNameValuePairs.add(new BasicNameValuePair("txtUser","黑马"));

        basicNameValuePairs.add(new BasicNameValuePair("txtPass","123456"));

        basicNameValuePairs.add(new BasicNameValuePair("city","北京"));

        basicNameValuePairs.add(new BasicNameValuePair("birthday","1980-01-01"));

        basicNameValuePairs.add(new BasicNameValuePair("sex","1"));

        //三.把上面的装有表单数据的list集合给封装到请求体中entity

        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(basicNameValuePairs);

        //四.把entity给装到请求体中

        httpPost.setEntity(formEntity);

        //2.3发送请求,获得响应

        CloseableHttpResponse response = httpClient.execute(httpPost);

        //2.4 把response中数据给解析出来

        //2.4.1 获得响应头,并且判断是否成功访问

        int statusCode = response.getStatusLine().getStatusCode();

        if(statusCode == 200){

            Header[] headers = response.getHeaders();

            for (Header header : headers) {

                System.out.println("响应头:name:"+header.getName()+"value:"+header.getValue());

            }

            //2.4.2 获得响应体

            HttpEntity entity = response.getEntity();

            //从响应体中获得网页内容并且打印

            String html = EntityUtils.toString(entity, "utf-8");

            System.out.println(html);

        }

        //2.5关闭资源

        httpClient.close();

 

 

    }

}

 

14b6e6dcb207d7948698e3d47a96c478f3a.jpg

 

1.确定一个爬取的URL

4faf39132985d7dc8e300c6e1b8366ace23.jpg

//1.确定URL

String indexUrl = "http://www.itcast.cn";

 

2发送请求,获取数据

 

2.1创建httpclient对象

1e9b68c1af36d94d8634f721e12d6bf962d.jpg

//2.1 创建httpclient对象

CloseableHttpClient httpClient = HttpClients.createDefault();

 

2.2创建HttpPost对象

7f528b0c54e7dad5ff2242963935df7f344.jpg

//2.2创建httppost对象

HttpPost httpPost = new HttpPost(indexUrl);

        

        设置请求头,请求体()

a382ed556f617461fda386808ee22d58b54.jpg

//2.2.1 设置请求头

httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400");

 

        设置请求参数:请求体(表单-数据和最下面的)

用于登录的登录名,密码 还有用户信息--键值对

83681d94573587532efed7cc11d469fd15c.jpg

 

//先获得请求参数的键值对 list集合

把上面的装有表单数据的list集合给封装到请求体中entity,再到请求体中

 

d78e59bb6fa8eeb77234c40b25365ca6202.jpg

// Build the request parameters as a list of name/value pairs.
// Step 1: create the list that holds the pairs.
ArrayList<BasicNameValuePair> basicNameValuePairs = new ArrayList<BasicNameValuePair>();

// Step 2: add the form fields -- the values mirror the "form data" of the page's request.
basicNameValuePairs.add(new BasicNameValuePair("txtUser","黑马"));
basicNameValuePairs.add(new BasicNameValuePair("txtPass","123456"));
basicNameValuePairs.add(new BasicNameValuePair("city","北京"));
basicNameValuePairs.add(new BasicNameValuePair("birthday","1980-01-01"));
basicNameValuePairs.add(new BasicNameValuePair("sex","1"));

// Step 3: wrap the list in a URL-encoded form entity. UTF-8 is specified explicitly
// because the single-argument constructor defaults to ISO-8859-1, which would
// corrupt the Chinese values.
UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(basicNameValuePairs, "UTF-8");

// Step 4: attach the entity to the request as its body.
httpPost.setEntity(formEntity);

 

2.3发送请求,获取响应 -- response

e7e38b51f7baf021b860e66c21803d750ce.jpg

//2.3发送请求,获得响应

CloseableHttpResponse response = httpClient.execute(httpPost);

 

 

2.4获取响应头  响应状态码  响应体

716570f449b97551af7ab88d2245cdf26bf.jpg

//2.4 把response中数据给解析出来

//2.4.1 获得响应头,并且判断是否成功访问

int statusCode = response.getStatusLine().getStatusCode();

if(statusCode == 200){

// Print every response header. getAllHeaders() is the no-argument accessor in
// HttpClient 4.x; getHeaders(String) requires a header name.
Header[] headers = response.getAllHeaders();
for (Header header : headers) {
    System.out.println("响应头:name:"+header.getName()+"value:"+header.getValue());
}

 

 

        获得响应体

从响应体中能获得 网页,并且把html页面给打印出来; 如果是json格式,需要别的ajax处理

93e9c9a2a804507b1c152f8817067155c0b.jpg

//2.4.2 获得响应体

HttpEntity entity = response.getEntity();

//从响应体中获得网页内容并且打印

String html = EntityUtils.toString(entity, "utf-8");

System.out.println(html);

}

 

 

2.5关闭资源

e07db677e2333c0869a657a10cade74763b.jpg

//2.5关闭资源

httpClient.close();

 

打印结果:

0c491e479c1785fda5af736b953f371359f.jpg

 

请求参数的样子

20669e01c78971a8c81ecc1ea708abc131b.jpg

 

 

 

 

package com.itheima.spider.httpclient;

 

 

import org.apache.http.Header;

import org.apache.http.HttpEntity;

import org.apache.http.client.entity.UrlEncodedFormEntity;

import org.apache.http.client.methods.CloseableHttpResponse;

import org.apache.http.client.methods.HttpGet;

import org.apache.http.client.methods.HttpPost;

import org.apache.http.impl.client.CloseableHttpClient;

import org.apache.http.impl.client.HttpClients;

import org.apache.http.message.BasicNameValuePair;

import org.apache.http.util.EntityUtils;

 

 

import java.io.IOException;

import java.util.ArrayList;

 

 

public class HttpClientPost {

    public static void main(String[] args) throws IOException {

        //1.确定URL

        String indexUrl = "http://www.itcast.cn";

        //2 发送请求,获得数据

        //2.1 创建httpclient对象

        CloseableHttpClient httpClient = HttpClients.createDefault();

        //2.2创建httppost对象--通过URL得到

        HttpPost httpPost = new HttpPost(indexUrl);

        //2.2.1 设置请求头

        httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400");

        //2.2.2 设置请求参数

        //先获得请求参数的键值对 list集合

        //一,先建立泛型为 键值对的 list集合

        ArrayList<BasicNameValuePair> basicNameValuePairs = new ArrayList<BasicNameValuePair>;

        //二,给集合中增加数据

        basicNameValuePairs.add(new BasicNameValuePair("txtUser","黑马"));

        basicNameValuePairs.add(new BasicNameValuePair("txtPass","123456"));

        basicNameValuePairs.add(new BasicNameValuePair("city","北京"));

        basicNameValuePairs.add(new BasicNameValuePair("birthday","1980-01-01"));

        basicNameValuePairs.add(new BasicNameValuePair("sex","1"));

        //三.把上面的装有表单数据的list集合给封装到请求体中entity

        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(basicNameValuePairs);

        //四.把entity给装到请求体中

        httpPost.setEntity(formEntity);

        //2.3发送请求,获得响应

        CloseableHttpResponse response = httpClient.execute(httpPost);

        //2.4 把response中数据给解析出来

        //2.4.1 获得响应头,并且判断是否成功访问

        int statusCode = response.getStatusLine().getStatusCode();

        if(statusCode == 200){

            Header[] headers = response.getHeaders();

            for (Header header : headers) {

                System.out.println("响应头:name:"+header.getName()+"value:"+header.getValue());

            }

            //2.4.2 获得响应体

            HttpEntity entity = response.getEntity();

            //从响应体中获得网页内容并且打印

            String html = EntityUtils.toString(entity, "utf-8");

            System.out.println(html);

        }

        //2.5关闭资源

        httpClient.close();

 

 

    }

}

 

66567c344a87224c18e7623797cc2500fa6.jpg

 

1.确定一个爬取的URL

616eca6b25e9647ebb680faee0d8276b8da.jpg

//1.确定URL

String indexUrl = "http://www.itcast.cn";

 

2发送请求,获取数据

 

2.1创建httpclient对象

613eaf740b97aeacdbf8e379ecb19e6b0f6.jpg

//2.1 创建httpclient对象

CloseableHttpClient httpClient = HttpClients.createDefault();

 

2.2创建HttpPost对象

a06067fc447fcda61a146ee696f1c92074c.jpg

//2.2创建httppost对象

HttpPost httpPost = new HttpPost(indexUrl);

        

        设置请求头,请求体()

90b90e10d93ddccf12ca9eae8047e6cdc0e.jpg

//2.2.1 设置请求头

httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400");

 

        设置请求参数:请求体(表单-数据和最下面的)

用于登录的登录名,密码 还有用户信息--键值对

adf5bb5a351006e4bd781e89837e46e7234.jpg

 

//先获得请求参数的键值对 list集合

把上面的装有表单数据的list集合给封装到请求体中entity,再到请求体中

 

d543d9c3c553acc166b6f0182b31be06795.jpg

// Build the request parameters as a list of name/value pairs.
// Step 1: create the list that holds the pairs.
ArrayList<BasicNameValuePair> basicNameValuePairs = new ArrayList<BasicNameValuePair>();

// Step 2: add the form fields -- the values mirror the "form data" of the page's request.
basicNameValuePairs.add(new BasicNameValuePair("txtUser","黑马"));
basicNameValuePairs.add(new BasicNameValuePair("txtPass","123456"));
basicNameValuePairs.add(new BasicNameValuePair("city","北京"));
basicNameValuePairs.add(new BasicNameValuePair("birthday","1980-01-01"));
basicNameValuePairs.add(new BasicNameValuePair("sex","1"));

// Step 3: wrap the list in a URL-encoded form entity. UTF-8 is specified explicitly
// because the single-argument constructor defaults to ISO-8859-1, which would
// corrupt the Chinese values.
UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(basicNameValuePairs, "UTF-8");

// Step 4: attach the entity to the request as its body.
httpPost.setEntity(formEntity);

 

2.3发送请求,获取响应 -- response

02e34d8d1c2cdd94ec7eee8665446f20ca0.jpg

//2.3发送请求,获得响应

CloseableHttpResponse response = httpClient.execute(httpPost);

 

 

2.4获取响应头  响应状态码  响应体

f7a6753836428d79d4cfbe4dceb3f4db75b.jpg

//2.4 把response中数据给解析出来

//2.4.1 获得响应头,并且判断是否成功访问

int statusCode = response.getStatusLine().getStatusCode();

if(statusCode == 200){

// Print every response header. getAllHeaders() is the no-argument accessor in
// HttpClient 4.x; getHeaders(String) requires a header name.
Header[] headers = response.getAllHeaders();
for (Header header : headers) {
    System.out.println("响应头:name:"+header.getName()+"value:"+header.getValue());
}

 

 

        获得响应体

从响应体中能获得 网页,并且把html页面给打印出来; 如果是json格式,需要别的ajax处理

75bfe4f087e48c3ac9fe5834b2cdd24794a.jpg

//2.4.2 获得响应体

HttpEntity entity = response.getEntity();

//从响应体中获得网页内容并且打印

String html = EntityUtils.toString(entity, "utf-8");

System.out.println(html);

}

 

 

2.5关闭资源

34bbcbe65de230d01660011b105d442c347.jpg

//2.5关闭资源

httpClient.close();

 

打印结果:

bd504b4c2ef81380b3ca45745c44faa0e8c.jpg

 

请求参数的样子

32fc4ae6261cefcd1e8772e6d07a965cb9c.jpg

 

 

 

 

package com.itheima.spider.httpclient;

 

 

import org.apache.http.Header;

import org.apache.http.HttpEntity;

import org.apache.http.client.entity.UrlEncodedFormEntity;

import org.apache.http.client.methods.CloseableHttpResponse;

import org.apache.http.client.methods.HttpGet;

import org.apache.http.client.methods.HttpPost;

import org.apache.http.impl.client.CloseableHttpClient;

import org.apache.http.impl.client.HttpClients;

import org.apache.http.message.BasicNameValuePair;

import org.apache.http.util.EntityUtils;

 

 

import java.io.IOException;

import java.util.ArrayList;

 

 

public class HttpClientPost {

    public static void main(String[] args) throws IOException {

        //1.确定URL

        String indexUrl = "http://www.itcast.cn";

        //2 发送请求,获得数据

        //2.1 创建httpclient对象

        CloseableHttpClient httpClient = HttpClients.createDefault();

        //2.2创建httppost对象--通过URL得到

        HttpPost httpPost = new HttpPost(indexUrl);

        //2.2.1 设置请求头

        httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400");

        //2.2.2 设置请求参数

        //先获得请求参数的键值对 list集合

        //一,先建立泛型为 键值对的 list集合

        ArrayList<BasicNameValuePair> basicNameValuePairs = new ArrayList<BasicNameValuePair>;

        //二,给集合中增加数据

        basicNameValuePairs.add(new BasicNameValuePair("txtUser","黑马"));

        basicNameValuePairs.add(new BasicNameValuePair("txtPass","123456"));

        basicNameValuePairs.add(new BasicNameValuePair("city","北京"));

        basicNameValuePairs.add(new BasicNameValuePair("birthday","1980-01-01"));

        basicNameValuePairs.add(new BasicNameValuePair("sex","1"));

        //三.把上面的装有表单数据的list集合给封装到请求体中entity

        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(basicNameValuePairs);

        //四.把entity给装到请求体中

        httpPost.setEntity(formEntity);

        //2.3发送请求,获得响应

        CloseableHttpResponse response = httpClient.execute(httpPost);

        //2.4 把response中数据给解析出来

        //2.4.1 获得响应头,并且判断是否成功访问

        int statusCode = response.getStatusLine().getStatusCode();

        if(statusCode == 200){

            Header[] headers = response.getHeaders();

            for (Header header : headers) {

                System.out.println("响应头:name:"+header.getName()+"value:"+header.getValue());

            }

            //2.4.2 获得响应体

            HttpEntity entity = response.getEntity();

            //从响应体中获得网页内容并且打印

            String html = EntityUtils.toString(entity, "utf-8");

            System.out.println(html);

        }

        //2.5关闭资源

        httpClient.close();

 

 

    }

}

 

37753973e8dae5808034b34f9a07852e9c9.jpg

 

1.确定一个爬取的URL

1148b226e18452649384cb18a311d892550.jpg

//1.确定URL

String indexUrl = "http://www.itcast.cn";

 

2发送请求,获取数据

 

2.1创建httpclient对象

33c4a8e82a4165895cfcd992e849cc124b3.jpg

//2.1 创建httpclient对象

CloseableHttpClient httpClient = HttpClients.createDefault();

 

2.2创建HttpPost对象

15e7c8669734c3856b8d719509bda1b9d64.jpg

//2.2创建httppost对象

HttpPost httpPost = new HttpPost(indexUrl);

        

        设置请求头,请求体()

316c2cd98bb143574b911533811ca3a74cd.jpg

//2.2.1 设置请求头

httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400");

 

        设置请求参数:请求体(表单-数据和最下面的)

用于登录的登录名,密码 还有用户信息--键值对

0b72e2d8467b05cb88a506d1eb8e238c06e.jpg

 

//先获得请求参数的键值对 list集合

把上面的装有表单数据的list集合给封装到请求体中entity,再到请求体中

 

d612cc890f6eaee648a8839b33588c0c016.jpg

// Build the request parameters as a list of name/value pairs.
// Step 1: create the list that holds the pairs.
ArrayList<BasicNameValuePair> basicNameValuePairs = new ArrayList<BasicNameValuePair>();

// Step 2: add the form fields -- the values mirror the "form data" of the page's request.
basicNameValuePairs.add(new BasicNameValuePair("txtUser","黑马"));
basicNameValuePairs.add(new BasicNameValuePair("txtPass","123456"));
basicNameValuePairs.add(new BasicNameValuePair("city","北京"));
basicNameValuePairs.add(new BasicNameValuePair("birthday","1980-01-01"));
basicNameValuePairs.add(new BasicNameValuePair("sex","1"));

// Step 3: wrap the list in a URL-encoded form entity. UTF-8 is specified explicitly
// because the single-argument constructor defaults to ISO-8859-1, which would
// corrupt the Chinese values.
UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(basicNameValuePairs, "UTF-8");

// Step 4: attach the entity to the request as its body.
httpPost.setEntity(formEntity);

 

2.3发送请求,获取响应 -- response

ee3334657c693a17bfd03424c6f895be42a.jpg

//2.3发送请求,获得响应

CloseableHttpResponse response = httpClient.execute(httpPost);

 

 

2.4获取响应头  响应状态码  响应体

8203faccf7e72075ca687303aa551837120.jpg

//2.4 把response中数据给解析出来

//2.4.1 获得响应头,并且判断是否成功访问

int statusCode = response.getStatusLine().getStatusCode();

if(statusCode == 200){

// Print every response header. getAllHeaders() is the no-argument accessor in
// HttpClient 4.x; getHeaders(String) requires a header name.
Header[] headers = response.getAllHeaders();
for (Header header : headers) {
    System.out.println("响应头:name:"+header.getName()+"value:"+header.getValue());
}

 

 

        获得响应体

从响应体中能获得 网页,并且把html页面给打印出来; 如果是json格式,需要别的ajax处理

9b1e79b08f647e1336fce3fc2649eaa2223.jpg

//2.4.2 获得响应体

HttpEntity entity = response.getEntity();

//从响应体中获得网页内容并且打印

String html = EntityUtils.toString(entity, "utf-8");

System.out.println(html);

}

 

 

2.5关闭资源

d5feaab0c01c2da5226c3ab2ef950982c65.jpg

//2.5关闭资源

httpClient.close();

 

打印结果:

184136bb6bec48ab999ddcb4d0ccdbc003c.jpg

 

请求参数的样子

621f6c005439dcc148b0fb5845146721995.jpg

 

 

转载于:https://my.oschina.net/u/4140608/blog/3059758

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值