【无标题】

爬虫第一天

爬虫

1、 爬虫的入门

1 简单介绍

​ 什么是爬虫?

网络爬虫(又称为网页蜘蛛,网络机器人,在FOAF社区中间,更经常的称为网页追逐者),是一种按照一定的规则,自动地抓取万维网信息的程序或者脚本。另外一些不常使用的名字还有蚂蚁、自动索引、模拟程序或者蠕虫。

2 简单的爬虫编程

package cn.imust;

import org.apache.http.HttpEntity;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Minimal crawler example: fetches a single page with Apache HttpClient and
 * prints its body to standard output.
 */
public class test1 {
    /**
     * Sends a GET request to a fixed URL and prints the response body when the
     * server answers with status 200. Nothing is printed for any other status.
     *
     * @param args unused
     * @throws Exception if the request fails or the response body cannot be read
     */
    public static void main(String[] args) throws Exception {
        // Request target.
        HttpGet httpGet = new HttpGet("http://123.56.125.121:8080/tavel/admin/login.html");

        // try-with-resources closes the response first and then the client,
        // including when execute() or the body read throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            // Only a 200 response is decoded and printed.
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = response.getEntity();
                String content = EntityUtils.toString(httpEntity, "utf8");
                System.out.println(content);
            }
        }
    }
}

控制台输出

后台管理-登陆




会输出一个静态的网页源码,即访问成功。

### 3 HttpGet

``不带参数的httpGet``

```java
package cn.imust;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import javax.swing.text.html.parser.Entity;
import java.io.IOException;

/**
 * GET request without query parameters, issued with Apache HttpClient.
 */
public class HttpGetTest {
    /**
     * Requests a fixed URL and prints the length (in chars) of the response body
     * when the status is 200. An {@link IOException} raised while sending the
     * request, reading the body, or closing resources is printed as a stack
     * trace instead of being propagated.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String it = "https://www.itcast.com/";
        // Request target.
        HttpGet httpGet = new HttpGet(it);

        // try-with-resources replaces the manual finally block: the response is
        // only closed if it was actually obtained, so a failed execute() no
        // longer causes a NullPointerException that hides the real error and
        // skips closing the client. Close order is response, then client.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String body = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(body.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

带参数的httpGet

package cn.imust;

import com.sun.javafx.fxml.builder.URLBuilder;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;

/**
 * GET request with a query parameter, built with {@code URIBuilder}.
 */
public class HttpGetParamTest {
    /**
     * Builds a search URL with the query parameter {@code keys=java}, prints the
     * request, sends it, and prints the length (in chars) of the response body
     * when the status is 200. An {@link IOException} raised while sending,
     * reading, or closing is printed as a stack trace.
     *
     * @param args unused
     * @throws Exception if the base URL is not a valid URI
     */
    public static void main(String[] args) throws Exception {
        // Build the URI before any closeable resource exists, so a malformed
        // URL cannot leave an open client behind.
        URIBuilder uriBuilder = new URIBuilder("https://yun.itheima.com/search");
        uriBuilder.setParameter("keys", "java");

        HttpGet httpGet = new HttpGet(uriBuilder.build());
        System.out.println("你的httpGet请求的地址" + httpGet);

        // try-with-resources replaces the manual finally block: the response is
        // only closed if it was actually obtained, so a failed execute() no
        // longer causes a NullPointerException that hides the real error and
        // skips closing the client. Close order is response, then client.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String body = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(body.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

post

package cn.imust;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.io.IOException;

/**
 * POST request without a body, issued with Apache HttpClient.
 */
public class HttpPostTest {
    /**
     * Posts to a fixed URL and prints the length (in chars) of the response body
     * when the status is 200. An {@link IOException} raised while sending the
     * request, reading the body, or closing resources is printed as a stack
     * trace instead of being propagated.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String it = "https://www.itcast.cn/";
        // Request target.
        HttpPost httpPost = new HttpPost(it);

        // try-with-resources replaces the manual finally block: the response is
        // only closed if it was actually obtained, so a failed execute() no
        // longer causes a NullPointerException that hides the real error and
        // skips closing the client. Close order is response, then client.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpPost)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String body = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(body.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

相比于get请求,post请求基本没什么变化。

但是当带参数时会有变化:相比于get请求,post请求的参数需要使用list集合存储所需要的key-value。

package cn.imust;

import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * POST request carrying form parameters, issued with Apache HttpClient.
 */
public class HttpPostParamTest {
    /**
     * Posts the form field {@code keys=java} to a fixed search URL and prints the
     * length (in chars) of the response body when the status is 200. An
     * {@link IOException} raised while sending, reading, or closing is printed
     * as a stack trace.
     *
     * @param args unused
     * @throws Exception if the form entity cannot be encoded with the given charset
     */
    public static void main(String[] args) throws Exception {
        String it1 = "http://yun.itheima.com/search";
        // Request target.
        HttpPost httpPost = new HttpPost(it1);

        // POST parameters are collected as name/value pairs and sent as a
        // URL-encoded form body rather than in the query string.
        List<NameValuePair> params = new ArrayList<NameValuePair>();
        params.add(new BasicNameValuePair("keys", "java"));

        // Build the entity before any closeable resource exists, so an encoding
        // failure cannot leave an open client behind.
        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(params, "utf8");
        httpPost.setEntity(formEntity);

        // try-with-resources replaces the manual finally block: the response is
        // only closed if it was actually obtained, so a failed execute() no
        // longer causes a NullPointerException that hides the real error and
        // skips closing the client. Close order is response, then client.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpPost)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String body = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(body.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

进击的程序员1

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值