爬虫入门案例

需求: 模拟登录慢慢买网站, 登录成功以后, 获取当前用户的积分信息

登录的 URL: http://home.manmanbuy.com/login.aspx
访问这个 URL 需要提交的表单参数:
__VIEWSTATE, __EVENTVALIDATION, txtUser, txtPass, btnLogin

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Simulated login against manmanbuy.com.
 *
 * <p>Posts the login form, and when the server answers with a 302 (which this program treats as
 * a successful login) requests the redirect target with the cookies issued by the login
 * response, then prints the text of the element that holds the user's points.
 *
 * <p>The account name, password and ASP.NET form tokens are hard-coded sample values.
 */
public class ManManSpider {

    /** Login page; used as the POST target and as the Referer of that POST. */
    private static final String LOGIN_URL = "http://home.manmanbuy.com/login.aspx";

    /** CSS path to the points element on the page reached after login. */
    private static final String POINTS_SELECTOR =
            "#aspnetForm > div.udivright > div:nth-child(2) > table > tbody > tr > td:nth-child(1) > table:nth-child(2) > tbody > tr > td:nth-child(2) > div:nth-child(1) > font";

    /**
     * Runs the login flow and prints the points text to standard output.
     *
     * <p>If the login response is not a 302, or carries no {@code Location} header, a diagnostic
     * is written to standard error and nothing is fetched.
     *
     * @param args ignored
     * @throws Exception if a request fails or the redirect location is not a valid URI
     */
    public static void main(String[] args) throws Exception {
        URI redirectTarget;
        String cookieHeader;

        // Step 1: submit the login form and capture where to go next and which cookies to carry.
        try (CloseableHttpClient loginClient = HttpClients.createDefault()) {
            HttpPost httpPost = new HttpPost(LOGIN_URL);
            httpPost.setEntity(buildLoginForm());
            httpPost.setHeader("Referer", LOGIN_URL);

            try (CloseableHttpResponse loginResponse = loginClient.execute(httpPost)) {
                int statusCode = loginResponse.getStatusLine().getStatusCode();
                if (statusCode != 302) {
                    System.err.println("Login failed: expected HTTP 302 but got " + statusCode);
                    return;
                }

                Header location = loginResponse.getFirstHeader("Location");
                if (location == null) {
                    System.err.println("Login failed: 302 response has no Location header");
                    return;
                }

                // resolve() handles both relative and absolute Location values.
                redirectTarget = URI.create(LOGIN_URL).resolve(location.getValue());
                cookieHeader = buildCookieHeader(loginResponse.getHeaders("Set-Cookie"));
            }
        }

        // Step 2: fetch the redirect target with a fresh client, passing the cookies explicitly.
        try (CloseableHttpClient pageClient = HttpClients.createDefault()) {
            HttpGet httpGet = new HttpGet(redirectTarget);
            if (!cookieHeader.isEmpty()) {
                httpGet.setHeader("Cookie", cookieHeader);
            }

            try (CloseableHttpResponse pageResponse = pageClient.execute(httpGet)) {
                String html = EntityUtils.toString(pageResponse.getEntity(), StandardCharsets.UTF_8);

                Document document = Jsoup.parse(html);
                Elements jiFenEl = document.select(POINTS_SELECTOR);
                System.out.println(jiFenEl.text());
            }
        }
    }

    /**
     * Builds the URL-encoded login form body.
     *
     * <p>The form is encoded as UTF-8 so the non-ASCII {@code btnLogin} value survives encoding.
     *
     * @return the entity to attach to the login POST
     */
    private static HttpEntity buildLoginForm() {
        List<BasicNameValuePair> form = new ArrayList<BasicNameValuePair>();
        // NOTE(review): these two tokens look like values captured from the login page;
        // presumably they must be refreshed if the page changes — confirm.
        form.add(new BasicNameValuePair("__VIEWSTATE", "/wEPDwULLTIwNjQ3Mzk2NDFkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYBBQlhdXRvTG9naW4voj01ABewCkGpFHsMsZvOn9mEZg=="));
        form.add(new BasicNameValuePair("__EVENTVALIDATION", "/wEWBQLW+t7HAwLB2tiHDgLKw6LdBQKWuuO2AgKC3IeGDJ4BlQgowBQGYQvtxzS54yrOdnbC"));
        form.add(new BasicNameValuePair("txtUser", "itcast"));
        form.add(new BasicNameValuePair("txtPass", "www.itcast.cn"));
        form.add(new BasicNameValuePair("btnLogin", "登陆"));
        return new UrlEncodedFormEntity(form, StandardCharsets.UTF_8);
    }

    /**
     * Turns the {@code Set-Cookie} headers of a response into a single {@code Cookie} request
     * header value.
     *
     * <p>Only the leading {@code name=value} part of each header is kept; attributes after the
     * first {@code ';'} are dropped. Pairs are joined with {@code "; "}.
     *
     * @param setCookieHeaders the {@code Set-Cookie} headers, possibly empty
     * @return the joined cookie pairs, or an empty string when there are none
     */
    private static String buildCookieHeader(Header[] setCookieHeaders) {
        StringBuilder cookies = new StringBuilder();
        for (Header header : setCookieHeaders) {
            String value = header.getValue();
            if (value == null) {
                continue;
            }
            int attributesStart = value.indexOf(';');
            String pair = (attributesStart >= 0 ? value.substring(0, attributesStart) : value).trim();
            if (pair.isEmpty()) {
                continue;
            }
            if (cookies.length() > 0) {
                cookies.append("; ");
            }
            cookies.append(pair);
        }
        return cookies.toString();
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值