apache HttpClient 4.3.4自动登录并抓取中国联通网页用户基本信息和账单数据

1.新建一个maven项目httpclient

2.登录中国联通并抓取数据

3.使用Get模拟登录,抓取每月账单数据

中国联通有两种登录方式:

apache HttpClient 4.3.4自动登录并抓取中国联通网页用户基本信息和账单数据

apache HttpClient 4.3.4自动登录并抓取中国联通网页用户基本信息和账单数据

上面两图的区别在于:一个带验证码,一个不带验证码。下面将先解决不带验证码的登录。

这里有两个难点:一是验证码,二是uvc码。

验证码,这里将其写到本地,然后人工输入,这个还比较好解决.

uvc码,很重要,这个是在cookie里的,httpclient操作cookie的方法网上找了很久都没有找到,后来看其源码才看到.

package com.httpclient.asm.demo;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.CookieStore;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.*;
import org.apache.http.util.EntityUtils;

import javax.swing.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URLEncoder;

021public class LoginChinaUnicomWithCaptcha {
022  
023    /**
024     * 登录并抓取中国联通数据
025     * 带验证码登录
026     * @author Edson.di
027     * @date 2015年3月4日
028     * @version 1.0
029     * @throws IOException
030     */
031        public static void main(String args[]) throws Exception {
032  
033            String name = "联通手机号";
034            String pwd = "手机服务密码";
035  
036            //生成验证码的链接
037            String createCaptchaUrl = "http://uac.10010.com/portal/Service/CreateImage";
038            HttpClient httpClient = new DefaultHttpClient();
039  
040            //这里可自定义所需要的cookie
041            CookieStore cookieStore = new BasicCookieStore();
042  
043            CloseableHttpClient httpclient = HttpClients.custom()
044                    .setDefaultCookieStore(cookieStore)
045                    .build();
046  
047            //get captcha,获取验证码
048            HttpGet captchaHttpGet = new HttpGet(createCaptchaUrl);
049            HttpResponse capthcaResponse = httpClient.execute(captchaHttpGet);
050  
051            if (capthcaResponse.getStatusLine().getStatusCode() == 200) {
052                //将验证码写入本地
053                saveToLocal(capthcaResponse.getEntity(), "chinaunicom.capthca." + System.currentTimeMillis()+".png");
054            }
055  
056  
057            //手工输入验证码并验证
058            HttpResponse verifyResponse = null;
059            String capthca = null;
060            String uvc = null;
061  
062            do {
063                //输入验证码,读入键盘输入
064                //1)
065//                InputStream inputStream = System.in;
066//                BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
067//                System.out.println("请输入验证码:");
068//                capthca = bufferedReader.readLine();
069  
070                capthca=JOptionPane.showInputDialog("请输入图片验证码:");
071  
072                //2)
073                //Scanner scanner = new Scanner(System.in);
074                //capthca = scanner.next();
076                String verifyCaptchaUrl = "http://uac.10010.com/portal/Service/CtaIdyChk?verifyCode=" + capthca + "&verifyType=1";
077                HttpGet verifyCapthcaGet = new HttpGet(verifyCaptchaUrl);
078                verifyResponse = httpClient.execute(verifyCapthcaGet);
079                AbstractHttpClient abstractHttpClient = (AbstractHttpClient) httpClient;
080                for (Cookie cookie : abstractHttpClient.getCookieStore().getCookies()) {
081                    System.out.println(cookie.getName() + ":" + cookie.getValue());
082                    if (cookie.getName().equals("uacverifykey")) {
083                        uvc = cookie.getValue();
084                    }
085                }
086            } while (!EntityUtils.toString(verifyResponse.getEntity()).contains("true"));
087  
088            //登录
089            String loginurl = "https://uac.10010.com/portal/Service/MallLogin?userName=" + name + "&password=" + pwd + "&pwdType=01&productType=01&verifyCode=" + capthca + "&redirectType=03&uvc=" + uvc;
090            HttpGet loginGet = new HttpGet(loginurl);
091            CloseableHttpResponse loginResponse = httpclient.execute(loginGet);
092            System.out.print("result:" + EntityUtils.toString(loginResponse.getEntity()));
093  
094            //抓取基本信息数据
095            //jsonp1404663560635({resultCode:"7072",redirectURL:"http://www.10010.com",errDesc:"null",msg:'系统忙,请稍后再试。',needvode:"1"});
097            saveToLocal(httpclient.execute(basicHttpGet).getEntity(), "chinaunicom.basic.html");
098  
099        }
100        /**
101         * 写文件到本地
102         
103         * @param httpEntity
104         * @param filename
105         */
106        public static void saveToLocal(HttpEntity httpEntity, String filename) {
107          
108            try {
109          
110                File dir = new File("/JEE/sz-588/workspace/maven-httpclient-demo");
111                if (!dir.isDirectory()) {
112                    dir.mkdir();
113                }
114          
115                File file = new File(dir.getAbsolutePath() + "/" + filename);
116                FileOutputStream fileOutputStream = new FileOutputStream(file);
117                InputStream inputStream = httpEntity.getContent();
118          
119                if (!file.exists()) {
120                    file.createNewFile();
121                }
122                byte[] bytes = new byte[1024];
123                int length = 0;
124                while ((length = inputStream.read(bytes)) > 0) {
125                    fileOutputStream.write(bytes, 0, length);
126                }
127                inputStream.close();
128                fileOutputStream.close();
129            } catch (Exception e) {
130                e.printStackTrace();
131            }
132  
133   }
134}

生成文件

apache HttpClient 4.3.4自动登录并抓取中国联通网页用户基本信息和账单数据

json格式输出

apache HttpClient 4.3.4自动登录并抓取中国联通网页用户基本信息和账单数据

转自:http://www.zuidaima.com/share/2238465258310656.htm

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值