1.新建一个maven项目httpclient
2.登录中国联通并抓取数据
3.使用Get模拟登录,抓取每月账单数据
中国联通有两种登录方式:
上面两图的区别在于:一个带验证码,一个不带验证码.下面将先解决不带验证码的登录.
这里有两个难点,一是验证码,二是uvc码;
验证码,这里将其写到本地,然后人工输入,这个还比较好解决.
uvc码,很重要,这个是在cookie里的,httpclient操作cookie的方法网上找了很久都没有找到,后来看其源码才看到.
001 | package com.httpclient.asm.demo; |
002 | |
003 | import org.apache.http.HttpEntity; |
004 | import org.apache.http.HttpResponse; |
005 | import org.apache.http.client.CookieStore; |
006 | import org.apache.http.client.HttpClient; |
007 | import org.apache.http.client.methods.CloseableHttpResponse; |
008 | import org.apache.http.client.methods.HttpGet; |
009 | import org.apache.http.client.methods.HttpPost; |
010 | import org.apache.http.cookie.Cookie; |
011 | import org.apache.http.impl.client.*; |
012 | import org.apache.http.util.EntityUtils; |
013 | |
014 | import javax.swing.*; |
015 | import java.io.BufferedReader; |
016 | import java.io.File; |
017 | import java.io.FileOutputStream; |
018 | import java.io.InputStream; |
019 | import java.io.InputStreamReader; |
020 | |
021 | public class LoginChinaUnicomWithCaptcha { |
022 | |
023 | /** |
024 | * 登录并抓取中国联通数据 |
025 | * 带验证码登录 |
026 | * @author Edson.di |
027 | * @date 2015年3月4日 |
028 | * @version 1.0 |
029 | * @throws IOException |
030 | */ |
031 | public static void main(String args[]) throws Exception { |
032 | |
033 | String name = "联通手机号" ; |
034 | String pwd = "手机服务密码" ; |
035 | |
036 | //生成验证码的链接 |
037 | String createCaptchaUrl = "http://uac.10010.com/portal/Service/CreateImage" ; |
038 | HttpClient httpClient = new DefaultHttpClient(); |
039 | |
040 | //这里可自定义所需要的cookie |
041 | CookieStore cookieStore = new BasicCookieStore(); |
042 | |
043 | CloseableHttpClient httpclient = HttpClients.custom() |
044 | .setDefaultCookieStore(cookieStore) |
045 | .build(); |
046 | |
047 | //get captcha,获取验证码 |
048 | HttpGet captchaHttpGet = new HttpGet(createCaptchaUrl); |
049 | HttpResponse capthcaResponse = httpClient.execute(captchaHttpGet); |
050 | |
051 | if (capthcaResponse.getStatusLine().getStatusCode() == 200 ) { |
052 | //将验证码写入本地 |
053 | saveToLocal(capthcaResponse.getEntity(), "chinaunicom.capthca." + System.currentTimeMillis()+ ".png" ); |
054 | } |
055 | |
056 | |
057 | //手工输入验证码并验证 |
058 | HttpResponse verifyResponse = null ; |
059 | String capthca = null ; |
060 | String uvc = null ; |
061 | |
062 | do { |
063 | //输入验证码,读入键盘输入 |
064 | //1) |
065 | // InputStream inputStream = System.in; |
066 | // BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream)); |
067 | // System.out.println("请输入验证码:"); |
068 | // capthca = bufferedReader.readLine(); |
069 | |
070 | capthca=JOptionPane.showInputDialog( "请输入图片验证码:" ); |
071 | |
072 | //2) |
073 | //Scanner scanner = new Scanner(System.in); |
074 | //capthca = scanner.next(); |
076 | String verifyCaptchaUrl = "http://uac.10010.com/portal/Service/CtaIdyChk?verifyCode=" + capthca + "&verifyType=1" ; |
077 | HttpGet verifyCapthcaGet = new HttpGet(verifyCaptchaUrl); |
078 | verifyResponse = httpClient.execute(verifyCapthcaGet); |
079 | AbstractHttpClient abstractHttpClient = (AbstractHttpClient) httpClient; |
080 | for (Cookie cookie : abstractHttpClient.getCookieStore().getCookies()) { |
081 | System.out.println(cookie.getName() + ":" + cookie.getValue()); |
082 | if (cookie.getName().equals( "uacverifykey" )) { |
083 | uvc = cookie.getValue(); |
084 | } |
085 | } |
086 | } while (!EntityUtils.toString(verifyResponse.getEntity()).contains( "true" )); |
087 | |
088 | //登录 |
089 | String loginurl = "https://uac.10010.com/portal/Service/MallLogin?userName=" + name + "&password=" + pwd + "&pwdType=01&productType=01&verifyCode=" + capthca + "&redirectType=03&uvc=" + uvc; |
090 | HttpGet loginGet = new HttpGet(loginurl); |
091 | CloseableHttpResponse loginResponse = httpclient.execute(loginGet); |
092 | System.out.print( "result:" + EntityUtils.toString(loginResponse.getEntity())); |
093 | |
094 | //抓取基本信息数据 |
095 | //jsonp1404663560635({resultCode:"7072",redirectURL:"http://www.10010.com",errDesc:"null",msg:'系统忙,请稍后再试。',needvode:"1"}); |
096 | HttpPost basicHttpGet = new HttpPost( "http://iservice.10010.com/ehallService/static/acctBalance/execute/YH102010005/QUERY_AcctBalance.processData/Result" ); |
097 | saveToLocal(httpclient.execute(basicHttpGet).getEntity(), "chinaunicom.basic.html" ); |
098 | |
099 | } |
100 | /** |
101 | * 写文件到本地 |
102 | * |
103 | * @param httpEntity |
104 | * @param filename |
105 | */ |
106 | public static void saveToLocal(HttpEntity httpEntity, String filename) { |
107 | |
108 | try { |
109 | |
110 | File dir = new File( "/JEE/sz-588/workspace/maven-httpclient-demo" ); |
111 | if (!dir.isDirectory()) { |
112 | dir.mkdir(); |
113 | } |
114 | |
115 | File file = new File(dir.getAbsolutePath() + "/" + filename); |
116 | FileOutputStream fileOutputStream = new FileOutputStream(file); |
117 | InputStream inputStream = httpEntity.getContent(); |
118 | |
119 | if (!file.exists()) { |
120 | file.createNewFile(); |
121 | } |
122 | byte [] bytes = new byte [ 1024 ]; |
123 | int length = 0 ; |
124 | while ((length = inputStream.read(bytes)) > 0 ) { |
125 | fileOutputStream.write(bytes, 0 , length); |
126 | } |
127 | inputStream.close(); |
128 | fileOutputStream.close(); |
129 | } catch (Exception e) { |
130 | e.printStackTrace(); |
131 | } |
132 | |
133 | } |
134 | } |
生成文件
json格式输出