Java获取网页内容实现自动化(IASI)

[size=medium][b]IasiClient [/b][/size]

package com.iasi.client;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Properties;

import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CookieStore;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

public class IasiClient {
// Settings read from iasi.properties (loaded by initProps).
private Properties props = new Properties();
// NOTE(review): this field is not read anywhere in the visible code.
private String resDate;
// Value sent as "order_size" when the order is placed; last() overwrites it with the
// <size> value from the search response. Starts out as "10".
private String dataSize = "10";
// First day of the range to grab; set by initProps and sent as "start_date".
private String resStartDate;
// Last day of the range to grab; set by initProps and sent as "end_date".
private String resEndDate;

/**
 * Loads iasi.properties from the working directory and resolves the date range to grab.
 *
 * <p>With no dates, both ends of the range are the day {@code dayBefore} days before today
 * (0 days when the property is missing or not a number). With one date, that date is used
 * for both ends. With two or more, the first two entries are the start and end.
 *
 * @param dates optional dates (typically the command-line arguments); presumably in
 *              yyyy-MM-dd form to match the generated default. {@code null} is treated
 *              like an empty array.
 * @throws IOException if the log file cannot be written
 */
private void initProps(String[] dates) throws IOException {
    // Load iasi.properties. A failure is reported but not fatal, as before; the stream
    // is now always closed instead of being leaked.
    InputStream is = null;
    try {
        is = new FileInputStream(new File(System.getProperty("user.dir")+"/iasi.properties"));
        props.load(is);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a read-only stream fails.
            }
        }
    }

    int dayBefer = 0;
    try {
        dayBefer = Integer.parseInt(props.getProperty("dayBefore")); // e.g. dayBefore=2
    } catch (NumberFormatException e) {
        // Missing or malformed property: fall back to 0 days.
        e.printStackTrace();
    }

    if (dates == null || dates.length == 0) {
        // No explicit dates: use (today - dayBefore) for both ends of the range.
        Calendar c = Calendar.getInstance();
        c.add(Calendar.DATE, dayBefer * -1);
        String defaultDate = new SimpleDateFormat("yyyy-MM-dd").format(c.getTime());
        resStartDate = defaultDate;
        resEndDate = defaultDate;
    } else if (dates.length == 1) {
        resStartDate = dates[0];
        resEndDate = dates[0];
    } else {
        resStartDate = dates[0];
        resEndDate = dates[1];
    }
    writeLogToFile("Grab date:" + resStartDate+" "+resEndDate);
}

/**
 * Submits the IASI search ("orderNow") with all parameters gathered so far, records the
 * reported data size, and then requests the shopping-cart page.
 *
 * <p>The bounding box and time window come from iasi.properties, the data_start / data_end /
 * max_days_val values from getMiddleParameters, and the date range from initProps.
 *
 * @param httpclient client used for both requests
 * @param sessionId  value sent as the Cookie header
 * @throws ClientProtocolException on an HTTP protocol error
 * @throws IOException on a connection problem or if the log file cannot be written
 */
private void last(HttpClient httpclient, String sessionId) throws ClientProtocolException,IOException {
    String orderUrl = "http://www.class.ncdc.noaa.gov/saa/prod/orderNow";
    HttpPost httpost = new HttpPost(orderUrl);
    System.out.println("第五次请求链接地址(post):"+orderUrl);
    /*
     * XML recorded by the original author for this request:
     * <?xml version="1.0" encoding="ISO-8859-1"?>
     * <qsResults ...>
     *   <message/> <total/> <size/> <hits/> <max/> <subord>null</subord>
     * </qsResults>
     * Note that <size/> can be empty, which is why the size extraction below is guarded.
     */

    httpost.setHeader("Cookie", sessionId);
    // Everything added to formList is sent as a request parameter.
    List<NameValuePair> formList = new ArrayList<NameValuePair>();
    formList.add(new BasicNameValuePair("search_opt", "SC"));
    formList.add(new BasicNameValuePair("gid_pattern", ""));
    formList.add(new BasicNameValuePair("orb_pattern", ""));
    formList.add(new BasicNameValuePair(
            "dsname_pattern",
            "^IASI_(CCR|L02|XXX_1C)_M\\d\\d(_|_V\\d\\d\\d\\d\\d\\d_)20\\d\\d(0[1-9]|1[012])([012][0-9]|3[01])(.{0,65})$"));
    // Values read from the configuration file.
    formList.add(new BasicNameValuePair("nlat", props.getProperty("nlat"))); // e.g. 90
    formList.add(new BasicNameValuePair("wlon", props.getProperty("wlon"))); // e.g. -180
    formList.add(new BasicNameValuePair("elon", props.getProperty("elon"))); // e.g. 180
    formList.add(new BasicNameValuePair("slat", props.getProperty("slat"))); // e.g. -90
    formList.add(new BasicNameValuePair("start_time", props.getProperty("start_time"))); // e.g. 00:00:00
    formList.add(new BasicNameValuePair("end_time", props.getProperty("end_time"))); // e.g. 23:59:59
    formList.add(new BasicNameValuePair("minDiff", "0.0"));
    // Intermediate values scraped from the search page.
    formList.add(new BasicNameValuePair("data_start", data_start)); // e.g. 2007-02-28
    formList.add(new BasicNameValuePair("data_end", data_end)); // e.g. 2012-11-06
    formList.add(new BasicNameValuePair("max_days_val", max_days_val)); // e.g. 366
    // Date range resolved by initProps.
    formList.add(new BasicNameValuePair("start_date", resStartDate)); // e.g. 2012-11-04
    formList.add(new BasicNameValuePair("end_date", resEndDate)); // e.g. 2012-11-04
    formList.add(new BasicNameValuePair("between_through", "T"));
    formList.add(new BasicNameValuePair("Datatype", "IASI1CAIP"));
    formList.add(new BasicNameValuePair("limit_search", "Y"));
    formList.add(new BasicNameValuePair("max_lat_range", "180"));
    formList.add(new BasicNameValuePair("max_lon_range", "360"));
    formList.add(new BasicNameValuePair("datatype_family", "IASI"));
    httpost.setEntity(new UrlEncodedFormEntity(formList));

    HttpResponse response = httpclient.execute(httpost);
    writeLogToFile("-------------xml----------------");
    // The reported data size is needed for the final order request.
    String xml = EntityUtils.toString(response.getEntity());
    writeLogToFile(xml); // record what the search request returned

    // Expected form: <size>29649638114</size>. When the element is empty (<size/>) or
    // missing, keep the current dataSize instead of failing on a negative substring bound.
    int sizeOpen = xml.indexOf("<size>");
    int sizeClose = xml.indexOf("</size>");
    if (sizeOpen != -1 && sizeClose >= sizeOpen + 6) {
        dataSize = xml.substring(sizeOpen + 6, sizeClose);
    } else {
        writeLogToFile("No <size> value in response, keeping previous size");
    }
    writeLogToFile("middle parameters Size:" + dataSize);
    writeLogToFile("-------------end----------------");
    EntityUtils.consume(response.getEntity());

    String cartUrl = "http://www.class.ncdc.noaa.gov/saa/products/shopping_cart";
    httpost = new HttpPost(cartUrl);
    System.out.println("第六次请求链接地址(post):"+cartUrl);
    httpost.setHeader("Cookie", sessionId);
    response = httpclient.execute(httpost);
    // Discard the body so the same client can be reused for the next request.
    EntityUtils.consume(response.getEntity());
}
/**
 * Main workflow: loads the configuration, logs in, follows the login redirect, fetches the
 * intermediate search parameters, runs the search and places the order. The connection
 * manager is shut down in the finally block whatever happens.
 *
 * @param dates optional dates forwarded to initProps
 * @throws NoSuchAlgorithmException if the "TLS" SSLContext is unavailable
 * @throws KeyManagementException if the SSLContext cannot be initialised
 * @throws ClientProtocolException on an HTTP protocol error
 * @throws IOException on a connection problem or if the log file cannot be written
 */
public void doConnection(String[] dates) throws NoSuchAlgorithmException, KeyManagementException,
ClientProtocolException, IOException {
/**
* HttpClient represents an HTTP client; the interface defines the basic request execution behaviour.
* HttpEntity is the carrier of a sent or received message body, obtainable from a request or response.
* HttpConnection represents one HTTP connection.
*/
System.out.println("--------3---------");
this.initProps(dates);
DefaultHttpClient httpClient = new DefaultHttpClient(); // the client shared by every request below
try {
// NOTE(review): both check methods below are empty, so this trust manager accepts every
// certificate chain; HTTPS server identity is therefore not verified.
TrustManager easyTrustManager = new X509TrustManager() {

public void checkClientTrusted(
java.security.cert.X509Certificate[] x509Certificates, String s)
throws java.security.cert.CertificateException {
// Intentionally empty: no client certificate check is performed.
}

public void checkServerTrusted(
java.security.cert.X509Certificate[] x509Certificates, String s)
throws java.security.cert.CertificateException {
// Intentionally empty: no server certificate check is performed.
}

public java.security.cert.X509Certificate[] getAcceptedIssuers() {
return new java.security.cert.X509Certificate[0];
// No accepted issuers are advertised.
}
};

SSLContext sslcontext = SSLContext.getInstance("TLS");
sslcontext.init(null, new TrustManager[] { easyTrustManager }, null);
SSLSocketFactory sf = new SSLSocketFactory(sslcontext);
Scheme sch = new Scheme("https", 443, sf); // route https on port 443 through the socket factory above
httpClient.getConnectionManager().getSchemeRegistry().register(sch);

/*
* Configure the proxy.
* NOTE(review): the proxy host and port are hard-coded rather than read from iasi.properties.
*/
HttpHost proxy = new HttpHost("10.24.5.105", 39002, "http");
httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
System.out.println("代理设置完毕。");
System.out.println("第一次请求链接地址(post):"+"https://www.class.ncdc.noaa.gov/saa/products/j_security_check");
HttpPost httppost = new HttpPost("https://www.class.ncdc.noaa.gov/saa/products/j_security_check"); // login form target
// Build the login form parameters.
List<NameValuePair> form = new ArrayList<NameValuePair>();
// User name and password come from the configuration file.
form.add(new BasicNameValuePair("j_username", props.getProperty("userName")));
form.add(new BasicNameValuePair("j_password", props.getProperty("userPassword")));
UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(form); // credentials are URL-encoded into the request entity
httppost.setEntity(formEntity);
// Browser-like request headers.
httppost.setHeader(new BasicHeader("Accept",
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")); // content types the client accepts
httppost.setHeader(new BasicHeader("Accept-Encoding", "gzip, deflate")); // advertises gzip/deflate support
httppost.setHeader(new BasicHeader("Accept-Language",
"zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3")); // preferred response languages
httppost.setHeader(new BasicHeader("Connection", "keep-alive")); // ask to keep the connection open
httppost.setHeader(new BasicHeader("Cookie", this.getSessionId(httpClient))); // getSessionId issues its own request to obtain JSESSIONID
httppost.setHeader(new BasicHeader("Host", "www.class.ncdc.noaa.gov"));// host the request is addressed to
httppost.setHeader(new BasicHeader("Referer","https://rda.ucar.edu/cgi-bin/login?resource=%2Fsaa%2Fproducts%2Fwelcome")); // NOTE(review): the Referer points at a different host (rda.ucar.edu) than the login target - confirm this is intended
httppost.setHeader(new BasicHeader("User-Agent",
"Mozilla/5.0 (Windows NT 6.1; rv:12.0) Gecko/20100101 Firefox/12.0 FirePHP/0.7.1")); // identifies the client as a Firefox browser
httppost.setHeader(new BasicHeader("x-insight", "activate"));
writeLogToFile("executing request:" + httppost.getRequestLine()); // e.g. POST https://www.class.ncdc.noaa.gov/saa/products/j_security_check HTTP/1.1
// NOTE(review): the label says "status code" but this prints the hashCode of the request line, before the request is executed.
System.out.println("执行返回状态码:"+httppost.getRequestLine().hashCode());
HttpResponse response = httpClient.execute(httppost); // send the login request
HttpEntity entity = response.getEntity(); // body returned by the server, may be null
writeLogToFile("-----------------landing start---登录开始--------------------");
System.out.println(response.getStatusLine().getStatusCode()); // original author observed 302 here
writeLogToFile(response.getStatusLine().toString()); // original author observed "Moved Temporarily"
if (entity != null) {
writeLogToFile("Response content length: " + entity.getContentLength());
}
writeLogToFile("----------------landing end-------登录结束-----------------");
Header[] headers = response.getAllHeaders();
String location = "";
writeLogToFile("---------------------Befor landing jump page Head Info start 提取登录页面跳转之前的头部信息--------------------------");
// Log every response header and remember the redirect target.
for (Header header : headers) {
if ("Location".equals(header.getName())) {
location = header.getValue(); // e.g. https://www.class.ncdc.noaa.gov/saa/products/classlogin?resource=%2Fsaa%2Fproducts%2Fwelcome
}
writeLogToFile(header.getName() + ":" + header.getValue());
}
writeLogToFile("---------------------Befor landing jump page Head Info end-----------------------");
writeLogToFile("---------------------After landing jump page start-------登录页面跳转之后-------------------");
EntityUtils.consume(entity);
// Follow the post-login redirect with a parameterless GET.
// NOTE(review): location stays "" when the response carries no Location header.
HttpGet get = new HttpGet(location);
System.out.println("第三次请求链接地址(get):"+location);
response = httpClient.execute(get);
// System.out.println(EntityUtils.toString(response.getEntity()));
writeLogToFile("---------------------After landing jump page end-----------------------");
EntityUtils.consume(response.getEntity());
writeLogToFile("---------------------middle parameters start--------------------------");
// After login, scrape the search page for data_start, data_end and max_days_val.
this.getMiddleParameters(httpClient, this.getSessionId(httpClient));
writeLogToFile("---------------------middle parameters end-----------------------");
writeLogToFile("----------------get Info by ajax start--通过ajax获取信息----------------------");
// Run the search and open the shopping cart.
this.last(httpClient, this.getSessionId(httpClient));
writeLogToFile("----------------get Info by ajax end---------------------");
writeLogToFile("----------------Submit email Info get result start------------------------");
// NOTE(review): response is still the redirect GET response, which was already consumed above.
EntityUtils.consume(response.getEntity());
this.doLast(httpClient, this.getSessionId(httpClient));
writeLogToFile("----------------Submit email Info get result end---------------------");
} finally {
httpClient.getConnectionManager().shutdown();
}
}

/**
 * Places the order (the site then mails the order information to the configured address)
 * and creates an empty marker file named after the confirmation number under
 * {@code resultPath}.
 *
 * @param httpclient client used for the request
 * @param sessionId  value sent as the Cookie header
 * @throws ClientProtocolException on an HTTP protocol error
 * @throws IOException on a connection problem, if the log or marker file cannot be written,
 *                     if the response holds no parseable confirmation number, or if
 *                     {@code resultPath} is not configured
 */
private void doLast(HttpClient httpclient, String sessionId) throws ClientProtocolException,IOException {
    System.out.println("--------4---------");
    String url = "http://www.class.ncdc.noaa.gov/saa/products/shop";
    HttpPost httpost = new HttpPost(url);
    System.out.println("第七次请求链接地址(post):"+url);
    httpost.setHeader("Cookie", sessionId);

    List<NameValuePair> formList = new ArrayList<NameValuePair>();
    formList.add(new BasicNameValuePair("cocoon-action", "PlaceOrder"));
    formList.add(new BasicNameValuePair("deliv_manifest_opt", "N"));
    formList.add(new BasicNameValuePair("delivery_media", ""));
    formList.add(new BasicNameValuePair("delivery_method", ""));
    formList.add(new BasicNameValuePair("ekey", "N"));
    formList.add(new BasicNameValuePair("email", props.getProperty("email")));
    formList.add(new BasicNameValuePair("encryption", ""));
    formList.add(new BasicNameValuePair("media_list", "0"));
    formList.add(new BasicNameValuePair("order_comment", ""));
    formList.add(new BasicNameValuePair("order_now_IASI", "on"));
    formList.add(new BasicNameValuePair("order_size", dataSize));
    formList.add(new BasicNameValuePair("page", "cart"));
    formList.add(new BasicNameValuePair("price_est", "0"));
    formList.add(new BasicNameValuePair("product_number", ""));
    formList.add(new BasicNameValuePair("quantity_est", "0"));
    httpost.setEntity(new UrlEncodedFormEntity(formList));

    HttpResponse response = httpclient.execute(httpost);
    String res = EntityUtils.toString(response.getEntity());
    System.out.println("-------------ResultPage----------------");
    System.out.println(res);
    System.out.println("-------------ResultPage----------------");

    // The original test was indexOf(...) != 0, which is also true when the marker is
    // absent (-1) and then cut the page at an arbitrary offset.
    String orderNumber = extractConfirmationNumber(res);
    if (orderNumber == null) {
        writeLogToFile("No confirmation number found in the order response");
        throw new IOException("Order response did not contain a confirmation number");
    }
    writeLogToFile("-------------lastResult----------------");
    writeLogToFile("ordernum="+orderNumber);

    String resultPath = props.getProperty("resultPath");
    if (resultPath == null) {
        throw new IOException("resultPath is not configured in iasi.properties");
    }
    File folder = new File(resultPath);
    if (!folder.exists()) {
        folder.mkdirs();
    }
    // The marker file is named with everything before the first '.', or with the whole
    // number when there is no '.'.
    int dot = orderNumber.indexOf(".");
    String markerName = dot >= 0 ? orderNumber.substring(0, dot) : orderNumber;
    new File(folder.getPath()+"/"+markerName).createNewFile();
    System.out.println(folder.getPath()+"/"+markerName+"----------");
    writeLogToFile("-------------lastResult----------------");
}

/**
 * Returns the trimmed text between "Your confirmation number is:" and the next
 * {@code <br>}, or {@code null} when either delimiter is missing.
 */
private static String extractConfirmationNumber(String page) {
    final String marker = "Your confirmation number is:";
    int markerAt = page.indexOf(marker);
    if (markerAt == -1) {
        return null;
    }
    String tail = page.substring(markerAt + marker.length());
    int lineBreak = tail.indexOf("<br>");
    if (lineBreak == -1) {
        return null;
    }
    return tail.substring(0, lineBreak).trim();
}

/**
 * Requests the class-login page and returns the session cookie currently held by the
 * client's cookie store.
 *
 * <p>Every cookie in the store is written to the log. If several JSESSIONID cookies are
 * present, the last one wins.
 *
 * @param httpClient client whose cookie store is inspected
 * @return {@code "JSESSIONID=<value>"}, or {@code null} when no such cookie exists
 * @throws ClientProtocolException on an HTTP protocol error
 * @throws IOException on a connection problem or if the log file cannot be written
 */
private String getSessionId(DefaultHttpClient httpClient) throws ClientProtocolException,
        IOException {
    System.out.println("--------5---------");
    final String loginUrl = "https://www.class.ncdc.noaa.gov/saa/products/classlogin?resource=%2Fsaa%2Fproducts%2Fwelcome";
    System.out.println("第二次请求链接地址(post):" + loginUrl);

    HttpResponse loginPage = httpClient.execute(new HttpPost(loginUrl));
    writeLogToFile("-----------------Get browser default SessionId-----------------------");

    String sessionCookie = null;
    List<Cookie> jar = httpClient.getCookieStore().getCookies();
    for (int i = 0; i < jar.size(); i++) {
        Cookie current = jar.get(i);
        String cookieName = current.getName();
        String cookieValue = current.getValue();
        writeLogToFile(cookieName + ":" + cookieValue);
        if ("JSESSIONID".equals(cookieName)) {
            sessionCookie = "JSESSIONID=" + cookieValue;
        }
    }

    writeLogToFile("-----------------Get browser default SessionId end-------------------");
    // Release the connection so the shared client can be reused.
    EntityUtils.consume(loginPage.getEntity());
    return sessionCookie;
}


// Intermediate search parameters: filled in by getMiddleParameters and sent by last()
// as the "data_start", "data_end" and "max_days_val" form fields.
private String data_start = "";
private String data_end = "";
private String max_days_val = "";

/**
 * Fetches the IASI search page and scrapes data_start, data_end and max_days_val from it.
 *
 * <p>When a field cannot be found or parsed, a default is used: "2007-02-28" for
 * data_start, today's date (yyyy-MM-dd) for data_end and "365" for max_days_val.
 *
 * @param httpClient client used for the request
 * @param sessionId  value sent as the Cookie header
 * @throws ClientProtocolException on an HTTP protocol error
 * @throws IOException on a connection problem or if the log file cannot be written
 */
private void getMiddleParameters(DefaultHttpClient httpClient, String sessionId)
        throws ClientProtocolException, IOException {
    System.out.println("--------6---------");
    String url = "http://www.class.ncdc.noaa.gov/saa/products/search?sub_id=0&datatype_family=IASI";
    System.out.println("第四次请求链接地址(get):"+url);
    HttpGet get = new HttpGet(url);
    get.setHeader("Cookie", sessionId);
    HttpResponse response = httpClient.execute(get);
    String str = EntityUtils.toString(response.getEntity());

    data_start = extractFieldValue(str, "data_start", "2007-02-28");
    writeLogToFile(data_start); // e.g. 2007-02-28

    data_end = extractFieldValue(str, "data_end",
            new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
    writeLogToFile(data_end); // e.g. 2012-11-06

    max_days_val = extractFieldValue(str, "max_days_val", "365");
    writeLogToFile("max_days_val*******************"+max_days_val); // e.g. 366

    EntityUtils.consume(response.getEntity());
}

/**
 * Extracts the value that follows the first occurrence of {@code fieldName} in
 * {@code html}: the text starting 7 characters after the next {@code value=} and ending
 * one character before the next {@code >}. Returns {@code fallback} when the field,
 * {@code value=} or {@code >} is missing or the bounds are inconsistent, where the
 * original code threw StringIndexOutOfBoundsException or returned unrelated text.
 */
private static String extractFieldValue(String html, String fieldName, String fallback) {
    int fieldAt = html.indexOf(fieldName);
    if (fieldAt == -1) {
        return fallback;
    }
    String tail = html.substring(fieldAt);
    int valueAt = tail.indexOf("value=");
    int tagEnd = tail.indexOf(">");
    if (valueAt == -1 || tagEnd == -1) {
        return fallback;
    }
    int from = valueAt + 7; // skips value= plus the opening quote
    int to = tagEnd - 1;    // stops before the closing quote
    if (from > to) {
        return fallback;
    }
    return tail.substring(from, to);
}

/**
 * Prints {@code message} to standard output and appends it, followed by a newline, to
 * log.log in the working directory.
 *
 * @param message text to log
 * @throws IOException if the log file cannot be opened, written or closed
 */
public void writeLogToFile(String message) throws IOException{
    System.out.println(message);
    File file = new File(System.getProperty("user.dir")+"/log.log");
    // Opening a FileWriter in append mode creates the file when it does not exist,
    // so no separate createNewFile() call is needed.
    FileWriter fw = new FileWriter(file, true);
    try {
        fw.write(message+"\n");
    } finally {
        // Close exactly once, even when the write fails. The original closed the
        // writer twice and leaked it on failure.
        fw.close();
    }
}

/**
 * Command-line entry point: runs the whole grab workflow with the arguments as the
 * optional date range. Any checked failure is reported by printing its stack trace.
 *
 * @param args zero, one or two dates forwarded to doConnection
 */
public static void main(String[] args) {
    final IasiClient grabber = new IasiClient();
    try {
        grabber.doConnection(args);
    } catch (java.security.GeneralSecurityException sslSetupFailure) {
        // Covers KeyManagementException and NoSuchAlgorithmException.
        sslSetupFailure.printStackTrace();
    } catch (IOException transportFailure) {
        // Also covers ClientProtocolException, which is an IOException.
        transportFailure.printStackTrace();
    }
}
}

[size=medium][b]iasi.properties[/b][/size]


#登录信息
userName=username
userPassword=password

#配置参数
#发送邮件地址
email=user@126.com
#几天之前的数据。如今天为2012-05-24 , 参数为1的时候,发送2012-05-23的数据
dayBefore=2
start_time=00:00:00
end_time=23:59:59
#经纬度
nlat=90
wlon=-180
elon=180
slat=-90
#最后文件存放路径(只可以到文件夹层级)
resultPath=/home/gsics/public/

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值