Java 模拟浏览器操作,一般有五个步骤:
1、创建连接;
2、设置请求头信息;
3、连接;
4、设置请求参数(写入请求体);
5、获取响应结果(包括响应头和内容)。
下面以“中国国际航空航班信息查询”为例:
/**
 * Simulates a browser session against the Air China booking site in two steps.
 * <ol>
 * <li>POSTs the flight-search form to {@code AirLowFareSearchExt.do}, prints every
 * response header and collects the cookies announced via {@code Set-Cookie}.</li>
 * <li>POSTs {@code lang=zh_CN} to {@code AirFareFamiliesFlexibleForward.do}, sending the
 * collected cookies back; on HTTP 200 the response body is read with
 * {@code getContentByStream} and handed to {@code writeToFile}.</li>
 * </ol>
 * Any {@link IOException} is printed to standard error; it is not rethrown.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args)
{
    final String encode = "utf-8";
    final String website = "http://et.airchina.com.cn";
    final int connectTimeOut = 15000;
    final int readDataTimeOut = 50000;
    HttpURLConnection httpConn = null;
    String requestCookie = null;
    String reqUrl = "/InternetBooking/AirLowFareSearchExt.do";
    try
    {
        // ---- First request [POST]: submit the search form ----
        // 1-2. Create the connection and set the request headers.
        httpConn = openPostConnection(website + reqUrl, connectTimeOut, readDataTimeOut, null);
        System.out.println("request url : " + reqUrl);
        // 3-4. Connect and write the request parameters.
        // NOTE: the original literal had a stray space after "amount=4"; a raw space is not
        // valid inside a URL-encoded form body, so it has been removed.
        String postData = "tripType=OW&searchType=FARE&flexibleSearch=false&directFlightsOnly=false&fareOptions=1.FAR.X&outboundOption.originLocationCode=PEK&outboundOption.destinationLocationCode=SZX&outboundOption.departureDay=28&outboundOption.departureMonth=09&outboundOption.departureYear=2011&outboundOption.departureTime=NA&guestTypes%5B0%5D.type=ADT&guestTypes%5B0%5D.amount=4&guestTypes%5B1%5D.type=CNN&guestTypes%5B1%5D.amount=0&guestTypes%5B2%5D.type=INF&guestTypes%5B2%5D.amount=0&pos=AIRCHINA_CN&lang=zh_CN&ajaxAction=true";
        sendFormBody(httpConn, postData, encode);
        // 5. Read the response headers; only the cookies are needed from this response.
        requestCookie = collectResponseCookies(httpConn);
        System.out.println("\n\nresponse cookie : " + requestCookie);
        // Release the first connection before the variable is reused for the second one.
        httpConn.disconnect();

        // ---- Second request [POST]: fetch the result page using the session cookies ----
        reqUrl = "/InternetBooking/AirFareFamiliesFlexibleForward.do";
        httpConn = openPostConnection(website + reqUrl, connectTimeOut, readDataTimeOut, requestCookie);
        System.out.println("request url : " + reqUrl);
        sendFormBody(httpConn, "lang=zh_CN", encode);
        // 5. Read the response body.
        if (HttpURLConnection.HTTP_OK == httpConn.getResponseCode())
        {
            InputStream inStream = httpConn.getInputStream();
            try
            {
                String htmlContent = getContentByStream(inStream, encode);
                // 6. Write the content to a file.
                writeToFile(htmlContent);
            } finally
            {
                // Closing again is harmless if the helper already closed the stream.
                inStream.close();
            }
        }
        System.out.println("================== crawl over. ");
    } catch (IOException e)
    {
        e.printStackTrace();
    } finally
    {
        if (null != httpConn)
        {
            try
            {
                httpConn.disconnect();
            } catch (Exception ignored)
            {
                // Best-effort cleanup: nothing useful can be done if disconnecting fails.
            }
        }
    } // end-try-catch-finally
}

/**
 * Creates (but does not yet connect) a POST connection with the browser-like
 * {@code User-Agent} header used by this crawler.
 *
 * @param fullUrl absolute URL to post to
 * @param connectTimeOut connect timeout in milliseconds
 * @param readDataTimeOut read timeout in milliseconds
 * @param cookie value for the {@code Cookie} request header; skipped when null or blank
 * @return the configured, not yet connected connection
 * @throws IOException if the URL is malformed or the connection cannot be created
 */
private static HttpURLConnection openPostConnection(String fullUrl, int connectTimeOut,
        int readDataTimeOut, String cookie) throws IOException
{
    HttpURLConnection conn = (HttpURLConnection) new URL(fullUrl).openConnection();
    conn.setDoInput(true);
    conn.setDoOutput(true);
    conn.setUseCaches(false);
    conn.setConnectTimeout(connectTimeOut);
    conn.setReadTimeout(readDataTimeOut);
    conn.setRequestProperty("User-Agent",
            "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727)");
    if (null != cookie && cookie.trim().length() > 0)
    {
        conn.setRequestProperty("Cookie", cookie);
    }
    conn.setRequestMethod("POST");
    return conn;
}

/**
 * Connects and writes the form body, always closing the output stream.
 *
 * @param conn connection prepared for output
 * @param body already URL-encoded form data
 * @param charsetName charset used to turn {@code body} into bytes
 * @throws IOException if connecting or writing fails, or the charset is unsupported
 */
private static void sendFormBody(HttpURLConnection conn, String body, String charsetName)
        throws IOException
{
    conn.connect();
    OutputStream outStream = conn.getOutputStream();
    try
    {
        // Use the explicit charset instead of the platform default.
        outStream.write(body.getBytes(charsetName));
        outStream.flush();
    } finally
    {
        outStream.close();
    }
}

/**
 * Prints every response header and builds a {@code Cookie} request-header value from the
 * {@code Set-Cookie} response headers.
 * <p>
 * Only the leading {@code name=value} part of each {@code Set-Cookie} value is kept
 * (attributes after the first {@code ';'} are dropped) and the pairs are joined
 * with {@code "; "}.
 *
 * @param conn connection whose response headers are read
 * @return the cookie header value, or {@code null} when the response set no cookies
 */
private static String collectResponseCookies(HttpURLConnection conn)
{
    Map<String, List<String>> resHeaderMap = conn.getHeaderFields();
    if (null == resHeaderMap || resHeaderMap.isEmpty())
    {
        return null;
    }
    StringBuilder cookies = new StringBuilder();
    for (Map.Entry<String, List<String>> entry : resHeaderMap.entrySet())
    {
        String key = entry.getKey();
        List<String> values = entry.getValue();
        System.out.println(key + " : " + java.util.Arrays.toString(values.toArray()));
        // Header names are case-insensitive; the status line is stored under a null key.
        if (null == key || !"Set-Cookie".equalsIgnoreCase(key.trim()))
        {
            continue;
        }
        for (String setCookie : values)
        {
            if (null == setCookie)
            {
                continue;
            }
            int attrStart = setCookie.indexOf(';');
            String pair = (attrStart < 0 ? setCookie : setCookie.substring(0, attrStart)).trim();
            if (pair.length() == 0)
            {
                continue;
            }
            if (cookies.length() > 0)
            {
                cookies.append("; ");
            }
            cookies.append(pair);
        }
    }
    return cookies.length() == 0 ? null : cookies.toString();
}