下面提供一个 Java 源文件,它实现的功能是:在 Java 中访问指定网址,并把该页面下载保存到本地指定位置;同时还可以设置代理服务器。
package ie;
/**
* @author webkkk(blog.csdn.net/webkkk)
*/
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Minimal page downloader built on {@link HttpURLConnection}.
 *
 * <p>Requests are sent with an Internet Explorer style User-Agent and are
 * routed through the HTTP proxy configured in {@link #PROXY_HOST} and
 * {@link #PROXY_PORT}. The proxy is installed through JVM-wide system
 * properties, so it also affects other HTTP connections opened later in the
 * same process.
 */
public class JavaIe {

    /** Host of the HTTP proxy that requests are routed through. */
    private static final String PROXY_HOST = "172.16.64.10";

    /** Port of the HTTP proxy that requests are routed through. */
    private static final String PROXY_PORT = "12080";

    /** User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)";

    /** File name used when the URL path does not end in a file name. */
    private static final String DEFAULT_FILE_NAME = "index.html";

    /** Size in bytes of the buffer used to copy the response body. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Downloads a sample page into {@code C:/temp/log/}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        DownLoadPages("http://www.rdnovel.com/files/article/novelread/0/37/349424.html", "C:/temp/log/");
    }

    /**
     * Fetches {@code urlStr} with an HTTP GET and writes the response body to
     * {@code outPath}.
     *
     * <p>If {@code outPath} names a directory (it is an existing directory or
     * ends with a path separator), the file is created inside it and named
     * after the last segment of the URL path. Otherwise {@code outPath} is
     * used as the target file itself.
     *
     * <p>Malformed URLs and I/O failures are reported with
     * {@code printStackTrace()} and are not rethrown. The output file is only
     * opened once the response stream has been obtained, so a failed request
     * does not leave an empty file behind.
     *
     * @param urlStr  address of the page to download; must be an http(s) URL
     * @param outPath target file, or target directory as described above
     */
    public static void DownLoadPages(String urlStr, String outPath) {
        HttpURLConnection httpConn = null;
        InputStream in = null;
        FileOutputStream out = null;
        try {
            // use proxy begin
            System.getProperties().put("proxySet", "true");
            System.getProperties().setProperty("http.proxyHost", PROXY_HOST);
            System.getProperties().setProperty("http.proxyPort", PROXY_PORT);
            // use proxy end

            URL url = new URL(urlStr);
            httpConn = (HttpURLConnection) url.openConnection();
            // The static setFollowRedirects only changes the default for
            // connections created afterwards; configure this instance instead.
            httpConn.setInstanceFollowRedirects(true);
            httpConn.setRequestMethod("GET");
            httpConn.setRequestProperty("User-Agent", USER_AGENT);
            in = httpConn.getInputStream();

            out = new FileOutputStream(resolveTarget(url, outPath));
            byte[] buffer = new byte[BUFFER_SIZE];
            int count = in.read(buffer);
            while (count != -1) {
                out.write(buffer, 0, count);
                count = in.read(buffer);
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Each resource is released independently so that a resource that
            // was never opened, or one failing close, cannot skip the others.
            closeQuietly(out);
            closeQuietly(in);
            if (httpConn != null) {
                httpConn.disconnect();
            }
        }
    }

    /**
     * Maps {@code outPath} to the file that should receive the download,
     * creating the target directory when {@code outPath} denotes a directory.
     */
    private static File resolveTarget(URL url, String outPath) {
        File target = new File(outPath);
        boolean directory = target.isDirectory()
                || outPath.endsWith("/")
                || outPath.endsWith(File.separator);
        if (!directory) {
            return target;
        }
        if (!target.exists()) {
            target.mkdirs();
        }
        String path = url.getPath();
        String fileName = path.substring(path.lastIndexOf('/') + 1);
        if (fileName.length() == 0) {
            fileName = DEFAULT_FILE_NAME;
        }
        return new File(target, fileName);
    }

    /** Closes {@code resource} if it was opened, reporting but not propagating failures. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}
此外,还有第二种方法可以访问 Google 等网站:使用 Apache 提供的工具 HttpClient 模拟浏览器来访问 Google。
// Fetch the page at `url` with Apache Commons HttpClient and, on HTTP 200,
// parse the response body into a DOM document. `document` stays null for any
// other status code.
Document document = null;
HttpClient httpClient = new HttpClient();
GetMethod getMethod = new GetMethod(url);
getMethod.setFollowRedirects(true);
try {
    int statusCode = httpClient.executeMethod(getMethod);
    if (statusCode == HttpStatus.SC_OK) {
        InputStream in = getMethod.getResponseBodyAsStream();
        InputSource is = new InputSource(in);
        DOMParser domParser = new DOMParser(); // NekoHTML: converts the fetched page into a DOM
        domParser.parse(is);
        document = domParser.getDocument();
        System.out.println(getMethod.getURI());
    }
} finally {
    // Always hand the connection back to the connection manager, even when
    // the request or the parsing fails; otherwise it is never reused.
    getMethod.releaseConnection();
}
return document;
推荐使用第一种方式:HttpURLConnection 比较轻量级,速度也比第二种方式(HttpClient)快。
关于用 Java 模拟 IE 通过表单(form)登录 Web 站点的问题
// Submit the url-encoded form body `data` to `url` with an HTTP POST,
// replaying the session cookie from the previous response, then keep the
// cookie returned by the server and open a reader on the response body.
HttpURLConnection urlConn=(HttpURLConnection)(new URL(url).openConnection());
// No cookie is available before the first response (getHeaderField below
// returns null when the header is absent), so only send one that exists.
if (cookie != null) {
    urlConn.addRequestProperty("Cookie",cookie);
}
urlConn.setRequestMethod("POST");
urlConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)");
// setFollowRedirects is static and only changes the default for connections
// created later; the per-instance setter configures this connection.
urlConn.setInstanceFollowRedirects(true);
urlConn.setDoOutput(true); // the request body is written to the server
urlConn.setDoInput(true); // the response is read back below
urlConn.setUseCaches(false); // bypass caches to get the server's latest response
urlConn.setAllowUserInteraction(false);
urlConn.setRequestProperty("Content-Type","application/x-www-form-urlencoded");
urlConn.setRequestProperty("Content-Language","en-US" );
// writeBytes emits exactly one byte per char, so the char count is the body size.
urlConn.setRequestProperty("Content-Length", ""+data.length());
DataOutputStream outStream = new DataOutputStream(urlConn.getOutputStream());
try {
    outStream.writeBytes(data);
    outStream.flush();
} finally {
    // Close the request stream even when writing fails.
    outStream.close();
}
cookie=urlConn.getHeaderField("Set-Cookie");
// The response is decoded as gb2312.
BufferedReader br=new BufferedReader(new InputStreamReader(urlConn.getInputStream(),"gb2312"));