// Fetches the web page at the specified URL and saves it as a local HTML file.
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Command-line tool that copies the raw bytes served at a URL either into a
 * local file or onto standard output.
 *
 * <p>Usage: {@code java GetUrlToHtml <URL> [<filename>]}
 *
 * <p>The content is copied byte-for-byte; no character decoding or HTML
 * processing is performed.
 */
public class GetUrlToHtml {

    /** Size in bytes of the buffer used while copying. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Program entry point.
     *
     * <p>Any failure (wrong argument count, malformed URL, I/O error while
     * reading, writing or closing) is reported on standard error together
     * with the usage line; no exception escapes this method.
     *
     * @param args {@code args[0]} is the URL to fetch; the optional
     *             {@code args[1]} is the path of the file to write. When it
     *             is omitted the content is written to standard output.
     */
    public static void main(String[] args) {
        try {
            // Validate the command line before touching the network.
            if (args.length != 1 && args.length != 2) {
                throw new IllegalArgumentException("Wrong number of args");
            }

            URL url = new URL(args[0]);

            // The URL stream is opened before the output file so that a bad
            // URL does not leave an empty output file behind.
            try (InputStream in = url.openStream()) {
                if (args.length == 2) {
                    // try-with-resources closes the file even when the copy
                    // fails, and surfaces close() errors instead of hiding them.
                    try (OutputStream out = new FileOutputStream(args[1])) {
                        copy(in, out);
                    }
                } else {
                    // System.out belongs to the JVM: flush it, never close it.
                    copy(in, System.out);
                    System.out.flush();
                }
            }
        } catch (Exception e) {
            System.err.println(e);
            System.err.println("Usage: java GetUrlToHtml <URL> [<filename>]");
        }
    }

    /**
     * Copies every remaining byte of {@code in} to {@code out}.
     *
     * <p>Neither stream is closed by this method.
     *
     * @param in  stream to read until end-of-stream
     * @param out stream that receives the bytes
     * @throws java.io.IOException if reading or writing fails
     */
    private static void copy(InputStream in, OutputStream out) throws java.io.IOException {
        byte[] buffer = new byte[BUFFER_SIZE];
        int bytesRead;
        while ((bytesRead = in.read(buffer)) != -1) {
            out.write(buffer, 0, bytesRead);
        }
    }
}