Java 获取页面源码的三种方式(转载,仅作记录)

一、GetURL.java
import java.io.*;
import java.net.*;
/**
 * Command-line tool that downloads the content of a URL and copies the raw
 * bytes either to a file or to standard output.
 *
 * <p>Usage: {@code java GetURL <URL> [<filename>]}
 */
public class GetURL {
    /**
     * Copies the bytes served at {@code args[0]} to the file named by
     * {@code args[1]}, or to {@code System.out} when no file name is given.
     *
     * <p>Any failure (wrong argument count, malformed URL, I/O error) is
     * reported on {@code System.err} together with a usage line; nothing is
     * thrown to the caller.
     *
     * @param args the URL to fetch, optionally followed by an output file name
     */
    public static void main(String[] args) {
        InputStream in = null;
        OutputStream out = null;
        // True only when this method opened the output stream itself.
        // System.out is shared with the rest of the JVM and must never be closed here.
        boolean ownsOutput = false;
        try {
            // Check the command-line arguments.
            if ((args.length != 1) && (args.length != 2))
                throw new IllegalArgumentException("Wrong number of args");

            URL url = new URL(args[0]);   // Parse the URL.
            in = url.openStream();        // Open a stream to that URL.

            // Pick the destination: a file if one was named, the console otherwise.
            if (args.length == 2) {
                out = new FileOutputStream(args[1]);
                ownsOutput = true;
            } else {
                out = System.out;
            }

            // Copy bytes from the URL to the destination.
            byte[] buffer = new byte[4096];
            int bytes_read;
            while ((bytes_read = in.read(buffer)) != -1)
                out.write(buffer, 0, bytes_read);
            out.flush();
        } catch (Exception e) {
            System.err.println(e);
            System.err.println("Usage: java GetURL <URL> [<filename>]");
        } finally {
            // Close each stream independently so that a null or failing input
            // stream can no longer prevent the output file from being closed.
            closeAndReport(in);
            if (ownsOutput)
                closeAndReport(out);
        }
    }

    /** Closes {@code resource} if it is non-null, reporting any failure on System.err. */
    private static void closeAndReport(Closeable resource) {
        if (resource == null)
            return;
        try {
            resource.close();
        } catch (IOException e) {
            System.err.println(e);
        }
    }
}

运行方法:
C:\java>java GetURL http://127.0.0.1:8080/kj/index.html index.html

二、geturl.jsp

<%@ page import="java.io.*" contentType="text/html;charset=gb2312" %>
<%@ page language="java" import="java.net.*"%>


<%
// Fetches the rendered output of morejava.jsp over HTTP and stores it as a
// static file (html/morejava.html under the web application root), dropping
// empty lines on the way.
String htmpath = null;
BufferedReader in = null;
InputStream is = null;
PrintWriter pw = null;
HttpURLConnection huc = null;
try {
    // Build the target path with File so the separator matches the host OS
    // instead of hard-coding the Windows backslash.
    File htmlDir = new File(getServletContext().getRealPath("/"), "html");
    htmpath = new File(htmlDir, "morejava.html").getPath();
    pw = new PrintWriter(htmpath);

    URL url = new URL("http://127.0.0.1:8080/kj/morejava.jsp"); // Source page to snapshot.
    huc = (HttpURLConnection) url.openConnection();
    is = huc.getInputStream();
    in = new BufferedReader(new InputStreamReader(is));

    String line = null;
    while ((line = in.readLine()) != null) {
        if (line.length() == 0)
            continue; // Skip empty lines.
        pw.println(line);
    }
} catch (Exception e) {
    System.err.println(e);
} finally {
    // Release every resource independently: previously a null input stream
    // (e.g. the connection failed) aborted the whole chain, so the writer
    // was never closed and its buffered output never reached the file.
    if (pw != null)
        pw.close();
    if (in != null) {
        // Closing the reader also closes the wrapped input stream.
        try { in.close(); } catch (IOException e) { System.err.println(e); }
    } else if (is != null) {
        try { is.close(); } catch (IOException e) { System.err.println(e); }
    }
    if (huc != null)
        huc.disconnect();
}
%>
OK--,创建文件成功

三、HttpClient.java

import java.io.*;
import java.net.*;

/**
 * Minimal command-line HTTP client built directly on a {@link Socket}.
 *
 * <p>Usage: {@code java HttpClient <URL> [<filename>]}
 */
public class HttpClient {
    /**
     * Connects to the host named in {@code args[0]}, sends a bare
     * {@code GET <path>} request and copies everything the server sends back
     * to the file named by {@code args[1]}, or to {@code System.out} when no
     * file name is given.
     *
     * <p>Only {@code http:} URLs are accepted. Any failure is reported on
     * {@code System.err} together with a usage line; nothing is thrown.
     *
     * @param args the URL to fetch, optionally followed by an output file name
     */
    public static void main(String[] args) {
        Socket socket = null;
        OutputStream to_file = null;
        // True only when this method opened the output stream itself.
        // System.out is shared with the rest of the JVM and must never be closed here.
        boolean ownsOutput = false;
        try {
            // Check the command-line arguments.
            if ((args.length != 1) && (args.length != 2))
                throw new IllegalArgumentException("Wrong number of args");

            // Validate the URL before touching the output file, so bad
            // arguments no longer create or truncate it.
            URL url = new URL(args[0]);
            String protocol = url.getProtocol();
            if (!protocol.equals("http"))
                throw new IllegalArgumentException("Must use 'http:' protocol");
            String host = url.getHost();
            int port = url.getPort();
            if (port == -1) port = 80; // No explicit port in the URL: use the HTTP default.
            String filename = url.getFile();
            // A URL such as http://host has an empty file part; request the
            // root document instead of sending "GET " with no target.
            if (filename.length() == 0) filename = "/";

            socket = new Socket(host, port); // Open the connection to the server.
            InputStream from_server = socket.getInputStream();
            PrintWriter to_server = new PrintWriter(socket.getOutputStream());

            if (args.length == 2) {
                to_file = new FileOutputStream(args[1]); // Write to the named file.
                ownsOutput = true;
            } else {
                to_file = System.out;                    // Write to the console.
            }

            // Request line without an HTTP version (the HTTP/0.9 "Simple-Request" form).
            to_server.print("GET " + filename + "\n\n");
            to_server.flush(); // Send it right now!

            // Copy the server's response to the destination.
            byte[] buffer = new byte[4096];
            int bytes_read;
            while ((bytes_read = from_server.read(buffer)) != -1)
                to_file.write(buffer, 0, bytes_read);
            to_file.flush();
        } catch (Exception e) {
            System.err.println(e);
            System.err.println("Usage: java HttpClient <URL> [<filename>]");
        } finally {
            // Release resources on every path, not only on success.
            if (socket != null) {
                try { socket.close(); } catch (IOException e) { System.err.println(e); }
            }
            if (ownsOutput) {
                try { to_file.close(); } catch (IOException e) { System.err.println(e); }
            }
        }
    }
}

运行方法:C:\java>java HttpClient http://127.0.0.1:8080/kj/index.html index.html


注意:中文可能会显示为乱码。得到源码后,应按页面的实际编码(如 UTF-8 或 GB2312)将字节解码为字符串,例如:


/**
 * Downloads the content at {@code strUrl} and returns it decoded as UTF-8.
 *
 * <p>All bytes are collected first and decoded in a single step, so a
 * multi-byte character that straddles two read chunks is decoded correctly.
 * Errors are reported on {@code System.err} and never thrown.
 *
 * @param strUrl the URL to read
 * @return the decoded content; if reading fails, whatever was received before
 *         the failure (the empty string when nothing was read)
 */
public static String GetURLstr(String strUrl)
{
    InputStream in = null;
    ByteArrayOutputStream collected = new ByteArrayOutputStream();
    try
    {
        URL url = new URL(strUrl); // Parse the URL.
        in = url.openStream();     // Open a stream to that URL.

        // Gather the raw bytes; decoding chunk by chunk would corrupt any
        // character split across a buffer boundary.
        byte[] buffer = new byte[4096];
        int bytes_read;
        while ((bytes_read = in.read(buffer)) != -1)
        {
            collected.write(buffer, 0, bytes_read);
        }
    }
    catch (Exception e)
    {
        System.err.println(e);
        System.err.println("GetURLstr: failed to read " + strUrl);
    }
    finally
    {
        // Always release the connection, including on failure.
        if (in != null)
        {
            try
            {
                in.close();
            }
            catch (IOException e)
            {
                System.err.println(e);
            }
        }
    }

    try
    {
        return collected.toString("UTF-8");
    }
    catch (UnsupportedEncodingException e)
    {
        // Not expected in practice: UTF-8 is a charset every Java platform must support.
        System.err.println(e);
        return "";
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值