Java程序获取网页源代码

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.Date;
import java.util.zip.GZIPInputStream;


/**
 * Downloads the source of a web page, prints the response metadata and the
 * page text to standard output, and saves a copy of the text to
 * {@link #OUTPUT_FILE}.
 */
public class TestHtmlCode {

	/** File that receives a copy of the downloaded page. */
	private static final String OUTPUT_FILE = "d:\\category.txt";

	/**
	 * Encoding of the saved copy. Fixed to UTF-8 so the file content does not
	 * depend on the platform default charset and no character is lost.
	 */
	private static final String OUTPUT_CHARSET = "UTF-8";

	/**
	 * Connects to the given URL, prints the response header information and
	 * then dumps the page body, choosing the gzip or the plain reader based
	 * on the Content-Encoding header.
	 *
	 * <p>I/O failures while connecting are reported on standard output; this
	 * method does not throw.
	 *
	 * @param urlString absolute URL of the page to fetch
	 */
	void display(String urlString){
		try {
			URL url = new URL(urlString);
			// openConnection() only creates the connection object;
			// connect() performs the actual network round trip.
			URLConnection c = url.openConnection();
			c.connect();
			// Response metadata, all read from the URLConnection.
			System.out.println("编码:" + c.getContentEncoding());
			System.out.println("内容类型: " + c.getContentType());
			System.out.println("内容长度: " + c.getContentLength());
			System.out.println("创建日期: " + new Date(c.getDate()));
			System.out.println("最后修改日期: " + new Date(c.getLastModified()));
			System.out.println("终止日期: " + new Date(c.getExpiration()));

			// Header values are case-insensitive, so "GZIP" must match too.
			if ("gzip".equalsIgnoreCase(c.getContentEncoding())) {
				this.doGzipHtml(c);
				return;
			}
			this.doSimpleHtml(c);

		} catch (IOException e) {
			System.out.println(e);
		}
	}

	/**
	 * Reads a gzip-encoded response body through a {@link GZIPInputStream}.
	 * The text is decoded with the charset declared in the Content-Type
	 * header, falling back to UTF-8 when none is declared.
	 *
	 * @param c an already connected connection whose body is gzip-compressed
	 */
	private void doGzipHtml(URLConnection c){
		try {
			InputStream raw = c.getInputStream();
			try {
				copyToFile(new GZIPInputStream(raw), charsetOf(c.getContentType(), "UTF-8"));
			} finally {
				// Also covers the case where the GZIP header is invalid and
				// the wrapping stream was never created.
				closeQuietly(raw);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Reads an uncompressed response body directly from the connection.
	 * The text is decoded with the charset declared in the Content-Type
	 * header, falling back to gb2312 when none is declared.
	 *
	 * @param c an already connected connection whose body is not compressed
	 */
	private void doSimpleHtml(URLConnection c){
		try {
			InputStream raw = c.getInputStream();
			try {
				copyToFile(raw, charsetOf(c.getContentType(), "gb2312"));
			} finally {
				closeQuietly(raw);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Decodes {@code in} once with {@code charsetName}, echoes every line to
	 * standard output and writes it, followed by a line separator, to
	 * {@link #OUTPUT_FILE}. Both the reader and the writer are closed before
	 * returning, whether or not the copy succeeded.
	 *
	 * @param in          stream positioned at the start of the page text
	 * @param charsetName name of the charset used to decode {@code in}
	 * @throws IOException if reading, decoding setup or writing fails
	 */
	private static void copyToFile(InputStream in, String charsetName) throws IOException {
		BufferedReader reader = null;
		BufferedWriter writer = null;
		try {
			reader = new BufferedReader(new InputStreamReader(in, charsetName));
			writer = new BufferedWriter(new OutputStreamWriter(
					new FileOutputStream(OUTPUT_FILE), OUTPUT_CHARSET));
			String line;
			while ((line = reader.readLine()) != null) {
				System.out.println(line);
				writer.write(line);
				// readLine() strips the terminator; put one back so the saved
				// file keeps its line structure.
				writer.newLine();
			}
			// Flush explicitly so a write failure surfaces here instead of
			// being swallowed by the quiet close below.
			writer.flush();
		} finally {
			closeQuietly(writer);
			closeQuietly(reader);
		}
	}

	/**
	 * Extracts the {@code charset} parameter from a Content-Type header value.
	 *
	 * @param contentType header value such as {@code "text/html; charset=utf-8"};
	 *                    may be {@code null}
	 * @param fallback    charset name returned when the header is missing,
	 *                    has no charset parameter, or names a charset this
	 *                    JVM does not support
	 * @return the declared charset name, or {@code fallback}
	 */
	static String charsetOf(String contentType, String fallback) {
		if (contentType == null) {
			return fallback;
		}
		String key = "charset=";
		for (String part : contentType.split(";")) {
			String param = part.trim();
			if (!param.regionMatches(true, 0, key, 0, key.length())) {
				continue;
			}
			String name = param.substring(key.length()).trim();
			// The value may be a quoted string: charset="utf-8".
			if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
				name = name.substring(1, name.length() - 1).trim();
			}
			if (name.isEmpty()) {
				continue;
			}
			try {
				if (Charset.isSupported(name)) {
					return name;
				}
			} catch (IllegalArgumentException ignored) {
				// Syntactically illegal charset name: treat it as undeclared.
			}
		}
		return fallback;
	}

	/**
	 * Closes {@code resource}, ignoring {@code null} and any failure; used
	 * only in cleanup paths where a close error must not mask the primary
	 * exception.
	 */
	private static void closeQuietly(Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException ignored) {
			// Best-effort cleanup; nothing useful can be done here.
		}
	}

	/**
	 * Fetches a fixed demo URL and displays it.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String urlString =
		    "http://www.baidu.com"; // page to fetch
		new TestHtmlCode().display(urlString);
	}

}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值