关于URLConnection获取网页内容

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;

/**
 * Demo program: fetches a page through {@link URLConnection} and echoes the
 * response body to standard output, one line at a time.
 *
 * <p>Usage: {@code java URLTest [url]}. When no URL is given, the default
 * page is fetched.
 */
public class URLTest {

	/** Page fetched when no URL is supplied on the command line. */
	private static final String DEFAULT_URL = "http://www.google.com.hk";

	/** Connect and read timeout, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 1000000;

	/**
	 * Charset used to decode the response body.
	 * NOTE(review): this is fixed, so a page served in a different encoding
	 * will be decoded incorrectly; confirm it matches the target site.
	 */
	private static final String PAGE_CHARSET = "Big5";

	/**
	 * Fetches the page and prints it. I/O failures are reported on standard
	 * error via a stack trace; nothing is rethrown.
	 *
	 * @param args optional; {@code args[0]} overrides the default URL
	 */
	public static void main(String[] args) {
		String address = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
		URLConnection urlconn = null;
		try {
			urlconn = new URL(address).openConnection();
			urlconn.setConnectTimeout(TIMEOUT_MILLIS);
			urlconn.setReadTimeout(TIMEOUT_MILLIS);
			// Some sites inspect the User-Agent header; if the request is rejected,
			// set one with urlconn.setRequestProperty("User-Agent", ...) here.

			// try-with-resources closes the reader and the stream even when
			// reading fails half-way through the body.
			try (InputStream in = urlconn.getInputStream();
					BufferedReader br = new BufferedReader(new InputStreamReader(in, PAGE_CHARSET))) {
				String line;
				while ((line = br.readLine()) != null) {
					System.out.println(line);
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Only HTTP connections have something to disconnect; other schemes
			// (e.g. file:) are fully released by closing the stream.
			if (urlconn instanceof HttpURLConnection) {
				((HttpURLConnection) urlconn).disconnect();
			}
		}
	}
}

 

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

/**
 * Demo program: downloads the beginning of a page through
 * {@link URLConnection} and prints its Content-Type followed by the decoded
 * body to standard output.
 */
public class PageDown {

	/** Maximum number of body bytes that are read and printed. */
	private static final int MAX_BYTES = 10000;

	/** Connect and read timeout, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 6000;

	/** Charset used when the Content-Type header does not declare one. */
	private static final String DEFAULT_CHARSET = "UTF-8";

	/**
	 * Fetches {@code sUrl}, prints the Content-Type header value, then prints
	 * at most {@link #MAX_BYTES} bytes of the body decoded with the charset
	 * named in that header (or {@link #DEFAULT_CHARSET} if none is named).
	 * Failures are reported on standard error via a stack trace; nothing is
	 * rethrown.
	 *
	 * @param sUrl address of the page to download
	 */
	public PageDown(String sUrl) {
		try {
			URL url = new URL(sUrl);
			URLConnection urlConnection = url.openConnection();
			urlConnection.setConnectTimeout(TIMEOUT_MILLIS);
			urlConnection.setReadTimeout(TIMEOUT_MILLIS);
			// May be null when the server sends no Content-Type header.
			String type = urlConnection.getContentType();
			System.out.println(type);
			// Reading a header already connects implicitly; kept so the
			// connection step stays visible in the demo.
			urlConnection.connect();

			byte[] data = new byte[MAX_BYTES];
			int offset = 0;
			// try-with-resources closes the stream even if a read fails.
			try (InputStream in = new BufferedInputStream(urlConnection.getInputStream())) {
				while (offset < data.length) {
					int bytesRead = in.read(data, offset, data.length - offset);
					if (bytesRead == -1) {
						break;
					}
					offset += bytesRead;
				}
			}
			// Decode only the bytes actually read; decoding the whole buffer
			// would append NUL characters for every unused slot. A body cut at
			// MAX_BYTES may end in the middle of a multi-byte character.
			System.out.println(new String(data, 0, offset, charsetOf(type)));
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Extracts the charset name from a Content-Type value such as
	 * {@code text/html; charset="utf-8"}.
	 *
	 * @param contentType header value, possibly {@code null}
	 * @return the declared charset name, or {@link #DEFAULT_CHARSET} when the
	 *         value is {@code null} or declares no charset
	 */
	private static String charsetOf(String contentType) {
		if (contentType != null) {
			final String key = "charset=";
			for (String param : contentType.split(";")) {
				String trimmed = param.trim();
				if (trimmed.regionMatches(true, 0, key, 0, key.length())) {
					String name = trimmed.substring(key.length()).trim();
					// The value may be a quoted string.
					if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
						name = name.substring(1, name.length() - 1).trim();
					}
					if (!name.isEmpty()) {
						return name;
					}
				}
			}
		}
		return DEFAULT_CHARSET;
	}

	public static void main(String[] args) {
		new PageDown("http://hi.csdn.net/lishigui");
	}

}

 

有些站点会对请求的 User-Agent 进行鉴别,必要时可通过 setRequestProperty("User-Agent", ...) 设置该请求头。

部分参考:http://topic.csdn.net/u/20100620/18/3c181bed-46d4-4ad7-b0da-b12e479f3545.html

 

For the first program, you can also skip openConnection() and call openStream() directly on the URL object; that works as well.

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值