导读:
通过输入网址获取服务器返回的源码(未去掉 HTTP 协议头信息)。
使用 URI 解析域名,并通过 Socket 连接服务器;可自定义每次读取多少 KB 的数据,直到响应返回完毕为止。可作为网络爬虫的简单引擎。
源码地址:http://www.foryears.com/HttpClient.rar
图片如下:
此主题相关图片如下:
核心代码:
import java.io.*;
import java.net.*;
import javax.net.*;
import javax.net.ssl.*;
import java.security.cert.*;
/**
 * Minimal HTTP/1.0 client built directly on {@link Socket}.
 *
 * <p>It issues a single {@code GET} request and returns the first part of the raw
 * response, status line and headers included. Both {@code http} and {@code https}
 * URLs are accepted; a URL without a scheme is treated as {@code http}.
 */
public class HttpClient {

  /** Size of the receive buffer, which is also the hard cap on returned bytes. */
  private static final int BUFFER_SIZE = 8 * 1024;

  /** Reading stops as soon as at least this many bytes have been received. */
  private static final int READ_THRESHOLD = 4 * 1024;

  /**
   * Fetches {@code url} and returns the beginning of the raw HTTP response.
   *
   * <p>Reading stops at end of stream or once {@link #READ_THRESHOLD} bytes have
   * arrived, so at most {@link #BUFFER_SIZE} bytes are returned. The bytes are
   * decoded with the platform default charset.
   *
   * <p>Failures are not thrown to the caller. Instead the returned string carries
   * a human-readable error message (and the exception is echoed to
   * {@code System.err}) for DNS failures, I/O failures (including an unverified
   * TLS peer) and malformed URLs.
   *
   * @param url the address to fetch; {@code http://} is prepended when no
   *     {@code http://} or {@code https://} prefix is present
   * @return the raw response text, or an error description on failure
   */
  public String getResponseStr(String url) {
    String responseStr = "";
    Socket socket = null;
    try {
      if (!url.startsWith("http://") && !url.startsWith("https://")) {
        url = "http://" + url;
      }
      URI uri = new URI(url);
      String protocol = uri.getScheme();
      String host = uri.getHost();
      if (host == null) {
        // new Socket(null, port) would silently connect to the loopback address.
        throw new URISyntaxException(url, "URL does not contain a valid host");
      }
      int explicitPort = uri.getPort();

      String path = uri.getRawPath();
      if (path == null || path.length() == 0) {
        path = "/";
      }
      String query = uri.getRawQuery();
      if (query != null && query.length() > 0) {
        path += "?" + query;
      }

      if ("http".equals(protocol)) {
        socket = new Socket(host, explicitPort == -1 ? 80 : explicitPort);
      } else if ("https".equals(protocol)) {
        SSLSocket sslSocket = (SSLSocket) SSLSocketFactory.getDefault()
            .createSocket(host, explicitPort == -1 ? 443 : explicitPort);
        // Assign before the handshake so the finally block closes it on failure.
        socket = sslSocket;
        // NOTE(review): a raw SSLSocket validates the certificate chain but does
        // not check that the certificate matches the host name; consider
        // SSLParameters.setEndpointIdentificationAlgorithm("HTTPS").
        printPeerCertificate(sslSocket.getSession());
      } else {
        throw new IllegalArgumentException("URL must use http: or "
            + "https: protocol");
      }

      // The Host header must carry the port when the URL named one explicitly.
      String hostHeader = (explicitPort == -1) ? host : host + ":" + explicitPort;

      InputStream fromServer = socket.getInputStream();
      PrintWriter toServer = new PrintWriter(socket.getOutputStream());
      toServer.print("GET " + path + " HTTP/1.0\r\n"
          + "Host: " + hostHeader + "\r\n"
          + "Connection: close\r\n\r\n");
      toServer.flush();

      byte[] buffer = new byte[BUFFER_SIZE];
      int numBytes = 0;
      while (numBytes < READ_THRESHOLD) {
        int bytesRead = fromServer.read(buffer, numBytes, buffer.length - numBytes);
        if (bytesRead == -1) {
          break;
        }
        numBytes += bytesRead;
      }

      // Sanity check: a real HTTP response has a blank line ending the headers.
      if (indexOfHeaderEnd(buffer, numBytes) == -1) {
        throw new IOException("End of headers not found in first " + numBytes
            + " bytes");
      }

      // Decode only the bytes actually received, not the whole zero-padded buffer.
      responseStr = new String(buffer, 0, numBytes);
    } catch (UnknownHostException e) {
      responseStr = "域名解析失败,请检查网络设置:" + e.toString();
      System.err.println(e);
    } catch (IOException e) {
      responseStr = "文件传输失败:" + e.toString();
      System.err.println(e);
    } catch (URISyntaxException e) {
      responseStr = "URI构造语法出错:" + e.toString();
      System.err.println(e);
    } finally {
      if (socket != null) {
        try {
          socket.close();
        } catch (IOException ignored) {
          // Nothing useful can be done if closing fails.
        }
      }
    }
    return responseStr;
  }

  /**
   * Logs the subject and issuer of the peer's certificate to {@code System.out}.
   *
   * @throws SSLPeerUnverifiedException if the peer presented no valid certificate
   */
  private static void printPeerCertificate(SSLSession session)
      throws SSLPeerUnverifiedException {
    X509Certificate cert = (X509Certificate) session.getPeerCertificates()[0];
    System.out.println(session.getPeerHost()
        + " has presented a certificate belonging to:\t"
        + "[" + cert.getSubjectDN() + "]\n"
        + "The certificate was issued by: \t"
        + "[" + cert.getIssuerDN() + "]");
  }

  /**
   * Returns the index just past the first CR LF CR LF within the first
   * {@code length} bytes of {@code data}, or -1 when there is none.
   */
  private static int indexOfHeaderEnd(byte[] data, int length) {
    for (int i = 0; i + 3 < length; i++) {
      if (data[i] == '\r' && data[i + 1] == '\n'
          && data[i + 2] == '\r' && data[i + 3] == '\n') {
        return i + 4;
      }
    }
    return -1;
  }
}
Trackback: http://tb.blog.csdn.net/TrackBack.aspx?PostId=623818
本文转自
http://blog.csdn.net/yeyinyue/archive/2006/03/14/623818.aspx
通过输入网址获取服务器返回的源码(未去掉 HTTP 协议头信息)。
使用 URI 解析域名,并通过 Socket 连接服务器;可自定义每次读取多少 KB 的数据,直到响应返回完毕为止。可作为网络爬虫的简单引擎。
源码地址:http://www.foryears.com/HttpClient.rar
图片如下:
此主题相关图片如下:
核心代码:
import java.io.*;
import java.net.*;
import javax.net.*;
import javax.net.ssl.*;
import java.security.cert.*;
/**
 * Minimal HTTP/1.0 client built directly on {@link Socket}.
 *
 * <p>It issues a single {@code GET} request and returns the first part of the raw
 * response, status line and headers included. Both {@code http} and {@code https}
 * URLs are accepted; a URL without a scheme is treated as {@code http}.
 */
public class HttpClient {

  /** Size of the receive buffer, which is also the hard cap on returned bytes. */
  private static final int BUFFER_SIZE = 8 * 1024;

  /** Reading stops as soon as at least this many bytes have been received. */
  private static final int READ_THRESHOLD = 4 * 1024;

  /**
   * Fetches {@code url} and returns the beginning of the raw HTTP response.
   *
   * <p>Reading stops at end of stream or once {@link #READ_THRESHOLD} bytes have
   * arrived, so at most {@link #BUFFER_SIZE} bytes are returned. The bytes are
   * decoded with the platform default charset.
   *
   * <p>Failures are not thrown to the caller. Instead the returned string carries
   * a human-readable error message (and the exception is echoed to
   * {@code System.err}) for DNS failures, I/O failures (including an unverified
   * TLS peer) and malformed URLs.
   *
   * @param url the address to fetch; {@code http://} is prepended when no
   *     {@code http://} or {@code https://} prefix is present
   * @return the raw response text, or an error description on failure
   */
  public String getResponseStr(String url) {
    String responseStr = "";
    Socket socket = null;
    try {
      if (!url.startsWith("http://") && !url.startsWith("https://")) {
        url = "http://" + url;
      }
      URI uri = new URI(url);
      String protocol = uri.getScheme();
      String host = uri.getHost();
      if (host == null) {
        // new Socket(null, port) would silently connect to the loopback address.
        throw new URISyntaxException(url, "URL does not contain a valid host");
      }
      int explicitPort = uri.getPort();

      String path = uri.getRawPath();
      if (path == null || path.length() == 0) {
        path = "/";
      }
      String query = uri.getRawQuery();
      if (query != null && query.length() > 0) {
        path += "?" + query;
      }

      if ("http".equals(protocol)) {
        socket = new Socket(host, explicitPort == -1 ? 80 : explicitPort);
      } else if ("https".equals(protocol)) {
        SSLSocket sslSocket = (SSLSocket) SSLSocketFactory.getDefault()
            .createSocket(host, explicitPort == -1 ? 443 : explicitPort);
        // Assign before the handshake so the finally block closes it on failure.
        socket = sslSocket;
        // NOTE(review): a raw SSLSocket validates the certificate chain but does
        // not check that the certificate matches the host name; consider
        // SSLParameters.setEndpointIdentificationAlgorithm("HTTPS").
        printPeerCertificate(sslSocket.getSession());
      } else {
        throw new IllegalArgumentException("URL must use http: or "
            + "https: protocol");
      }

      // The Host header must carry the port when the URL named one explicitly.
      String hostHeader = (explicitPort == -1) ? host : host + ":" + explicitPort;

      InputStream fromServer = socket.getInputStream();
      PrintWriter toServer = new PrintWriter(socket.getOutputStream());
      toServer.print("GET " + path + " HTTP/1.0\r\n"
          + "Host: " + hostHeader + "\r\n"
          + "Connection: close\r\n\r\n");
      toServer.flush();

      byte[] buffer = new byte[BUFFER_SIZE];
      int numBytes = 0;
      while (numBytes < READ_THRESHOLD) {
        int bytesRead = fromServer.read(buffer, numBytes, buffer.length - numBytes);
        if (bytesRead == -1) {
          break;
        }
        numBytes += bytesRead;
      }

      // Sanity check: a real HTTP response has a blank line ending the headers.
      if (indexOfHeaderEnd(buffer, numBytes) == -1) {
        throw new IOException("End of headers not found in first " + numBytes
            + " bytes");
      }

      // Decode only the bytes actually received, not the whole zero-padded buffer.
      responseStr = new String(buffer, 0, numBytes);
    } catch (UnknownHostException e) {
      responseStr = "域名解析失败,请检查网络设置:" + e.toString();
      System.err.println(e);
    } catch (IOException e) {
      responseStr = "文件传输失败:" + e.toString();
      System.err.println(e);
    } catch (URISyntaxException e) {
      responseStr = "URI构造语法出错:" + e.toString();
      System.err.println(e);
    } finally {
      if (socket != null) {
        try {
          socket.close();
        } catch (IOException ignored) {
          // Nothing useful can be done if closing fails.
        }
      }
    }
    return responseStr;
  }

  /**
   * Logs the subject and issuer of the peer's certificate to {@code System.out}.
   *
   * @throws SSLPeerUnverifiedException if the peer presented no valid certificate
   */
  private static void printPeerCertificate(SSLSession session)
      throws SSLPeerUnverifiedException {
    X509Certificate cert = (X509Certificate) session.getPeerCertificates()[0];
    System.out.println(session.getPeerHost()
        + " has presented a certificate belonging to:\t"
        + "[" + cert.getSubjectDN() + "]\n"
        + "The certificate was issued by: \t"
        + "[" + cert.getIssuerDN() + "]");
  }

  /**
   * Returns the index just past the first CR LF CR LF within the first
   * {@code length} bytes of {@code data}, or -1 when there is none.
   */
  private static int indexOfHeaderEnd(byte[] data, int length) {
    for (int i = 0; i + 3 < length; i++) {
      if (data[i] == '\r' && data[i + 1] == '\n'
          && data[i + 2] == '\r' && data[i + 3] == '\n') {
        return i + 4;
      }
    }
    return -1;
  }
}
Trackback: http://tb.blog.csdn.net/TrackBack.aspx?PostId=623818
本文转自
http://blog.csdn.net/yeyinyue/archive/2006/03/14/623818.aspx