这段代码简单地模拟了浏览器的请求行为,使用的是 Socket,大致步骤为:
1.建立socket连接。
2.写HTTP头信息。
3.分析服务器响应。这个步骤需要注意:头信息是未压缩的普通文本,而正文是经过 gzip 压缩的,如果解析时没有先解压,看到的就只会是一堆乱码。
上代码:
package xin;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.Socket;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Locale;
import java.util.zip.GZIPInputStream;
/**
 * Minimal demonstration of speaking HTTP/1.1 over a raw {@link Socket}, imitating a browser
 * request.
 *
 * <p>The flow is: open the socket, write the request head, read the complete response, split
 * it into head and body, undo the transfer/content codings of the body and decode it to text.
 *
 * <p>The class name keeps its historical spelling so existing references keep working.
 */
public class ScoketTest {

    /** Host that is connected to and named in the {@code Host} request header. */
    private static final String HOST = "www.baidu.com";

    /** Plain-HTTP port. */
    private static final int PORT = 80;

    /** Socket read timeout, so a stalled server cannot block the client forever. */
    private static final int READ_TIMEOUT_MILLIS = 10000;

    /** Size in bytes (1 KB) of the buffer used when draining a stream. */
    private static final int BUFFER_SIZE = 1024;

    /** Charset assumed for the body when the response does not declare a usable one. */
    private static final String DEFAULT_BODY_CHARSET = "gb2312";

    /** Line terminator mandated by HTTP for the message head. */
    private static final String CRLF = "\r\n";

    /**
     * Sends one GET request to {@link #HOST} and prints the decoded response body.
     *
     * @param args unused
     * @throws Exception if the connection cannot be established or the request cannot be sent
     */
    public static void main(String[] args) throws Exception {
        // Closing the socket also closes both of its streams, even when an exception is thrown.
        try (Socket sk = new Socket(HOST, PORT)) {
            sk.setSoTimeout(READ_TIMEOUT_MILLIS);

            OutputStream os = sk.getOutputStream();
            os.write(setHeaderInfo().getBytes(StandardCharsets.US_ASCII));
            os.flush(); // push the request to the server before waiting for the answer

            String result = getInputStream(sk.getInputStream());
            System.out.println("解压后正文信息: " + result);
        }
    }

    /**
     * Reads a complete HTTP response from {@code is} and returns its body as text.
     *
     * <p>The stream is read until end-of-stream, so the request must ask the server to close
     * the connection. The head is printed, the still-encoded body is printed for comparison,
     * and then the body is de-chunked when {@code Transfer-Encoding} contains {@code chunked},
     * gunzipped when {@code Content-Encoding} contains {@code gzip}, and decoded with the
     * charset named in {@code Content-Type} (falling back to {@value #DEFAULT_BODY_CHARSET}).
     * Line breaks inside the body are preserved.
     *
     * @param is stream positioned at the first byte of the HTTP response
     * @return the decoded body, or {@code null} if the response could not be read or parsed
     *         (the failure is reported on standard error)
     * @throws UnsupportedEncodingException kept in the signature for compatibility; charset
     *         problems are reported like any other I/O failure and yield {@code null}
     */
    private static String getInputStream(InputStream is) throws UnsupportedEncodingException {
        try {
            byte[] raw = readFully(is);

            int headEnd = indexOfHeaderEnd(raw);
            if (headEnd < 0) {
                throw new IOException(
                        "Malformed HTTP response: end of header (CRLF CRLF) not found in "
                                + raw.length + " bytes");
            }

            // Header fields are ASCII; ISO-8859-1 maps every byte 1:1 so nothing is lost.
            String header = new String(raw, 0, headEnd, StandardCharsets.ISO_8859_1);
            System.out.println("头信息:" + header);
            // Shown on purpose: an encoded body looks garbled until it has been decoded.
            System.out.println("(未解压正文:"
                    + new String(raw, headEnd, raw.length - headEnd, StandardCharsets.ISO_8859_1));
            System.out.println("------------------------");

            byte[] body = Arrays.copyOfRange(raw, headEnd, raw.length);
            if (containsIgnoreCase(headerValue(header, "Transfer-Encoding"), "chunked")) {
                body = decodeChunked(body);
            }
            // Only gunzip when the server says it compressed the body; an empty body
            // (e.g. 204/304) is not a valid gzip stream and is left alone.
            if (body.length > 0
                    && containsIgnoreCase(headerValue(header, "Content-Encoding"), "gzip")) {
                body = gunzip(body);
            }

            return new String(body, resolveCharset(headerValue(header, "Content-Type")));
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Builds the request head for {@code GET /}.
     *
     * <p>{@code Host} is mandatory in HTTP/1.1. {@code Connection: close} makes the server end
     * the stream after the response, which is what lets the reader stop at end-of-stream.
     * Only {@code gzip} is advertised because it is the only content coding decoded here.
     *
     * @return the request head, terminated by the empty line that ends an HTTP head
     */
    private static String setHeaderInfo() {
        StringBuilder sb = new StringBuilder();
        sb.append("GET / HTTP/1.1").append(CRLF);
        sb.append("Host: ").append(HOST).append(CRLF);
        sb.append("Connection: close").append(CRLF);
        sb.append("Accept-Encoding: gzip").append(CRLF);
        sb.append("Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .append(CRLF);
        sb.append(CRLF); // empty line separates the head from the (absent) request body
        return sb.toString();
    }

    /** Drains {@code in} until end-of-stream and returns everything that was read. */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] buffer = new byte[BUFFER_SIZE];
        int count;
        while ((count = in.read(buffer)) != -1) {
            collected.write(buffer, 0, count);
        }
        return collected.toByteArray();
    }

    /** Returns the index just past the first CRLF CRLF in {@code data}, or -1 if absent. */
    private static int indexOfHeaderEnd(byte[] data) {
        for (int i = 0; i + 3 < data.length; i++) {
            if (data[i] == '\r' && data[i + 1] == '\n'
                    && data[i + 2] == '\r' && data[i + 3] == '\n') {
                return i + 4;
            }
        }
        return -1;
    }

    /** Returns the index of the first CRLF at or after {@code from}, or -1 if absent. */
    private static int indexOfCrlf(byte[] data, int from) {
        for (int i = from; i + 1 < data.length; i++) {
            if (data[i] == '\r' && data[i + 1] == '\n') {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns the trimmed value of the first header field called {@code name}
     * (case-insensitive), or {@code null} when the head has no such field.
     */
    private static String headerValue(String header, String name) {
        String[] lines = header.split(CRLF);
        // lines[0] is the status line, header fields start at index 1.
        for (int i = 1; i < lines.length; i++) {
            int colon = lines[i].indexOf(':');
            if (colon > 0 && lines[i].substring(0, colon).trim().equalsIgnoreCase(name)) {
                return lines[i].substring(colon + 1).trim();
            }
        }
        return null;
    }

    /** Null-safe, case-insensitive test whether {@code value} contains {@code lowerCaseToken}. */
    private static boolean containsIgnoreCase(String value, String lowerCaseToken) {
        return value != null && value.toLowerCase(Locale.ROOT).contains(lowerCaseToken);
    }

    /**
     * Reassembles a body sent with chunked transfer coding: each chunk is a hexadecimal size
     * line (optionally followed by {@code ;extensions}), the data, and a CRLF; a zero-size
     * chunk ends the body. Trailer fields after the last chunk are ignored.
     */
    private static byte[] decodeChunked(byte[] body) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream(body.length);
        int pos = 0;
        while (true) {
            int lineEnd = indexOfCrlf(body, pos);
            if (lineEnd < 0) {
                throw new IOException("Malformed chunked body: missing chunk-size line at offset " + pos);
            }
            String sizeLine = new String(body, pos, lineEnd - pos, StandardCharsets.ISO_8859_1);
            int extension = sizeLine.indexOf(';');
            if (extension >= 0) {
                sizeLine = sizeLine.substring(0, extension);
            }
            int size;
            try {
                size = Integer.parseInt(sizeLine.trim(), 16);
            } catch (NumberFormatException e) {
                throw new IOException("Malformed chunk size: '" + sizeLine + "'", e);
            }
            pos = lineEnd + 2;
            if (size == 0) {
                break;
            }
            if (size < 0 || size > body.length - pos) {
                throw new IOException("Truncated chunked body: chunk of " + size
                        + " bytes announced at offset " + pos + " of " + body.length);
            }
            out.write(body, pos, size);
            pos += size + 2; // skip the chunk data and the CRLF that follows it
        }
        return out.toByteArray();
    }

    /** Decompresses a gzip-encoded byte array. */
    private static byte[] gunzip(byte[] compressed) throws IOException {
        try (GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(compressed))) {
            return readFully(in);
        }
    }

    /**
     * Picks the charset for decoding the body: the {@code charset} parameter of the
     * {@code Content-Type} value when it names a charset this JVM supports, otherwise
     * {@value #DEFAULT_BODY_CHARSET}.
     *
     * @throws UnsupportedEncodingException if even the default charset is unavailable
     */
    private static Charset resolveCharset(String contentType) throws UnsupportedEncodingException {
        final String key = "charset=";
        if (contentType != null) {
            // The head was decoded as ISO-8859-1, whose lower-casing is 1:1, so indices
            // found in the lower-cased copy are valid in the original string.
            int at = contentType.toLowerCase(Locale.ROOT).indexOf(key);
            if (at >= 0) {
                String name = contentType.substring(at + key.length());
                int nextParam = name.indexOf(';');
                if (nextParam >= 0) {
                    name = name.substring(0, nextParam);
                }
                name = name.trim();
                if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                    name = name.substring(1, name.length() - 1);
                }
                try {
                    if (Charset.isSupported(name)) {
                        return Charset.forName(name);
                    }
                } catch (IllegalArgumentException ignored) {
                    // Syntactically illegal charset name sent by the server: use the default.
                }
            }
        }
        if (!Charset.isSupported(DEFAULT_BODY_CHARSET)) {
            throw new UnsupportedEncodingException(DEFAULT_BODY_CHARSET);
        }
        return Charset.forName(DEFAULT_BODY_CHARSET);
    }
}