一个简单的 Java 蜘蛛,通过流得到网站的信息,没有对 HTML 进行分析,等有时间了再完善。
package cn.border.spider;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Minimal "spider" demo: issues an HTTP GET with a custom User-Agent header,
 * reads the whole response body as UTF-8 text and prints it to standard output.
 * The HTML is not parsed in any way.
 */
public class HttpUserAgentTest {

    /** Number of characters requested from the reader on each read call. */
    private static final int BUFFER_SIZE = 1000;

    /** Maximum time to wait for the TCP connection to be established, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Maximum time to wait for data on an established connection, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 30000;

    /**
     * Fetches the page and prints its body.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the URL is malformed, the connection fails or times out,
     *                   or the response body cannot be read
     */
    public static void main(String[] args) throws Exception {
        URL url = new URL("http://www.borderj.cn");
        HttpURLConnection httpConnection = (HttpURLConnection) url.openConnection();
        try {
            // Without explicit timeouts an unresponsive host would block forever.
            httpConnection.setConnectTimeout(CONNECT_TIMEOUT_MS);
            httpConnection.setReadTimeout(READ_TIMEOUT_MS);

            // Identify this client to the server.
            httpConnection.setRequestProperty("User-Agent",
                    "BorderSpider ( Http://www.borderj.cn)");

            // try-with-resources closes the reader chain (and the underlying
            // stream) even when reading fails part-way through.
            try (InputStream input = httpConnection.getInputStream();
                 BufferedReader reader = new BufferedReader(
                         new InputStreamReader(input, "utf-8"))) {
                System.out.println(readAll(reader));
            }
        } finally {
            // This is a one-shot request, so release the connection explicitly.
            httpConnection.disconnect();
        }
    }

    /**
     * Drains {@code reader} until end of stream and returns everything that was read.
     *
     * @param reader source of characters; the caller remains responsible for closing it
     * @return the concatenation of all characters read, possibly empty
     * @throws java.io.IOException if reading fails
     */
    private static String readAll(BufferedReader reader) throws java.io.IOException {
        // StringBuilder keeps accumulation linear; repeated String += would
        // re-copy the entire accumulated text for every chunk.
        StringBuilder content = new StringBuilder();
        char[] buffer = new char[BUFFER_SIZE];
        int charsRead;
        while ((charsRead = reader.read(buffer, 0, buffer.length)) != -1) {
            content.append(buffer, 0, charsRead);
        }
        return content.toString();
    }
}
--
Blog: www.borderj.cn
MSN: borderj@live.com
Border