import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
/**
 * Minimal page fetcher: downloads a single page over HTTP and prints its
 * source to standard output, one line at a time.
 *
 * <p>Constructing an instance opens the connection, runs the download on a
 * freshly started worker thread and blocks until that thread has finished.
 * Note that the worker thread is handed {@code this} from inside the
 * constructor, so the class is not designed for subclassing.
 *
 * <p>All failures (bad URL, I/O errors, HTTP error responses) are reported on
 * standard error; none of them propagate out of the constructor or
 * {@link #run()}.
 */
public class pageSpider implements Runnable {
    /** Page fetched by the no-argument constructor. */
    private static final String DEFAULT_URL = "http://www.baidu.com";

    /** Charset used when the response does not declare a usable one. */
    private static final String DEFAULT_CHARSET = "gb2312";

    /** Prefix of the charset parameter inside a Content-Type header value. */
    private static final String CHARSET_PARAM = "charset=";

    /** Upper bound, in milliseconds, for establishing the connection. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Upper bound, in milliseconds, for a single blocking read. */
    private static final int READ_TIMEOUT_MS = 30000;

    /** Connection to {@link #url}; {@code null} when it could not be opened. */
    HttpURLConnection httpUrlConnection;

    /** Response body stream of the most recent {@link #run()}; closed once run() returns. */
    InputStream inputStream;

    /** Line reader over {@link #inputStream}; closed once run() returns. */
    BufferedReader bufferedReader;

    /** Address of the page to fetch. */
    String url;

    /**
     * Fetches the default page and prints it, blocking until the download
     * has finished.
     */
    public pageSpider() {
        this(DEFAULT_URL);
    }

    /**
     * Fetches the page at {@code url} and prints it, blocking until the
     * download has finished.
     *
     * @param url absolute {@code http}/{@code https} address of the page; a
     *            malformed or non-HTTP address is reported on standard error
     *            and nothing is downloaded
     */
    public pageSpider(String url) {
        this.url = url;
        this.httpUrlConnection = openConnection(url);

        Thread worker = new Thread(this);
        worker.start();
        try {
            worker.join();
        } catch (InterruptedException e) {
            // Restore the flag so the caller can still observe the interruption.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Creates (but does not yet connect) an HTTP connection for the address.
     *
     * @param address the URL text to open
     * @return the connection, or {@code null} if the address is malformed,
     *         is not an HTTP(S) URL, or the connection object cannot be created
     */
    private static HttpURLConnection openConnection(String address) {
        try {
            URLConnection connection = new URL(address).openConnection();
            if (connection instanceof HttpURLConnection) {
                return (HttpURLConnection) connection;
            }
            System.err.println("pageSpider: not an HTTP URL: " + address);
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Picks the charset name to decode a response with.
     *
     * <p>Package-private so it can be exercised without network access.
     *
     * @param contentType  value of the Content-Type header, e.g.
     *                     {@code "text/html; charset=UTF-8"}; may be {@code null}
     * @param fallbackName name returned when no supported charset is declared
     * @return the declared charset name if this JVM supports it, otherwise
     *         {@code fallbackName}
     */
    static String charsetNameFrom(String contentType, String fallbackName) {
        if (contentType == null) {
            return fallbackName;
        }
        for (String part : contentType.split(";")) {
            String parameter = part.trim();
            if (!parameter.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                continue;
            }
            String name = parameter.substring(CHARSET_PARAM.length()).trim();
            // The parameter value may be written as a quoted string.
            if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                name = name.substring(1, name.length() - 1).trim();
            }
            try {
                if (Charset.isSupported(name)) {
                    return name;
                }
            } catch (IllegalArgumentException ignored) {
                // Syntactically illegal charset name: treat it as undeclared.
            }
            return fallbackName;
        }
        return fallbackName;
    }

    /**
     * Performs the GET request and prints every line of the response body to
     * standard output. The body is decoded with the charset declared in the
     * Content-Type header, falling back to gb2312 when none is usable.
     *
     * <p>Does nothing except report the problem when the connection could not
     * be opened. The reader, the stream and the connection are always
     * released, even if the request fails part-way through.
     */
    @Override
    public void run() {
        HttpURLConnection connection = httpUrlConnection;
        if (connection == null) {
            System.err.println("pageSpider: no connection available for " + url);
            return;
        }
        try {
            connection.setRequestMethod("GET");
            connection.setUseCaches(true); // allow cached responses
            connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
            connection.setReadTimeout(READ_TIMEOUT_MS);
            connection.connect(); // establish the connection

            String charsetName = charsetNameFrom(connection.getContentType(), DEFAULT_CHARSET);
            try (InputStream in = connection.getInputStream();
                    BufferedReader reader = new BufferedReader(new InputStreamReader(in, charsetName))) {
                inputStream = in;
                bufferedReader = reader;
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line); // echo the page source
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Entry point: fetches and prints the default page.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new pageSpider();
    }
}
一个简单的爬取网页源文件的Demo
最新推荐文章于 2020-11-24 00:50:48 发布