爬虫学习(一)
抓取网页源码
代码块:
package com.spider;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
/**
 * Minimal crawler example: downloads one web page and prints its source to standard output.
 *
 * <p>The page is fetched through a plain {@link URLConnection}; the response body is read as
 * text line by line and echoed once reading has finished (or failed).
 */
public class BaiduSourceCodeTest {

    /**
     * Fetches {@code http://www.baidu.com} and prints the response body.
     *
     * <p>Any failure is reported via {@link Throwable#printStackTrace()}; whatever part of the
     * body was read before the failure is still printed afterwards.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String url = "http://www.baidu.com";
        // StringBuilder avoids re-copying the whole accumulated text on every line read.
        StringBuilder result = new StringBuilder();
        try {
            URL realUrl = new URL(url);
            URLConnection con = realUrl.openConnection();
            con.connect();
            // Decode with an explicit charset instead of the platform default, which varies
            // between machines and JVM versions.
            // NOTE(review): assumes the page is served as UTF-8 - confirm against the
            // response's Content-Type header if the URL is ever changed.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                // readLine() strips line terminators, so the lines are joined back-to-back
                // and the page is emitted as a single unbroken line.
                while ((line = in.readLine()) != null) {
                    result.append(line);
                }
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            // Covers connection, read and close failures alike.
            e.printStackTrace();
        }
        System.out.println(result.toString());
    }
}