package cracler;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
/**
 * Downloads a single web page and prints the value of the {@code src}
 * attribute of every {@code <img>} tag found in its HTML, one per line,
 * to standard output.
 *
 * <p>The HTML is scanned with a regular expression rather than a real
 * parser, so unusual markup (for example a {@code >} character inside an
 * attribute value that precedes {@code src}) may cause a tag to be missed.
 */
public class CrawOnePageImg {

    /** Address of the page that is downloaded and scanned. */
    private static final String PAGE_URL = "http://www.baidu.com/";

    /**
     * Matches an {@code <img>} tag and captures its {@code src} value.
     *
     * <ul>
     *   <li>{@code <img\b} - the tag name must be exactly "img";</li>
     *   <li>{@code [^>]*?} - skip other attributes but never leave the tag,
     *       so the {@code src} of a later, unrelated tag is not picked up;</li>
     *   <li>{@code \ssrc} - whitespace is required before the attribute
     *       name, which excludes names such as {@code data-src};</li>
     *   <li>group 1 - the opening quote (single or double), back-referenced
     *       as the closing quote;</li>
     *   <li>group 2 - the attribute value itself.</li>
     * </ul>
     */
    private static final Pattern IMG_SRC = Pattern.compile(
            "<img\\b[^>]*?\\ssrc\\s*=\\s*([\"'])(.*?)\\1",
            Pattern.CASE_INSENSITIVE);

    /**
     * Fetches {@link #PAGE_URL} and prints every image source found in it.
     * Any failure is reported by printing the stack trace.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        GetMethod method = new GetMethod(PAGE_URL);
        HttpClient client = new HttpClient();
        try {
            client.executeMethod(method);
            printImageSources(readBody(method));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always hand the connection back, even if the request or the
            // parsing failed part-way through.
            method.releaseConnection();
        }
    }

    /**
     * Reads the whole response body of an executed request as text, decoded
     * with the charset reported by the response.
     *
     * @param method an already executed GET request
     * @return the body text with line breaks normalised to {@code '\n'},
     *         or an empty string when the response has no body
     * @throws Exception if the body cannot be read or decoded
     */
    private static String readBody(GetMethod method) throws Exception {
        InputStream in = method.getResponseBodyAsStream();
        if (in == null) {
            // No response body is available; nothing to scan.
            return "";
        }
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(in, method.getResponseCharSet()));
        try {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the terminator; put one back so that
                // tokens on neighbouring lines are not glued together.
                sb.append(line).append('\n');
            }
            return sb.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Prints the {@code src} value of each {@code <img>} tag in the given
     * HTML to standard output, in document order.
     *
     * @param html the HTML text to scan
     */
    private static void printImageSources(String html) {
        Matcher m = IMG_SRC.matcher(html);
        while (m.find()) {
            // Group 2 is the attribute value (group 1 is the quote char).
            System.out.println(m.group(2));
        }
    }
}