为了写一个 Java 采集程序,我从网上学到了3种获取单个网页内容的方法。这些方法主要运用了 Java IO 流方面的知识,由于对其不熟悉,因此写个小结。
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Get_Html {
public static void main(String[] args) throws Exception {
long start= System.currentTimeMillis();
String str_url="http://www.hiphop8.com/city/guangdong/guangzhou.php";
Pattern p = Pattern.compile(">(13\\d{5}|15\\d{5}|18\\d{5}|147\\d{4})
//String html = get_Html_2(str_url);
//String html = get_Html_1(str_url);
String html = get_Html_3(str_url);
Matcher m = p.matcher(html);
int num = 0;
while(m.find())
{
System.out.println("打印出的号码段落:"+m.group(1)+" 编号"+(++num));
}
System.out.println(num);
long end = System.currentTimeMillis();
System.out.println("花费的时间