1 /**
2 *3 *@paramargs4 * 测试一组网页,针对雅虎知识堂5 */
6 public static void main(finalString args[])7 {8 String url = "";9 final List list = new ArrayList();10 System.out.print("输入URL,一行一个,输入结束后输入 go 程序开始运行: \n");11 /*
12 *http://ks.cn.yahoo.com/question/1307121201133.html
13 *http://ks.cn.yahoo.com/question/1307121101907.html
14 *http://ks.cn.yahoo.com/question/1307121101907_2.html
15 *http://ks.cn.yahoo.com/question/1307121101907_3.html
16 *http://ks.cn.yahoo.com/question/1307121101907_4.html
17 *http://ks.cn.yahoo.com/question/1307121101907_5.html
18 *http://ks.cn.yahoo.com/question/1307121101907_6.html
19 *http://ks.cn.yahoo.com/question/1307121101907_7.html
20 *http://ks.cn.yahoo.com/question/1307121101907_8.html
21 */
22 final BufferedReader br = new BufferedReader(newInputStreamReader(System.in));23 try
24 {25 while (!(url = br.readLine()).equals("go"))26 {27 list.add(url);28 }29 }30 catch (finalException e)31 {32 e.getMessage();33 }34 final WebContent wc = newWebContent();35 HashMap hm = new HashMap();36 for (int i = 0; i < list.size(); i++)37 {38 hm =wc.getFromYahoo(list.get(i));39 System.out.println("标题: " + hm.get("title"));40 System.out.println("内容: \n" + hm.get("original"));41 }42 /*
43 * String htmlurl[] = {44 * "http://ks.cn.yahoo.com/question/1307121201133.html",45 * "http://ks.cn.yahoo.com/question/1307121101907.html",46 * "http://ks.cn.yahoo.com/question/1307121101907_2.html",47 * "http://ks.cn.yahoo.com/question/1307121101907_3.html",48 * "http://ks.cn.yahoo.com/question/1307121101907_4.html",49 * "http://ks.cn.yahoo.com/question/1307121101907_5.html",50 * "http://ks.cn.yahoo.com/question/1307121101907_6.html",51 * "http://ks.cn.yahoo.com/question/1307121101907_7.html",52 * "http://ks.cn.yahoo.com/question/1307121101907_8.html" }; WebContent53 * wc = new WebContent(); HashMap hm = new HashMap(); for (int i = 0; i < htmlurl.length; i++) { hm =55 * wc.getFromYahoo(htmlurl[i]); System.out.println("标题: " +56 * hm.get("title")); System.out.println("内容: \n" + hm.get("original")); }57 */
58 /*
59 * String html=""; String link=""; String sscript=""; String content="";60 * System.out.println(htmlurl+" 开始读取网页内容:");61 * html=wc.getOneHtml(htmlurl); System.out.println(htmlurl+"62 * 读取完毕开始分析……"); html=html.replaceAll("()","63 * ");//去除脚本 html=html.replaceAll("()","64 * ");//去掉CSS html=html.replaceAll(".*?"," ");//除去页面标题65 * html=html.replaceAll("]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)","66 * ");//去掉链接 html=html.replaceAll("(\\s){2,}?"," ");//除去多余空格67 * html=wc.outTag(html);//多余标记 System.out.println(html);68 */
69
70 /*
71 * String s[]=html.split(" +"); for(int i=0;is[i].length())?content:s[i]; }73 * System.out.println(content);74 */
75
76 //System.out.println(htmlurl+"网页内容结束");
77 /*
78 * System.out.println(htmlurl+"网页脚本开始:"); List79 * script=wc.getScript(html); for(int i=0;i
93 }94 }95
96