/* Function name: myGetHttpFile2 Description: 爬网页用 Input: URL 例如:http://www.126.com Output: 字符串,网页的HTML */ public String myGetHttpFile2(String url){ String authentication=null; ArrayList al=new ArrayList(); String PageURL = url; PageURL = url ; ArrayList urlCollection=new ArrayList(); Hashtable ht=new Hashtable(); //代理,如果要穿过代理将下面注释取消 //ht=System.getProperties(); //authentication = "Basic " + new sun.misc.BASE64Encoder().encode("代理的用户:代理的密码".getBytes()); //ht.put("proxySet", "true"); //ht.put("proxyHost", "172.16.20.2"); //ht.put("proxyPort", "80"); String urlSource=PageURL; StringBuffer htmlBuffer=new StringBuffer(); String returnStr=null; int codeStart=0; int codeEnd=0; int linkCount=0; String getURL=null; String realURL=null; String urlText=null; int from=0; int lenURL=0; int firstLink=0; int endLink=0; boolean isNewsLink=false; try { URL su = new URL (urlSource); URLConnection conn = su.openConnection(); //conn.setRequestProperty("Proxy-Authorization", authentication); InputStream imageSource=new URL(urlSource).openStream(); int ch; while((ch=imageSource.read())>-1) { htmlBuffer.append((char)ch); } imageSource.close(); returnStr= new String(htmlBuffer); returnStr=new String(returnStr.getBytes("ISO8859_1"),"GBK"); } catch(Exception e) { } if(returnStr!=null){ return returnStr ; }else{ return "empty" ; } }