Java抓取网页源代码《转载》

public static String getHtmlContent(URL url, String encode) { 
          StringBuffer contentBuffer = new StringBuffer(); 
   
          int responseCode = -1; 
          HttpURLConnection con = null; 
          try { 
              con = (HttpURLConnection) url.openConnection(); 
              con.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");// IE代理进行下载 
              con.setConnectTimeout(60000); 
              con.setReadTimeout(60000); 
              // 获得网页返回信息码 
              responseCode = con.getResponseCode(); 
              if (responseCode == -1) { 
                  System.out.println(url.toString() + " : connection is failure..."); 
                 con.disconnect(); 
                  return null; 
              } 
              if (responseCode >= 400) // 请求失败 
              { 
                  System.out.println("请求失败:get response code: " + responseCode); 
                  con.disconnect(); 
                  return null; 
              } 
  
              InputStream inStr = con.getInputStream(); 
              InputStreamReader istreamReader = new InputStreamReader(inStr, encode); 
              BufferedReader buffStr = new BufferedReader(istreamReader); 
   
              String str = null; 
              while ((str = buffStr.readLine()) != null) 
                contentBuffer.append(str); 
              inStr.close(); 
          } catch (IOException e) { 
              e.printStackTrace(); 
              contentBuffer = null; 
              System.out.println("error: " + url.toString()); 
          } finally { 
              con.disconnect(); 
         } 
          return contentBuffer.toString(); 
      } 
  
      public static String getHtmlContent(String url, String encode) { 
          if (!url.toLowerCase().startsWith("http://")) { 
             url = "http://" + url; 
          } 
          try { 
              URL rUrl = new URL(url); 
              return getHtmlContent(rUrl, encode); 
          } catch (Exception e) { 
              e.printStackTrace(); 
              return null; 
          } 
      } 
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值