酷酷酷

/* 
 ***************************************************************************** 
 * This software is under the Apache License Version 2.0 
 * Author: Tao -  mail:cn.java.river@gmail.com 
 * Spreading Your Heart 
 **************************************************************************** 
 */  
  
package atao.util.html;  
  
import java.io.BufferedReader;  
import java.io.BufferedWriter;  
import java.io.File;  
import java.io.FileOutputStream;  
import java.io.FileWriter;  
import java.io.InputStream;  
import java.io.InputStreamReader;  
import java.net.URL;  
  
import org.apache.commons.lang.StringUtils;  
  
/** 
 *  
 * A Simple HTML downloader which can also download Page resources. 
 * <br/> 
 * <b>Note: This Tool won't download related or sub HTML</b> 
 *  
 * @author <a href="mailto:cn.java.river@gmail.com">Tao</a> 
 * @since 1.0 
 */  
public class HtmlDownloader  
{  
  
    // Address of the HTML page that main() opens and copies to disk.
    private static String url = "http://pervasive2.morselli.unimo.it/~nicola/courses/IngegneriaDelSoftware/java/J6d_xml.html";  
      
    // Workspace folder name.
    // NOTE(review): presumably the directory downloaded files are written to; the code that uses it is not visible here - confirm.
    private static String workspace = "download";  
      
    // First marker substring hasSubUrl() looks for in a line; per the original note it identifies CSS and JS resources.
    private static String urlSign = "<link href=";  
      
    // Second marker substring hasSubUrl() looks for in a line; per the original note it identifies image resources.
    private static String urlSign2 = "src=";  
      
    // Parent of the page URL; null until setRootUrl() derives it from url.
    private static String rootUrl = null;  
  
    /**
     * Downloads the page at {@code url} into the file returned by
     * {@code createDownloadFile ("download.html")}, copying it line by line.
     * Every line for which {@link #hasSubUrl(String)} is true is additionally
     * passed through {@code getSubUrl} and {@code downloadChild}.
     * Prints the elapsed time in seconds when the copy has finished.
     *
     * @param args command line arguments (not used)
     * @throws Exception if the URL cannot be opened or any read/write fails;
     *         the network stream and the output file are closed in that case too
     */
    public static void main (String[] args) throws Exception
    {
        long start = System.nanoTime ();
        setRootUrl ();
        URL u = new URL (url);
        InputStream is = u.openStream ();
        try
        {
            // NOTE(review): reader and writer both rely on the default charset;
            // pages in another encoding may not be copied faithfully.
            BufferedReader reader = new BufferedReader (new InputStreamReader (is));
            File f = createDownloadFile ("download.html");
            BufferedWriter writer = new BufferedWriter (new FileWriter (f));
            try
            {
                String s;
                while ((s = reader.readLine ()) != null)
                {
                    writer.write (s);
                    writer.newLine ();
                    if (hasSubUrl (s))
                    {
                        downloadChild (getSubUrl (s));
                    }
                }
            }
            finally
            {
                // Flushes buffered output and releases the file handle even
                // when reading or a child download fails half way.
                writer.close ();
            }
        }
        finally
        {
            // The reader only wraps this stream, so closing the stream is
            // enough to release the connection.
            is.close ();
        }
        System.out.println ("Download time(s):" + String.format ("%.3f", (double)(System.nanoTime () - start)/ 1000000000.00));
    } // end of main
  
    /**
     * Derives {@code rootUrl} from {@code url} by cutting off everything from
     * the last "/" onwards, then prints the result.
     * <br/>
     * When the only slashes belong to the "://" scheme separator (for example
     * "http://host"), or there is no slash at all, the whole url is used as
     * the root instead of a truncated scheme or an index error.
     */
    private static void setRootUrl ()
    {
        int schemeEnd = url.indexOf ("://");
        // First index at which a "/" can be a path separator rather than part of "://".
        int pathStart = schemeEnd < 0 ? 0 : schemeEnd + 3;
        int pos = url.lastIndexOf ("/");
        if (pos >= pathStart)
        {
            rootUrl = url.substring (0, pos);
        }
        else
        {
            // No path component: the url itself already is the root.
            rootUrl = url;
        }
        System.out.println ("Root Url is:" + rootUrl);
    }
  
    /**
     * Tells whether a line of html mentions a sub resource, i.e. whether it
     * contains one of the two marker strings {@code urlSign} or {@code urlSign2}.
     *
     * @param text line of html content; may be null or empty.
     * @return true if the line is non-empty and contains either marker,
     *         false otherwise.
     */
    private static boolean hasSubUrl (String text)
    {
        // Null and empty lines cannot reference anything.
        if (!StringUtils.isNotEmpty (text))
        {
            return false;
        }
        return text.contains (urlSign) || text.contains (urlSign2);
    }
  

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值