/*
*****************************************************************************
* This software is under the Apache License Version 2.0
* Author: Tao - mail:cn.java.river@gmail.com
* Spreading Your Heart
****************************************************************************
*/
package atao.util.html;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import org.apache.commons.lang.StringUtils;
/**
*
* A Simple HTML downloader which can also download Page resources.
* <br/>
* <b>Note: This Tool won't download related or sub HTML</b>
*
* @author <a href="mailto:cn.java.river@gmail.com">Tao</a>
* @since 1.0
*/
public class HtmlDownloader
{
// URL of the HTML page that main() downloads.
private static String url = "http://pervasive2.morselli.unimo.it/~nicola/courses/IngegneriaDelSoftware/java/J6d_xml.html";
// Name of the workspace folder.
// NOTE(review): presumably the directory downloaded files are written into; the code using it is outside this excerpt - confirm.
private static String workspace = "download";
// Marker text hasSubUrl() searches for in a line; per the original comment it signals linked CSS and JS resources.
private static String urlSign = "<link href=";
// Second marker text hasSubUrl() searches for; per the original comment it signals image resources.
private static String urlSign2 = "src=";
// Parent of url: everything before its last '/'. Stays null until setRootUrl() assigns it.
private static String rootUrl = null;
/**
* Entry point: copies the page at {@code url} line by line into the file returned by
* {@code createDownloadFile ("download.html")}. Every line for which
* {@code hasSubUrl} returns true is additionally handed, via {@code getSubUrl}, to
* {@code downloadChild}. The elapsed time in seconds is printed after a successful run.
* <br/>
* The reader and the writer are closed in {@code finally} blocks so that the network
* stream and the output file are released even when reading, writing or a child
* download fails.
* <br/>
* NOTE(review): both the reader and the writer use the JVM default charset, as in the
* original code - confirm that this is intended for the pages being downloaded.
*
* @param args command line arguments; not used.
* @throws Exception if the URL is malformed or any read, write or child download fails.
*/
public static void main (String[] args) throws Exception
{
    long start = System.nanoTime ();
    setRootUrl ();
    URL u = new URL (url);
    BufferedReader reader = null;
    BufferedWriter writer = null;
    try
    {
        // Closing the reader also closes the underlying URL input stream.
        reader = new BufferedReader (new InputStreamReader (u.openStream ()));
        File f = createDownloadFile ("download.html");
        writer = new BufferedWriter (new FileWriter (f));
        String s;
        while ((s = reader.readLine ()) != null)
        {
            writer.write (s);
            writer.newLine ();
            if (hasSubUrl (s))
            {
                downloadChild (getSubUrl (s));
            }
        }
    }
    finally
    {
        // Close the writer first so buffered content is flushed; the nested finally
        // guarantees the reader is closed even if closing the writer throws.
        try
        {
            if (writer != null)
            {
                writer.close ();
            }
        }
        finally
        {
            if (reader != null)
            {
                reader.close ();
            }
        }
    }
    System.out.println ("Download time(s):" + String.format ("%.3f", (double)(System.nanoTime () - start)/ 1000000000.00));
} // end of main
/**
* Derives {@code rootUrl} from {@code url} by dropping the last '/' and everything
* after it, then prints the result to standard output.
*/
private static void setRootUrl ()
{
    final int lastSlash = url.lastIndexOf ('/');
    final String parent = url.substring (0, lastSlash);
    rootUrl = parent;
    System.out.println ("Root Url is:" + parent);
}
/**
* Tells whether a line of HTML contains one of the resource markers
* ({@code urlSign} or {@code urlSign2}).
*
* @param text one line of HTML content; may be null or empty.
* @return {@code true} if {@code text} is non-empty and contains at least one marker,
*         {@code false} otherwise.
*/
private static boolean hasSubUrl (String text)
{
    // Null or empty lines cannot reference a resource.
    if (!StringUtils.isNotEmpty (text))
    {
        return false;
    }
    return text.contains (urlSign) || text.contains (urlSign2);
}
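// NOTE: the two lines originally here were web-page residue, not Java source:
// "Cool cool cool" and "Latest recommended article, published 2021-02-03 00:00:34".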