/**
* All rights Reserved, Designed By Android_Robot
* @Title: TwoThreadDemo.java
* @Package downloadHtml
* @Description: TODO
* @author: mao.wang
* @date: 2016年5月20日 下午12:06:08
* @version V1.0
*/
package downloadHtml;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
/**
 * Mirrors the files reachable from a Nexus repository listing page into a local directory.
 *
 * <p>Downloading is multi-threaded, but because the Nexus server may not be able to cope with
 * heavy concurrency, new threads are started only for the directories found on the root
 * listing; everything below such a directory is crawled sequentially on its thread.
 *
 * @author mao.wang
 * @date 2016-05-19 22:06:08
 */
public class TwoThreadDemo implements Runnable {

    /** Maximum number of attempts made for a single file before it is given up on. */
    private static final int MAX_DOWNLOAD_ATTEMPTS = 10;

    /**
     * Links whose href is not longer than this are ignored.
     * NOTE(review): presumably this filters out short relative links such as the
     * parent-directory entry of the listing - confirm against the server's HTML.
     */
    private static final int MIN_HREF_LENGTH = 20;

    /** URL segment after which the remainder of a link is used as the path below the local root. */
    private static final String REPOSITORY_MARKER = "public/";

    private String curl;
    private String localPath;

    /**
     * @return the URL this instance crawls when executed as a {@link Runnable}
     */
    public String getCurl() {
        return curl;
    }

    /**
     * @param curl the URL to crawl when executed as a {@link Runnable}
     */
    public void setCurl(String curl) {
        this.curl = curl;
    }

    /**
     * @return the local root directory downloads are written under
     */
    public String getLocalPath() {
        return localPath;
    }

    /**
     * @param localPath the local root directory downloads are written under
     */
    public void setLocalPath(String localPath) {
        this.localPath = localPath;
    }

    /**
     * Crawls the configured URL sequentially (no further threads are started).
     */
    @Override
    public void run() {
        Blog(this.getCurl(), this.getLocalPath(), false);
    }

    /**
     * Starts a crawl of the hard-coded Nexus address into the hard-coded local directory.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Address of the Nexus repository listing.
        String url = "http://10.201.96.114:9000/nexus/content/groups/public/";
        // Local target directory; it must have the form <drive>:/<dir>/ including the trailing slash.
        String localPath = "D:/nexus/file4/";
        TwoThreadDemo td = new TwoThreadDemo();
        td.Blog(url, localPath, true);
    }

    /**
     * Loads the listing page at {@code url} and processes every anchor on it: links ending in
     * {@code /} are treated as sub-directories and crawled, all other links are downloaded.
     *
     * @param url       address of the listing page
     * @param localPath local root directory (with trailing slash) that files are stored under
     * @param flag      {@code true} to crawl each sub-directory of this page on its own new
     *                  thread, {@code false} to crawl sub-directories recursively on the
     *                  calling thread
     */
    public void Blog(final String url, final String localPath, boolean flag) {
        final WebClient webClient = new WebClient();
        webClient.getOptions().setCssEnabled(false);
        webClient.getOptions().setJavaScriptEnabled(false);
        try {
            HtmlPage page = webClient.getPage(url);
            DomNodeList<DomElement> anchors = page.getElementsByTagName("a");
            for (DomElement anchor : anchors) {
                final String href = anchor.getAttribute("href");
                if (!StringUtils.isNotEmpty(href) || href.length() <= MIN_HREF_LENGTH) {
                    continue;
                }
                System.out.println(href);
                if (href.endsWith("/")) {
                    crawlDirectory(href, localPath, flag);
                } else {
                    downloadWithRetry(webClient, href, localPath);
                }
            }
        } catch (FailingHttpStatusCodeException | IOException e) {
            // A listing that cannot be loaded is reported and skipped; sibling crawls continue.
            e.printStackTrace();
        } finally {
            webClient.closeAllWindows();
        }
    }

    /**
     * Crawls one sub-directory, either on a freshly started thread or on the calling thread.
     */
    private void crawlDirectory(String directoryUrl, String localPath, boolean newThread) {
        if (newThread) {
            TwoThreadDemo worker = new TwoThreadDemo();
            worker.setCurl(directoryUrl);
            worker.setLocalPath(localPath);
            new Thread(worker).start();
        } else {
            Blog(directoryUrl, localPath, false);
        }
    }

    /**
     * Downloads one file, retrying a bounded number of times when fetching or saving fails.
     */
    private void downloadWithRetry(WebClient webClient, String fileUrl, String localPath) {
        int markerIndex = fileUrl.indexOf(REPOSITORY_MARKER);
        if (markerIndex < 0) {
            // Without the marker there is no way to derive the local path; retrying cannot help.
            System.out.println("下载失败" + fileUrl);
            return;
        }
        // Everything after the first marker is the path relative to the local root.
        String target = localPath + fileUrl.substring(markerIndex + REPOSITORY_MARKER.length());

        Exception lastFailure = null;
        for (int attempt = 1; attempt <= MAX_DOWNLOAD_ATTEMPTS; attempt++) {
            try {
                downFile(webClient.getPage(fileUrl), target);
                System.out.println("文件");
                System.out.println(target);
                return;
            } catch (IOException | RuntimeException e) {
                // Best effort: remember the failure and try again until the attempts run out.
                lastFailure = e;
                System.out.println("文件异常次数" + attempt);
            }
        }
        System.out.println("下载失败" + fileUrl);
        if (lastFailure != null) {
            lastFailure.printStackTrace();
        }
    }

    /**
     * Stores the body of {@code page} in {@code file}, creating missing parent directories.
     * An already existing file is left untouched. When the copy fails, the partially written
     * file is removed so that a later attempt does not mistake it for a finished download.
     *
     * @param page the fetched resource
     * @param file path of the local file to create
     * @throws IOException if the directories cannot be created or the content cannot be copied
     */
    private void downFile(Page page, String file) throws IOException {
        File target = new File(file);
        File parent = target.getParentFile();
        if (parent != null && !parent.exists()) {
            if (parent.mkdirs()) {
                System.out.println("创建文件夹" + parent.getPath());
            } else if (!parent.isDirectory()) {
                // mkdirs() also returns false when another thread created the directory first,
                // so only a directory that is still missing is an error.
                throw new IOException("Cannot create directory " + parent.getPath());
            }
        }
        if (target.exists()) {
            System.out.println("文件已存在跳过");
            return;
        }

        boolean complete = false;
        try (InputStream is = page.getWebResponse().getContentAsStream();
                FileOutputStream output = new FileOutputStream(target)) {
            IOUtils.copy(is, output);
            complete = true;
        } finally {
            // Runs after both streams are closed; drop a truncated file.
            if (!complete && target.exists() && !target.delete()) {
                System.out.println("下载失败" + page.getUrl());
            }
        }
        System.out.println("创建文件" + file);
    }
}
// The required JAR files can be downloaded from the official HtmlUnit site:
// https://sourceforge.net/projects/htmlunit/files/htmlunit/2.21/