// 利用 HtmlUnit 下载网页上的文件(未测试)

import java.io.FileOutputStream;
import java.io.InputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
 
import org.apache.commons.io.IOUtils;
 
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WebClient;
 
public class DownloadFile {
    public static void main(String[] args) throws Exception {
        String baseUrl = "<a href="http://hanyu.iciba.com/hanzi/1.shtml";" target="_blank">http://hanyu.iciba.com/hanzi/1.shtml";</a>
        String bihuaRegex = "class=\"guanggao\"[^<]*<[^<]*<param\\s*name=\"movie\"\\s*value=\"([^\"]*)";
        String aSoundRegex = "class=\"js12\">ā.*?name=\"FlashVars\"\\s*value=\"f=([^\"]*)";
        String eSoundRegex = "class=\"js12\">ē.*?name=\"FlashVars\"\\s*value=\"f=([^\"]*)";
        WebClient client = new WebClient();
        client.getOptions().setCssEnabled(false);
        client.getOptions().setJavaScriptEnabled(false);
        client.getOptions().setThrowExceptionOnFailingStatusCode(false);
        client.getOptions().setThrowExceptionOnScriptError(false);
        Page page = client.getPage(baseUrl);
        String source = page.getWebResponse().getContentAsString();
        Matcher mBihuan = Regex(source, bihuaRegex);
        Matcher mA = Regex(source, aSoundRegex);
        Matcher mE = Regex(source, eSoundRegex);
        while(mBihuan.find()) {
            String url = "<a href="http://hanyu.iciba.com/" + mBihuan.group" target="_blank">http://hanyu.iciba.com/" + mBihuan.group</a>(1);
            page = client.getPage(url);
            saveFile(page, "d:/testDownload/bihua.swf");
        }
        while(mA.find()) {
            String url = mA.group(1);
            page = client.getPage(url);
            saveFile(page, "d:/testDownload/a.mp3");
        }
        while(mE.find()) {
            String url = mE.group(1);
            page = client.getPage(url);
            saveFile(page, "d:/testDownload/e.mp3");
        }
    }
     
    public static Matcher Regex(String source, String regex) {
        Pattern p = Pattern.compile(regex, Pattern.DOTALL);
        return p.matcher(source);
    }
     
    public static void saveFile(Page page, String file) throws Exception {
        InputStream is = page.getWebResponse().getContentAsStream();
        FileOutputStream output = new FileOutputStream(file);
        IOUtils.copy(is, output);
        output.close();
    }
}
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值