packagecom.download.util;
importorg.apache.commons.io.FileUtils;importorg.apache.http.HttpEntity;importorg.apache.http.StatusLine;importorg.apache.http.client.methods.CloseableHttpResponse;importorg.apache.http.client.methods.HttpGet;importorg.apache.http.impl.client.CloseableHttpClient;importorg.apache.http.impl.client.HttpClients;importorg.apache.http.util.EntityUtils;importorg.jsoup.Jsoup;importorg.jsoup.nodes.Document;importorg.jsoup.nodes.Element;importorg.jsoup.select.Elements;
importjava.io.File;importjava.io.IOException;importjava.net.URL;importjava.util.UUID;
/***@authorAdministrator*/public classDownloadVideoAndImage {
public static voidmain(String[] args) {
//开始页码intstartPage = 1;//结束页码intendPage = 5;//保存路径String savePath = "D:"+ File.separator+ "Desktop"+ File.separator+ "download";//设置编码String charset = "utf-8";
Thread thread1 = newThread(newRunnable() {
@Overridepublic voidrun() {
for(inti = 1;i < endPage;i++) {
String url = "https://ibaotu.com/shipin/7-0-0-0-0-"+ i + ".html";System.out.println("正在下载第"+ i + "页video。");try{
downloadVideoByURL(url,savePath,charset);} catch(Exception e) {
e.printStackTrace();}
System.out.println("第"+ i + "页video下载完成");}
}
});
Thread thread2 = newThread(newRunnable() {
@Overridepublic voidrun() {
try{
downloadVideoByPage(startPage,savePath,charset);} catch(Exception e) {
e.printStackTrace();}
}
});
Thread thread3 = newThread(newRunnable() {
@Overridepublic voidrun() {
try{
downloadImagePage(startPage,savePath,charset);} catch(Exception e) {
e.printStackTrace();}
}
});
thread1.start();thread2.start();thread3.start();}
/***@paramurl下载的目标地址*@paramsavePath保存的路径*@paramcharset编码*@throwsException*/public static voiddownloadVideoByURL(String url,String savePath,String charset) throwsIOException {
//根据URL获取htmlString content = getHtmlByURL(url,charset);//根据获取到的html得到Document对象Document document = getDocumentByHtml(content);//用标签选择器选择需要下载的元素Elements elements = document.select("div.media-list div.video-play video");//创建保存文件夹createSaveFileFolder(savePath);
for(Element element : elements) {
String videoUrl = element.attr("src");if(!(videoUrl.startsWith("https:"))) {
videoUrl = "https:"+ videoUrl;System.out.println(videoUrl);String uuid = getUUIDString();FileUtils.copyURLToFile(newURL(videoUrl), newFile(savePath,"downloadVideoByURL"+ File.separator+ uuid + ".mp4"));}
}
System.out.println("video下载完毕");}
/***通过递归调用下载**@parampage当前页码*@paramsavePath保存路径*@paramcharset编码*@throwsException*/public static voiddownloadVideoByPage(intpage,String savePath,String charset) throwsException {
System.out.println("downloadVideoByPage开始下载第"+ page + "页video。");//目标urlString url = "https://ibaotu.com/shipin/7-0-0-0-0-"+ page + ".html";//获取HtmlString content = getHtmlByURL(url,charset);//获取Document对象Document document = getDocumentByHtml(content);//通过select选择器选取所需元素Elements elements1 = document.select("div.media-list div.video-play video");//创建保存文件夹createSaveFileFolder(savePath);
for(Element element : elements1) {
String videoUrl = element.attr("src");if(!videoUrl.startsWith("https:")) {
videoUrl = "https:"+ videoUrl;System.out.println(videoUrl);String uuid = getUUIDString();FileUtils.copyURLToFile(newURL(videoUrl), newFile(savePath,"downloadVideoByPage"+ File.separator+ uuid + ".mp4"));}
}
System.out.println("downloadVideoByPage第"+ page + "页video下载完毕。");page++;downloadVideoByPage(page,savePath,charset);}
/***通过递归调用下载**@parampage当前页码*@paramsavePath保存路径*@paramcharset编码*@throwsException*/public static voiddownloadImagePage(intpage,String savePath,String charset) throwsException {
System.out.println("downloadImagePage正在下载第"+ page + "页image。");//目标URLString url = "https://ibaotu.com/shipin/7-0-0-0-0-"+ page + ".html";//获取HtmlString content = getHtmlByURL(url,charset);//获取Document对象Document document = getDocumentByHtml(content);//通过select选择器选取所需元素Elements elements = document.select("div.media-list div.show-image>img ");//创建保存文件夹createSaveFileFolder(savePath);
for(Element element : elements) {
String imgUrl = element.attr("data-url");//imgUrl不以https开头,前面拼接httpsif(!imgUrl.startsWith("https:")) {
imgUrl = "https:"+ imgUrl;System.out.println(imgUrl);String uuid = getUUIDString();FileUtils.copyURLToFile(newURL(imgUrl), newFile(savePath,"downloadImagePage"+ File.separator+ uuid + ".jpg"));}
}
System.out.println("downloadImagePage第"+ page + "页image下载完毕。");page++;downloadImagePage(page,savePath,charset);}
/***通过URL获取html页面**@paramurl目标url*@return*@throwsIOException*/public staticString getHtmlByURL(String url,String charset) throwsIOException {
//创建HttpClientCloseableHttpClient httpClient = HttpClients.createDefault();//获取连接HttpGet httpGet = newHttpGet(url);//获取响应CloseableHttpResponse httpResponse = httpClient.execute(httpGet);//获取响应状态码StatusLine statusLine = httpResponse.getStatusLine();intstatusCode = statusLine.getStatusCode();String content = null;//状态码200代表连接成功int ok = 200;if(statusCode == ok) {
//获取响应实体HttpEntity entity = httpResponse.getEntity();returnEntityUtils.toString(entity,charset);}
return"网络错误,请重试";}
/***获取Document对象**@paramhtml*@return*/public staticDocument getDocumentByHtml(String html) {
returnJsoup.parse(html);}
/***创建保存文件夹**@paramsavePath保存路径*/public static voidcreateSaveFileFolder(String savePath) {
File file = newFile(savePath);//保存路径没有以路径结尾,添加路径结尾if(!(savePath.endsWith(File.separator))) {
savePath = savePath + File.separator;}
//文件夹不存在,创建if((!file.exists())) {
file.mkdirs();}
}
/***生成UUID字符串并去除-**@return*/public staticString getUUIDString() {
returnUUID.randomUUID().toString().replace("-","");}
}