java 视频图片_用JAVA爬取视频和图片

最新推荐文章于 2024-08-18 07:50:38 发布

weixin_39989796

最新推荐文章于 2024-08-18 07:50:38 发布

阅读量393

点赞数

文章标签： java 视频图片

本文链接：https://blog.csdn.net/weixin_39989796/article/details/114420134

版权

packagecom.download.util;

importorg.apache.commons.io.FileUtils;importorg.apache.http.HttpEntity;importorg.apache.http.StatusLine;importorg.apache.http.client.methods.CloseableHttpResponse;importorg.apache.http.client.methods.HttpGet;importorg.apache.http.impl.client.CloseableHttpClient;importorg.apache.http.impl.client.HttpClients;importorg.apache.http.util.EntityUtils;importorg.jsoup.Jsoup;importorg.jsoup.nodes.Document;importorg.jsoup.nodes.Element;importorg.jsoup.select.Elements;

importjava.io.File;importjava.io.IOException;importjava.net.URL;importjava.util.UUID;

/***@authorAdministrator*/public classDownloadVideoAndImage {

public static voidmain(String[] args) {

//开始页码intstartPage = 1;//结束页码intendPage = 5;//保存路径String savePath = "D:"+ File.separator+ "Desktop"+ File.separator+ "download";//设置编码String charset = "utf-8";

Thread thread1 = newThread(newRunnable() {

@Overridepublic voidrun() {

for(inti = 1;i < endPage;i++) {

String url = "https://ibaotu.com/shipin/7-0-0-0-0-"+ i + ".html";System.out.println("正在下载第"+ i + "页video。");try{

downloadVideoByURL(url,savePath,charset);} catch(Exception e) {

e.printStackTrace();}

System.out.println("第"+ i + "页video下载完成");}

}

});

Thread thread2 = newThread(newRunnable() {

@Overridepublic voidrun() {

try{

downloadVideoByPage(startPage,savePath,charset);} catch(Exception e) {

e.printStackTrace();}

}

});

Thread thread3 = newThread(newRunnable() {

@Overridepublic voidrun() {

try{

downloadImagePage(startPage,savePath,charset);} catch(Exception e) {

e.printStackTrace();}

}

});

thread1.start();thread2.start();thread3.start();}

/***@paramurl下载的目标地址*@paramsavePath保存的路径*@paramcharset编码*@throwsException*/public static voiddownloadVideoByURL(String url,String savePath,String charset) throwsIOException {

//根据URL获取htmlString content = getHtmlByURL(url,charset);//根据获取到的html得到Document对象Document document = getDocumentByHtml(content);//用标签选择器选择需要下载的元素Elements elements = document.select("div.media-list div.video-play video");//创建保存文件夹createSaveFileFolder(savePath);

for(Element element : elements) {

String videoUrl = element.attr("src");if(!(videoUrl.startsWith("https:"))) {

videoUrl = "https:"+ videoUrl;System.out.println(videoUrl);String uuid = getUUIDString();FileUtils.copyURLToFile(newURL(videoUrl), newFile(savePath,"downloadVideoByURL"+ File.separator+ uuid + ".mp4"));}

}

System.out.println("video下载完毕");}

/***通过递归调用下载**@parampage当前页码*@paramsavePath保存路径*@paramcharset编码*@throwsException*/public static voiddownloadVideoByPage(intpage,String savePath,String charset) throwsException {

System.out.println("downloadVideoByPage开始下载第"+ page + "页video。");//目标urlString url = "https://ibaotu.com/shipin/7-0-0-0-0-"+ page + ".html";//获取HtmlString content = getHtmlByURL(url,charset);//获取Document对象Document document = getDocumentByHtml(content);//通过select选择器选取所需元素Elements elements1 = document.select("div.media-list div.video-play video");//创建保存文件夹createSaveFileFolder(savePath);

for(Element element : elements1) {

String videoUrl = element.attr("src");if(!videoUrl.startsWith("https:")) {

}

System.out.println("downloadVideoByPage第"+ page + "页video下载完毕。");page++;downloadVideoByPage(page,savePath,charset);}

/***通过递归调用下载**@parampage当前页码*@paramsavePath保存路径*@paramcharset编码*@throwsException*/public static voiddownloadImagePage(intpage,String savePath,String charset) throwsException {

System.out.println("downloadImagePage正在下载第"+ page + "页image。");//目标URLString url = "https://ibaotu.com/shipin/7-0-0-0-0-"+ page + ".html";//获取HtmlString content = getHtmlByURL(url,charset);//获取Document对象Document document = getDocumentByHtml(content);//通过select选择器选取所需元素Elements elements = document.select("div.media-list div.show-image>img ");//创建保存文件夹createSaveFileFolder(savePath);

for(Element element : elements) {

String imgUrl = element.attr("data-url");//imgUrl不以https开头,前面拼接httpsif(!imgUrl.startsWith("https:")) {

imgUrl = "https:"+ imgUrl;System.out.println(imgUrl);String uuid = getUUIDString();FileUtils.copyURLToFile(newURL(imgUrl), newFile(savePath,"downloadImagePage"+ File.separator+ uuid + ".jpg"));}

}

System.out.println("downloadImagePage第"+ page + "页image下载完毕。");page++;downloadImagePage(page,savePath,charset);}

/***通过URL获取html页面**@paramurl目标url*@return*@throwsIOException*/public staticString getHtmlByURL(String url,String charset) throwsIOException {

//创建HttpClientCloseableHttpClient httpClient = HttpClients.createDefault();//获取连接HttpGet httpGet = newHttpGet(url);//获取响应CloseableHttpResponse httpResponse = httpClient.execute(httpGet);//获取响应状态码StatusLine statusLine = httpResponse.getStatusLine();intstatusCode = statusLine.getStatusCode();String content = null;//状态码200代表连接成功int ok = 200;if(statusCode == ok) {

//获取响应实体HttpEntity entity = httpResponse.getEntity();returnEntityUtils.toString(entity,charset);}

return"网络错误，请重试";}

/**
 * Parses an HTML string into a jsoup {@code Document}.
 *
 * @param html HTML text to parse
 * @return the document produced by {@code Jsoup.parse(html)}
 */
public static Document getDocumentByHtml(String html) {
    return Jsoup.parse(html);
}

/***创建保存文件夹**@paramsavePath保存路径*/public static voidcreateSaveFileFolder(String savePath) {

File file = newFile(savePath);//保存路径没有以路径结尾，添加路径结尾if(!(savePath.endsWith(File.separator))) {

savePath = savePath + File.separator;}

//文件夹不存在，创建if((!file.exists())) {

file.mkdirs();}

}

/**
 * Generates a random UUID and returns it with the {@code -} separators removed.
 *
 * @return the UUID's string form without dashes
 */
public static String getUUIDString() {
    return UUID.randomUUID().toString().replace("-", "");
}