JAVA获取视频（TS类型）

最新推荐文章于 2024-04-29 16:35:27 发布

zzh_pride

最新推荐文章于 2024-04-29 16:35:27 发布

阅读量2.6k

点赞数

文章标签： java 数据抓取爬虫 jsoup

本文链接：https://blog.csdn.net/zzh_pride/article/details/107303129

版权

JAVA获取视频（TS类型）

在爬取视频时，有时会碰到m3u8格式的视频，这种类型的视频是通过一个个片段进行播放的。

1.这种视频（https://ifeng.com-v-ifeng.com/20180716/21960_f0f836f8/index.m3u8）直接去访问的时候会显示如下图所示文件。

2.所获得的内容中有“1000k/hls/index.m3u8”这样一行，发现这个正好是视频中第一个请求的地址，根据这个地址再访问（https://ifeng.com-v-ifeng.com/20180716/21960_f0f836f8/1000k/hls/index.m3u8），便可获得每个片段的地址，我们可以通过访问这些片段进行下载，最后合成视频。

具体代码如下

package Test.Write;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Downloads videos that are served as HLS (m3u8) playlists of TS segments.
 *
 * <p>The program scrapes an episode list page, extracts the playlist URL of
 * every episode, downloads each segment listed in the playlist and finally
 * concatenates the segments into one file per episode.
 *
 * <p>The video name and the segment base URL are kept in static fields, so
 * this class is not safe for concurrent use.
 *
 * @author Zzh
 */
public class CatchVideo2 {

	/** Root directory that all downloads are stored under. */
	private static final String SAVE_ROOT = "D:\\视频\\";

	/** Video title, already stripped of characters that are illegal in file names. */
	private static String videoName;

	/** Base URL (ending in '/') that relative segment URIs are resolved against. */
	private static String videoPathPrefix;

	/** Timestamp format used for console progress messages. */
	private static SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

	/**
	 * Entry point: collects the playlist URL of every episode and downloads them.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		System.out.println(df.format(new Date()) + ":开始准备下载。");
		// Playlist URLs, one entry per episode (null when an episode page could not be parsed).
		List<String> downLoadUrls = new ArrayList<String>();
		// Main page of the series.
		String htmlmain = getHtml("http://www.yhdm.tv/show/1014.html");
		// Relative links to the individual episode pages.
		List<String> urls = parseHtmlMain(htmlmain);

		String mainurl = "http://www.yhdm.tv";
		for (String url : urls) {
			String html = getHtml(mainurl + url);
			// Keep null entries so that the episode numbering stays aligned with the page.
			downLoadUrls.add(parseHtml(html));
		}

		httpDownload(downLoadUrls);
	}

	/**
	 * Fetches the HTML source of a page, decoded as UTF-8.
	 *
	 * @param path absolute URL of the page
	 * @return the page source with lines joined by '\n'; whatever was read so
	 *         far (possibly empty) when the request fails
	 */
	private static String getHtml(String path) {

		System.out.println(df.format(new Date()) + ":获取" + path + "页面代码。");
		StringBuilder html = new StringBuilder();

		try {
			URLConnection connection = new URL(path).openConnection();
			try (BufferedReader reader = new BufferedReader(
					new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
				String line;
				while ((line = reader.readLine()) != null) {
					// Keep the line break: gluing lines together can merge
					// tokens of a tag that is split across several lines.
					html.append(line).append('\n');
				}
			}
		} catch (IOException e) {
			// MalformedURLException is an IOException as well.
			e.printStackTrace();
		}
		return html.toString();
	}

	/**
	 * Extracts the episode links from the series main page and stores the
	 * series title in {@link #videoName}.
	 *
	 * @param html source of the main page
	 * @return the href of every episode link inside the element with id
	 *         "main0", excluding links whose text contains "CM" or "PV";
	 *         empty when that element is missing
	 */
	private static List<String> parseHtmlMain(String html) {

		System.out.println(df.format(new Date()) + ":获取集数。");
		List<String> urls = new ArrayList<String>();
		Document document = Jsoup.parse(html);

		Element main = document.getElementById("main0");
		if (main != null) {
			for (Element urlLink : main.getElementsByTag("a")) {
				String name = urlLink.html();
				if (name.contains("CM") || name.contains("PV")) {
					continue;
				}
				urls.add(urlLink.attr("href"));
			}
		}

		// The first <h1> carries the title of the series.
		Elements videoNameH1 = document.getElementsByTag("h1");
		String title = videoNameH1.isEmpty() ? "" : sanitizeFileName(videoNameH1.get(0).text());
		// Fall back to a fixed name so that paths never contain "null" or an empty component.
		videoName = title.isEmpty() ? "video" : title;

		return urls;
	}

	/**
	 * Removes the characters that Windows does not allow in file names.
	 *
	 * @param name raw title
	 * @return the title without any of {@code \ / : * ? " < > |}, trimmed
	 */
	private static String sanitizeFileName(String name) {
		return name.replaceAll("[\\\\/:*?\"<>|]", "").trim();
	}

	/**
	 * Extracts the playlist URL from an episode page.
	 *
	 * @param html source of the episode page
	 * @return the onclick value of the element with id "play_1" without the
	 *         surrounding {@code changeplay('...$mp4');} wrapper, or
	 *         {@code null} when the page has no such element
	 */
	private static String parseHtml(String html) {
		Document document = Jsoup.parse(html);
		Element dplayer = document.getElementById("play_1");
		if (dplayer == null) {
			return null;
		}
		String videoUrl = dplayer.attr("onclick");
		videoUrl = videoUrl.replace("changeplay('", "");
		videoUrl = videoUrl.replace("$mp4');", "");

		return videoUrl;
	}

	/**
	 * Downloads every episode in the list and merges its segments into one file.
	 *
	 * @param httpUrls playlist URL of each episode; the 1-based position in
	 *                 the list is used as the episode number
	 * @return {@code false} when the output directory cannot be created,
	 *         {@code true} otherwise (individual segment failures are only logged)
	 */
	public static boolean httpDownload(List<String> httpUrls) {

		// Segments go to <root>\<name>, merged episodes to <root>\<name>\<name>.
		String saveFile = SAVE_ROOT + videoName;
		String saveFileVideo = saveFile + "\\" + videoName;
		System.out.println(df.format(new Date()) + ":地址集获取完毕准备开始下载。");
		if (httpUrls == null || httpUrls.isEmpty()) {
			return true;
		}

		// Creating the deeper directory creates the segment directory too.
		File videoDir = new File(saveFileVideo);
		if (!videoDir.isDirectory() && !videoDir.mkdirs()) {
			System.out.println(df.format(new Date()) + ":无法创建目录" + saveFileVideo);
			return false;
		}

		int i = 0;
		for (String httpUrl : httpUrls) {
			i++;
			// Segment index -> local file, used to merge in playlist order.
			HashMap<Integer, String> keyFileMap = new HashMap<Integer, String>();
			String indexStr = getIndexFile(httpUrl);
			List<String> videoUrlsList = analysisIndex(indexStr);

			int j = 0;
			for (String videoUrl : videoUrlsList) {
				j++;
				URL url;
				try {
					url = resolveSegmentUrl(videoUrl);
				} catch (MalformedURLException e1) {
					e1.printStackTrace();
					continue;
				}

				String segmentPath = saveFile + "\\" + videoName + i + "_" + j + ".mp4";
				try {
					if (!new File(segmentPath).exists()) {
						downloadSegment(url, segmentPath);
						System.out.println(videoName + "第" + i + "集" + j + "片段下载好了");
					} else {
						System.out.println(videoName + "第" + i + "集" + j + "片段已存在");
					}
					keyFileMap.put(j - 1, segmentPath);
				} catch (FileNotFoundException e) {
					System.out.println(videoName + "第" + i + "集" + j + "片段不存在");
				} catch (IOException e) {
					e.printStackTrace();
					System.out.println(videoName + "第" + i + "集" + j + "片段超时");
				}
			}

			if (keyFileMap.isEmpty()) {
				// Nothing was downloaded; do not leave an empty video file behind.
				System.out.println(videoName + "第" + i + "集没有可用片段，跳过合成");
				continue;
			}
			composeFile(saveFileVideo + "\\" + videoName + i + ".mp4", keyFileMap);
			System.out.println(df.format(new Date()) + ":" + videoName + i + "集完成");
		}
		return true;
	}

	/**
	 * Resolves a segment URI from a playlist against {@link #videoPathPrefix}.
	 * Unlike plain concatenation this also handles absolute and root-relative URIs.
	 *
	 * @param segment URI as written in the playlist
	 * @return the absolute segment URL
	 * @throws MalformedURLException if no valid URL can be built
	 */
	private static URL resolveSegmentUrl(String segment) throws MalformedURLException {
		if (videoPathPrefix == null) {
			return new URL(segment);
		}
		return new URL(new URL(videoPathPrefix), segment);
	}

	/**
	 * Downloads one segment. The data is written to a ".part" file that is
	 * renamed only after the download completed, so an interrupted transfer is
	 * never mistaken for a finished segment on the next run.
	 *
	 * @param url        segment URL
	 * @param targetPath final path of the segment file
	 * @throws FileNotFoundException if the server reports that the segment does not exist
	 * @throws IOException           on any other transfer or file system failure
	 */
	private static void downloadSegment(URL url, String targetPath) throws IOException {
		File target = new File(targetPath);
		File partial = new File(targetPath + ".part");

		URLConnection conn = url.openConnection();
		if (conn instanceof HttpURLConnection) {
			((HttpURLConnection) conn).setInstanceFollowRedirects(false);
		}

		boolean complete = false;
		try {
			try (InputStream inStream = conn.getInputStream();
					FileOutputStream fs = new FileOutputStream(partial)) {
				byte[] buffer = new byte[8192];
				int byteRead;
				while ((byteRead = inStream.read(buffer)) != -1) {
					fs.write(buffer, 0, byteRead);
				}
			}
			if (!partial.renameTo(target)) {
				throw new IOException("Could not rename " + partial + " to " + target);
			}
			complete = true;
		} finally {
			if (!complete) {
				partial.delete();
			}
		}
	}

	/**
	 * Downloads the playlist that lists the TS segments and remembers the
	 * directory it was loaded from in {@link #videoPathPrefix}.
	 *
	 * <p>If the given URL is a master playlist, its first variant playlist is
	 * downloaded and returned. If the variant does not exist, the master
	 * playlist itself is returned. If the URL already is a media playlist, it
	 * is returned unchanged.
	 *
	 * @param urlpath URL of the (master or media) playlist
	 * @return the playlist text with every line terminated by '\n', or
	 *         {@code null} when {@code urlpath} is null or the download fails
	 */
	public static String getIndexFile(String urlpath) {
		if (urlpath == null) {
			return null;
		}
		try {
			URL url = new URL(urlpath);
			String content = readText(url.openStream());

			String variant = findVariantPlaylist(content);
			if (variant == null) {
				// Already a media playlist: segments are relative to this URL.
				videoPathPrefix = directoryOf(url);
				return content;
			}

			// Resolve the variant against the master playlist URL.
			URL url2 = new URL(url, variant);
			videoPathPrefix = directoryOf(url2);
			URLConnection conn2 = url2.openConnection();
			if (conn2 instanceof HttpURLConnection) {
				((HttpURLConnection) conn2).setInstanceFollowRedirects(false);
			}

			try {
				return readText(conn2.getInputStream());
			} catch (FileNotFoundException e) {
				System.out.println(videoName + "链接错误");
			}

			return content;
		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Reads a stream completely as UTF-8 text and closes it.
	 *
	 * @param stream stream to read
	 * @return all lines, each terminated by '\n'
	 * @throws IOException if reading fails
	 */
	private static String readText(InputStream stream) throws IOException {
		StringBuilder text = new StringBuilder();
		try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"))) {
			String line;
			while ((line = reader.readLine()) != null) {
				text.append(line).append('\n');
			}
		}
		return text.toString();
	}

	/**
	 * Looks for the first variant playlist referenced by a master playlist.
	 *
	 * @param content playlist text
	 * @return the first URI line if it follows an {@code #EXT-X-STREAM-INF}
	 *         tag or ends in ".m3u8"; {@code null} when the first URI line is
	 *         something else or there is no URI line at all
	 */
	private static String findVariantPlaylist(String content) {
		boolean afterStreamInf = false;
		for (String rawLine : content.split("\\r?\\n")) {
			String line = rawLine.trim();
			if (line.isEmpty()) {
				continue;
			}
			if (line.startsWith("#")) {
				if (line.startsWith("#EXT-X-STREAM-INF")) {
					afterStreamInf = true;
				}
				continue;
			}
			// Only the first URI line decides: variant playlist or segment.
			return (afterStreamInf || hasExtension(line, ".m3u8")) ? line : null;
		}
		return null;
	}

	/**
	 * Returns the index at which the query string or fragment of a URI starts.
	 *
	 * @param uri URI text
	 * @return index of the first '?' or '#', or the length when there is neither
	 */
	private static int pathEnd(String uri) {
		int end = uri.length();
		int query = uri.indexOf('?');
		if (query >= 0) {
			end = query;
		}
		int fragment = uri.indexOf('#');
		if (fragment >= 0 && fragment < end) {
			end = fragment;
		}
		return end;
	}

	/**
	 * Tests whether the path part of a URI ends with the given extension,
	 * ignoring case as well as any query string or fragment.
	 *
	 * @param uri       URI text
	 * @param extension extension including the dot, e.g. ".ts"
	 * @return {@code true} if the path ends with the extension
	 */
	private static boolean hasExtension(String uri, String extension) {
		int end = pathEnd(uri);
		int start = end - extension.length();
		return start >= 0 && uri.regionMatches(true, start, extension, 0, extension.length());
	}

	/**
	 * Returns the URL up to and including the last '/' of its path.
	 *
	 * @param url URL of a playlist
	 * @return the base URL for resources relative to that playlist
	 */
	private static String directoryOf(URL url) {
		String text = url.toExternalForm();
		int end = pathEnd(text);
		return text.substring(0, text.lastIndexOf('/', end - 1) + 1);
	}

	/**
	 * Extracts the TS segment URIs from a media playlist.
	 *
	 * @param content playlist text, may be {@code null}
	 * @return every non-tag line whose path ends in ".ts", in playlist order
	 *         and including any query string; empty when content is null
	 */
	public static List<String> analysisIndex(String content) {
		List<String> list = new ArrayList<String>();
		if (content == null) {
			return list;
		}
		for (String rawLine : content.split("\\r?\\n")) {
			String line = rawLine.trim();
			// Lines starting with '#' are playlist tags or comments, never URIs.
			if (line.isEmpty() || line.startsWith("#")) {
				continue;
			}
			if (hasExtension(line, ".ts")) {
				list.add(line);
			}
		}
		return list;
	}

	/**
	 * Concatenates the downloaded segments into one output file.
	 *
	 * @param fileOutPath path of the merged file (overwritten if present)
	 * @param keyFileMap  segment index mapped to the segment file path; the
	 *                    segments are appended in ascending index order, and
	 *                    missing indices or files are skipped
	 */
	public static void composeFile(String fileOutPath, HashMap<Integer,String> keyFileMap){
		try (FileOutputStream fileOutputStream = new FileOutputStream(new File(fileOutPath))) {
			byte[] bytes = new byte[8192];
			// Sort by index instead of counting from 0: failed segments leave gaps in the keys.
			for (String nodePath : new TreeMap<Integer, String>(keyFileMap).values()) {
				if (nodePath == null) {
					continue;
				}
				File file = new File(nodePath);
				if (!file.exists()) {
					continue;
				}
				try (FileInputStream fis = new FileInputStream(file)) {
					int length;
					while ((length = fis.read(bytes)) != -1) {
						fileOutputStream.write(bytes, 0, length);
					}
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
			System.out.println("视频合成失败");
		}
	}
}

zzh_pride

关注

0
点赞
踩
16

收藏

觉得还不错? 一键收藏
0
评论
JAVA获取视频（TS类型）

JAVA获取视频（TS类型）在爬取视频的时候有的时候会碰到m3u8格式的视频，这种类型的视频是通过一个个片段进行播放。1.这种视频（https://ifeng.com-v-ifeng.com/20180716/21960_f0f836f8/index.m3u8）直接去访问的时候会显示如下图所示文件。第一次访问所获得内容2.所获得的内容中有“1000k/hls/index.m3u8”这样一行，发现这个正好是视频中第一个请求的地址，根据这个地址再访问（https://ifeng.com-v-if
复制链接

扫一扫