C# 实现爬虫获取视频
以这个大佬的网站为例
看到他后台访问的接口
第六行是视频的加密方式,很明显是 AES 加密。这时候只要知道它的 Key 和 IV(初始化向量),就能轻而易举地解密拿到视频了。下面是所有的 ts 文件——开搞!
Key的位置很明显就是那个Key.key
Key 的问题解决了,该轮到 IV 了。找了很久都没有发现一点 16 字节的影子,翻看这篇文章的最下面,发现 IV 有可能就是 ts 的文件名:文件名正好 8 位,×2 就拿到 16 位。代入进去一试,果然就是文件名×2。这时候就可以用 Http 开始爬了。
文件拿是拿到了 不过这怎么看啊!! 每个都是4,5秒
还得想法子把ts文件全部合并 这时候我又看到 这篇文章
不得不说啊,还得是面向百度编程,但是今天属实是有点太晚了,就先不搞了,后面有闲时间补上
整个源码很简洁,也就 200 行代码搞定,其中有不少代码还是 Copy 的。这里我是想每天爬一次,所以用 Quartz 实现的。如果你只想一次性执行,可以把 Execute 中的代码放到控制台程序的 Main 里,效果是一样的(.NET 6 的控制台模板使用顶级语句,Main 函数被简化掉了)。
也想使用Quartz的请看我这篇文章
想拿全部的视频 先爬首页拿到 M3u8的List 完事之后 代码中的 m3u8Url 换成动态的就 ok
using Quartz;
using QuzrtzJob.Service;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace QuzrtzJob.Factory
{
public class TestJob : IJob
{
/// <summary>
/// File names of the ts segments that failed to download or decrypt during the current run.
/// </summary>
private static readonly List<string> error_arr = new List<string>();

/// <summary>
/// Quartz job entry point: reads a hard-coded m3u8 playlist, downloads every ts segment it
/// lists, decrypts each one with AES-CBC (IV = the 8-character segment name repeated twice)
/// and writes the plaintext segment into the local save directory.
/// </summary>
/// <param name="context">Quartz execution context; not used by this job.</param>
/// <returns>An already-completed task; all work is done synchronously before returning.</returns>
public Task Execute(IJobExecutionContext context)
{
    string aesKey = "62e18599ccbc8be9";
    string m3u8Url = "https://vod10.bdzybf.com/20211031/JFVHcrd7/1000kb/hls/index.m3u8"; // online m3u8 address
    string savePath = "D:\\VIDEO\\"; // local root directory for downloads
    string saveFileName = "VIDEO_FILE_NAME"; // target folder name; when empty the m3u8 file name is used

    // The list is static, so without this reset failures from earlier scheduled runs
    // would be reported again on every later run.
    error_arr.Clear();

    try
    {
        // Create the local target directory (CreateDirectory is a no-op when it already exists).
        int index = m3u8Url.LastIndexOf("/");
        string dirName = string.IsNullOrEmpty(saveFileName) ? m3u8Url.Substring(index + 1) : saveFileName;
        string finalSavePath = Path.Combine(savePath, dirName);
        Directory.CreateDirectory(finalSavePath);

        // Read the playlist and collect every entry ending in ".ts".
        // The dot is escaped so that paths such as "/assets/x.ts" are not cut short at "ets".
        string m3u8Content = HttpGet(m3u8Url);
        Uri playlistUri = new Uri(m3u8Url);
        List<string> tsList = Regex.Matches(m3u8Content, @"\n(.*?\.ts)")
            .Select(m => m.Groups[1].Value)
            .ToList();

        if (tsList.Count == 0)
        {
            Console.WriteLine("m3u8Content 解析失败");
        }
        else
        {
            Console.WriteLine("m3u8Content 解析完成,共有 " + tsList.Count + " 个ts文件");
            foreach (string ts in tsList)
            {
                string tsFileName = ts.Substring(ts.LastIndexOf('/') + 1);
                try
                {
                    string saveFilepath = Path.Combine(finalSavePath, tsFileName);
                    if (File.Exists(saveFilepath))
                    {
                        Console.WriteLine($"文件 {saveFilepath} 已存在");
                        continue;
                    }

                    // The IV is the last 8 characters of the segment name (without ".ts"), doubled.
                    // A name that is too short only fails this segment instead of aborting the job.
                    string stem = tsFileName.Substring(0, tsFileName.Length - 3);
                    if (stem.Length < 8)
                    {
                        error_arr.Add(tsFileName);
                        Console.WriteLine("无法从文件名推导IV: " + tsFileName);
                        continue;
                    }
                    string iv = stem.Substring(stem.Length - 8);

                    // Resolve against the playlist URL so root-relative, relative and absolute
                    // segment entries all produce a valid download address.
                    string tsUrl = new Uri(playlistUri, ts).AbsoluteUri;
                    Console.WriteLine("开始下载ts: " + tsUrl);
                    byte[] encByte = HttpGetByte(tsUrl);
                    if (encByte == null)
                    {
                        error_arr.Add(tsFileName);
                        Console.WriteLine("HttpGetByte 结果返回null");
                        continue;
                    }

                    Console.WriteLine("开始解密, IV -> " + iv);
                    byte[] decByte = null;
                    try
                    {
                        decByte = AESDecrypt2(encByte, aesKey, iv + iv);
                    }
                    catch (Exception e1)
                    {
                        error_arr.Add(tsFileName);
                        Console.WriteLine("解密ts文件异常。" + e1.Message);
                    }

                    if (decByte != null)
                    {
                        // Save the decrypted segment.
                        File.WriteAllBytes(saveFilepath, decByte);
                        Console.WriteLine(tsFileName + " 下载完成");
                    }
                }
                catch (Exception ee)
                {
                    error_arr.Add(tsFileName);
                    Console.WriteLine("发生异常。" + ee);
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("发生异常。" + ex);
    }

    Console.WriteLine("所有操作已完成. 保存目录 " + savePath);
    if (error_arr.Count > 0)
    {
        // Report the distinct count so the number matches the list printed below it.
        List<string> failed = error_arr.Distinct().ToList();
        Console.WriteLine($"其中 共有{failed.Count}个文件下载失败:");
        foreach (string name in failed)
        {
            Console.WriteLine(name);
        }
    }

    // ReadKey throws InvalidOperationException when stdin is redirected (e.g. a service host),
    // so only wait for a key press when an interactive console is attached.
    if (!Console.IsInputRedirected)
    {
        Console.ReadKey();
    }

    // Returning null here would make the scheduler fail when it awaits the job's task.
    return Task.CompletedTask;
}
/// <summary>
/// Performs a blocking HTTP GET and returns the response body decoded as UTF-8 text.
/// </summary>
/// <param name="url">Absolute URL to request.</param>
/// <returns>
/// The response body, or an empty string when the request fails for any reason
/// (the failure is written to the console instead of being thrown).
/// </returns>
public static string HttpGet(string url)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = 20000; // milliseconds

        // Dispose the response as well as the reader so the underlying connection is released.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Callers treat "" as "request failed"; log the details here, one entry per line.
        Console.WriteLine("HttpGet 异常," + ex.Message);
        Console.WriteLine(ex);
        return "";
    }
}
/// <summary>
/// Performs a blocking HTTP GET and returns the raw response body.
/// </summary>
/// <param name="url">Absolute URL to request.</param>
/// <returns>
/// The response bytes, or <c>null</c> when the request fails for any reason
/// (the failure is written to the console instead of being thrown).
/// </returns>
public static byte[] HttpGetByte(string url)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = 20000; // milliseconds
        request.Method = "GET";

        using (WebResponse response = request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (MemoryStream collected = new MemoryStream())
        {
            // Copy until end of stream rather than sizing a buffer from ContentLength:
            // ContentLength is -1 when the server does not send it (e.g. chunked transfer),
            // which previously made the buffer allocation throw and the download fail.
            body.CopyTo(collected);
            return collected.ToArray();
        }
    }
    catch (Exception ex)
    {
        // Callers treat null as "download failed"; log the details here, one entry per line.
        Console.WriteLine("HttpGetByte 异常," + ex.Message);
        Console.WriteLine(ex);
        return null;
    }
}
/// <summary>
/// Decrypts a buffer with AES in CBC mode using PKCS7 padding.
/// </summary>
/// <param name="cipherText">Encrypted bytes; must be non-empty.</param>
/// <param name="Key">AES key as text; its ASCII bytes are used as the key material.</param>
/// <param name="IV">Initialization vector as text; its ASCII bytes are used as the IV.</param>
/// <returns>The decrypted bytes with the padding removed.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="cipherText"/>, <paramref name="Key"/> or <paramref name="IV"/>
/// is null or empty.
/// </exception>
/// <exception cref="CryptographicException">
/// Thrown when the key or IV has an invalid length, or when the data cannot be decrypted
/// (for example because of a wrong key/IV or corrupted padding).
/// </exception>
public static byte[] AESDecrypt2(byte[] cipherText, string Key, string IV)
{
    // Check arguments. Empty values keep throwing ArgumentNullException for compatibility.
    if (cipherText == null || cipherText.Length <= 0)
    {
        throw new ArgumentNullException(nameof(cipherText));
    }
    if (Key == null || Key.Length <= 0)
    {
        throw new ArgumentNullException(nameof(Key));
    }
    if (IV == null || IV.Length <= 0)
    {
        throw new ArgumentNullException(nameof(IV));
    }

    // Aes.Create() replaces the obsolete AesManaged type.
    using (Aes aesAlg = Aes.Create())
    {
        aesAlg.Mode = CipherMode.CBC;
        aesAlg.Padding = PaddingMode.PKCS7;
        aesAlg.Key = Encoding.ASCII.GetBytes(Key);
        aesAlg.IV = Encoding.ASCII.GetBytes(IV);

        using (ICryptoTransform decryptor = aesAlg.CreateDecryptor())
        {
            // Transform the whole buffer in one call. A single CryptoStream.Read is not
            // guaranteed to return all of the plaintext, which could truncate the segment.
            return decryptor.TransformFinalBlock(cipherText, 0, cipherText.Length);
        }
    }
}
}
}