今天来看一个swf格式的加密url逆向解析的例子:
简介
网页链接http://www.tvsky.tv/Industry/Show/278/33875/
通过浏览器加载进度条长度可以找到视频的地址:
视频详情:http://tvskysp.tvsky.tv:8082/hangyepindao/lvyou/16.flv
全局搜索视频详情中的部分关键元素也没有获得关键的信息:
鼠标指到视频上,在浏览器开发者工具的Elements页面找到加载的链接:
flvurl=lxxt4hGGB5e2T852Yfc5e2T88IFHl6b:b_3fOHU8s-qB2otjg3VfSc8-19K7_EhQ1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G29-Cl1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G296Cl1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G297Cl1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G298Cl1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G299Cl1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G29_Cl1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G2.Clr1a|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G2\Clr1a|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G2/Clr1a&isautoplay=1&adswf=
如果能把这一段解析出来,那么可能就是视频的真实播放url。
分析
那我们应该怎么做才能破解出这个URL呢?
首先,我们需要将这个页面上的Flash播放器给逆向一下,就像在爬HTML5视频网站碰到加密参数时逆向JavaScript一样。
但是Flash播放器是一个被编译后的.swf文件,我们并不能像JavaScript那样直接看到代码,需要先进行反编译。我们可以使用一款免费的flash反编译软件–JPEXS Free Flash Decompiler(FFDec),进行反编译。
github地址:https://github.com/jindrapetrik/jpexs-decompiler/releases
Windows版的下载地址:
https://github.com/jindrapetrik/jpexs-decompiler/releases/download/nightly1712/ffdec_11.2.0_nightly1712_setup.exe
安装的时候选择中文,直接下一步,中间要装一个java 1.8版本的jdk,选择默认安装即可。装好后如下:
安装好后,将element页的swf文件给下载下来:http://www.tvsky.tv/FlvPlay/Playerx.swf,它就是逆向对象。
用软件打开下载下来的swf文件,如下:
查看frame1 下面的DoAction脚本,大致浏览,脚本一共496行,很短,可以看到一些方法,如init(),还可以看到“正在加载Flv文件”,“正在缓冲”等操作视频的中文,可按ctrl+F进行搜索到,也可以搜索“flvurl”看一看在哪些地方出现(搜索flvurl是由于我们要解密的对象就是这种样式flvurl=lxxt4hGGB5e2T852Yfc5e2T88IFHl6b:b_3fOHU8s-qB2otjg3VfSc8-19K7_EhQ1Qa…),发现只有第35行出现过:
可以敏锐地发现,这个init就是我们需要的相关代码。
// Player initialisation: copies the externally supplied parameters into the
// player's own variables, decodes every entry of the flvurl playlist and,
// when no ad SWF is configured, shows the loading UI and schedules FlvPlay.
// NOTE(review): isautoplay, titlecon, adswf and flvurl are not declared in the
// code shown - presumably they are the parameters passed to the SWF
// (flvurl=...&isautoplay=1&adswf=); confirm.
function init()
{
_isautoplay = isautoplay;
_title = titlecon;
_ggswf = adswf;
var _loc2_ = flvurl;
// A missing flvurl only resets the local copy; _flvurl is not assigned here.
if(_loc2_ == undefined || _loc2_ == null)
{
_loc2_ = "";
}
else
{
// Entries are separated by "|"; each one is decrypted in place.
_flvurl = _loc2_.split("|");
var _loc1_ = 0;
while(_loc1_ < _flvurl.length)
{
_flvurl[_loc1_] = Pass2Str(_flvurl[_loc1_]);
_loc1_ = _loc1_ + 1;
}
}
// Normalise a missing ad SWF to the empty string.
if(_ggswf == undefined || _ggswf == null)
{
_ggswf = "";
}
// No ad configured: show the loading indicator and call FlvPlay after 2000 ms.
if(_ggswf.length == 0)
{
b_load._visible = true;
b_load.gotoAndStop(1);
yqbutton.loadflv._visible = true;
yqbutton.loadflv.loadflv.text = "正在加载Flv文件";
flvpp = setTimeout(FlvPlay,2000);
}
}
代码当中的Pass2Str()与它当中的NumS()方法都定位到:
// Maps character s to the PwdStr character found
// (_PwdAddLen + _PwdAddLen1 + 1) positions after it, wrapping past the end of
// PwdStr.
// NOTE(review): this shifts in the opposite direction to NumS, so it looks
// like the encoding counterpart; it is not called by the code shown - confirm.
function SNum(s, _PwdAddLen1)
{
var _loc1_ = PwdStr.indexOf(s);
_loc1_ = _loc1_ + (_PwdAddLen + _PwdAddLen1 + 1);
// Wrap around when the shifted position exceeds the alphabet length.
if(_loc1_ > PwdStr.length)
{
return PwdStr.substr(_loc1_ - PwdStr.length,1);
}
return PwdStr.substr(_loc1_,1);
}
// Maps character s to the PwdStr character found
// (_PwdAddLen + _PwdAddLen1 - 1) positions before it, wrapping past the start
// of PwdStr. Pass2Str calls this for every payload character.
// NOTE(review): the Python port in this article treats the computed position
// as 1-based - confirm how substr is indexed in the player's runtime.
function NumS(s, _PwdAddLen1)
{
var _loc1_ = PwdStr.indexOf(s);
_loc1_ = _loc1_ - (_PwdAddLen + _PwdAddLen1 - 1);
// Wrap around when the shifted position falls to zero or below.
if(_loc1_ <= 0)
{
return PwdStr.substr(_loc1_ + PwdStr.length,1);
}
return PwdStr.substr(_loc1_,1);
}
// Decrypts one encrypted string: walks the positions 1..Str.length, passes
// payload characters through NumS and treats every (_PwdLen + 1)-th position
// as a key character whose parseInt value becomes the extra shift for the
// characters that follow.
function Pass2Str(Str)
{
var _loc2_ = "";
var _loc3_ = "";
var _loc4_ = 0;
var _loc1_ = 1;
while(_loc1_ <= Str.length)
{
_loc2_ = Str.substr(_loc1_,1);
// Positions that are not a multiple of (_PwdLen + 1) carry payload.
if(_loc1_ % (_PwdLen + 1) != 0)
{
_loc3_ = _loc3_ + NumS(_loc2_,_loc4_);
}
else
{
// Key position: update the extra shift used from here on.
_loc4_ = parseInt(_loc2_);
}
_loc1_ = _loc1_ + 1;
}
return _loc3_;
}
// Frame script set-up: halt the timeline, pin the stage to the top-left
// without scaling and install a context menu with the built-in items hidden.
stop();
Stage.align = "TL";
Stage.scaleMode = "noScale";
myMenu = new ContextMenu();
myMenu.hideBuiltInItems();
_root.menu = myMenu;
// Substitution alphabet searched by SNum and NumS.
var PwdStr = "AbCdEfGhIjKlMnOpQrStUvWxYzaBcDeFgHiJkLmNoPqRsTuVwXyZ1234509876-_.\\/:";
// Not referenced by the functions shown here.
var PwdStrRan = "12345678987654321";
// Pass2Str treats every (_PwdLen + 1)-th position as a key character.
var _PwdLen = 4;
// Not referenced by the functions shown here.
var adTimeId = "";
var adTimeIdtime = 0;
// Base shift that SNum and NumS apply to every character.
var _PwdAddLen = 4;
代码
将上面的代码翻译成python语言,如下:
注意需要将要解析的字符串进行处理,每一个“|”是一条url(通过测试发现):
flvurl=lxxt4hGGB5e2T852Yfc5e2T88IFHl6b:b_3fOHU8s-qB2otjg3VfSc8-19K7_EhQ1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G29-Cl1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G296Cl1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G297Cl1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G298Cl1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G299Cl1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G29_Cl1Qa|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G2.Clr1a|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G2\Clr1a|lxxt3Gffa8H5w-85BIF3cZR06GDFj6b:b_6Irkx9T_Rc2otjg1TdQa49w3G2/Clr1a&isautoplay=1&adswf=
# pass2str treats every (_pwd_len + 1)-th character as a key digit
# (same value as _PwdLen in the SWF).
_pwd_len = 4
# Base shift num_s applies to every character (same value as _PwdAddLen in the SWF).
_pwd_add_len = 4
# Substitution alphabet, copied from PwdStr in the SWF.
pwd_str = "AbCdEfGhIjKlMnOpQrStUvWxYzaBcDeFgHiJkLmNoPqRsTuVwXyZ1234509876-_.\\/:"
def decode(flv_url: str):
    """Decrypt the ``flvurl`` value passed to the Flash player.

    Python port of the decoding loop in the player's ``init()``: the value is
    split on ``"|"`` and every piece is decrypted with ``pass2str``.

    :param flv_url: value of the ``flvurl`` Flash parameter (one or more
        encrypted URLs joined by ``"|"``)
    :return: list holding one decrypted URL per ``"|"``-separated piece,
        in the original order
    """
    return [pass2str(encrypted) for encrypted in flv_url.split("|")]
def pass2str(str_: str):
    """Decrypt a single encrypted URL string.

    Python port of ``Pass2Str`` from the SWF. Characters are visited with
    1-based positions. Every ``(_pwd_len + 1)``-th character is a key digit
    that sets the extra shift for the characters after it; all other
    characters are payload and are mapped back through ``num_s`` using the
    most recent key (0 before the first key digit).

    :param str_: encrypted URL string
    :return: decrypted URL string
    :raises ValueError: propagated from ``num_s`` when a payload character
        is not part of the substitution alphabet
    """
    group_size = _pwd_len + 1
    key = 0
    decoded = []
    for position, char in enumerate(str_, start=1):
        if position % group_size != 0:
            decoded.append(num_s(char, key))
        else:
            # Only ASCII digits are valid keys. str.isdigit() also accepts
            # characters such as superscript digits that int() rejects, so
            # compare against the ASCII range explicitly.
            key = int(char) if "0" <= char <= "9" else 0
    return "".join(decoded)
def num_s(s, _pwd_add_len1):
    """Map one encrypted character back to its plain character.

    Python port of ``NumS`` from the SWF. The character's index in
    ``pwd_str`` is moved back by ``_pwd_add_len + _pwd_add_len1 - 1`` and the
    result is read as a 1-based position in ``pwd_str``, wrapping around the
    start of the alphabet.

    :param s: single encrypted character
    :param _pwd_add_len1: extra shift taken from the most recent key digit
    :return: the decrypted character
    :raises ValueError: if ``s`` does not occur in ``pwd_str``
    """
    shift = _pwd_add_len + _pwd_add_len1 - 1
    position = pwd_str.index(s) - shift
    # ``position`` is 1-based; the modulo covers both the normal case and the
    # wrap-around for positions <= 0, for any shift size.
    return pwd_str[(position - 1) % len(pwd_str)]
if __name__ == '__main__':
    # Encrypted entry of the video currently playing on the page.
    str_url = "lxxt4hGGB5e2T852Yfc5e2T88IFHl6b:b_3fOHU8s-qB2otjg3VfSc8-19K7_EhQ1Qa"
    url_list = decode(str_url)
    # decode() returns a list, so this prints:
    # ['http://tvskysp.tvsky.tv:8082/hangyepindao/lvyou/16.flv']
    print(url_list)
总结
1.要仔细观察加密对象,尝试不同的办法解码,比如美拍的后半段是base64编码后得到的。
2.本例中的字符串对象,是多个url加密后的地址,中间通过“|”组合在一块的,但是每一条都很相似,判断它们是同一个东西。
如果你有空,可以想一想为什么使用js不能得到正确的结果,这有助于你理解python调用js:
# Experiment: evaluate the decompiled ActionScript unchanged as JavaScript via
# the execjs module and call Pass2Str on an encrypted URL. The string below is
# the script text handed to the JavaScript runtime.
js_content = """
function NumS(s, _PwdAddLen1)
{
var _loc1_ = PwdStr.indexOf(s);
_loc1_ = _loc1_ - (_PwdAddLen + _PwdAddLen1 - 1);
if(_loc1_ <= 0)
{
return PwdStr.substr(_loc1_ + PwdStr.length,1);
}
return PwdStr.substr(_loc1_,1);
}
function Pass2Str(Str)
{
var _loc2_ = "";
var _loc3_ = "";
var _loc4_ = 0;
var _loc1_ = 1;
while(_loc1_ <= Str.length)
{
_loc2_ = Str.substr(_loc1_,1);
if(_loc1_ % (_PwdLen + 1) != 0)
{
_loc3_ = _loc3_ + NumS(_loc2_,_loc4_);
}
else
{
_loc4_ = parseInt(_loc2_);
}
_loc1_ = _loc1_ + 1;
}
return _loc3_;
}
var PwdStr = "AbCdEfGhIjKlMnOpQrStUvWxYzaBcDeFgHiJkLmNoPqRsTuVwXyZ1234509876-_.\\/:";
var PwdStrRan = "12345678987654321";
var _PwdLen = 4;
var adTimeId = "";
var adTimeIdtime = 0;
var _PwdAddLen = 4;
"""
# Encrypted sample passed to the JavaScript Pass2Str.
url_str = "lxxt5Ihhc3cZR01XUbY2Byq57hegK3/./77jSLY1L4JU4qvli2uErB30V2f8.fIr4tD"
import execjs
# Compile the script, then invoke Pass2Str with the sample string.
jsContent = execjs.compile(js_content)
print(jsContent.call("Pass2Str",url_str))
# Output: UUQ2AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAsyC5AAAAAA
# The answer is already in the article - did you spot it?