百度云里的一个 AVI 视频被“扫黄”毁坏掉了，于是想研究一下是怎么回事。
参考
http://www.alexander-noe.com/video/documentation/avi.pdf
http://blog.csdn.net/d_l_u_f/article/details/7309212
用 Python 代码读取 LIST 信息：
import os
def charSequece2Int(data):
    """Decode a little-endian unsigned integer from a byte sequence.

    ``data`` may be a ``str`` of single characters, a ``bytes`` object or a
    ``bytearray``; the first element is the least significant byte.  An
    empty sequence decodes to 0.
    """
    ret = 0
    # Walk from the most significant (last) element down to the first.
    for x in data[::-1]:
        # Iterating bytes/bytearray on Python 3 already yields ints, while
        # str (and Python 2 byte strings) yields 1-character strings.
        value = x if isinstance(x, int) else ord(x)
        ret = ret * 256 + value
    return ret
def readList(data, cursor, prefix=''):
    """Parse one RIFF element (a LIST/RIFF container or a plain chunk).

    Parameters:
        data:   byte string holding the RIFF structure.
        cursor: offset in ``data`` at which the element header starts.
        prefix: text printed in front of every diagnostic line.

    Returns a dict describing the element, or an empty dict when there are
    not enough bytes left for a header or when the size field is 0.

    Keys of a non-empty result:
        'dwList'   -- b'LIST' or b'RIFF' (containers only)
        'dwSize'   -- value of the 32-bit little-endian size field
        'dwFourCC' -- the container type, or the chunk id for plain chunks
        'data'     -- payload bytes (for containers: without the type FourCC)
        'len'      -- bytes to advance to reach the next element, including
                      the pad byte that follows an odd-sized payload

    The parsed header fields are also printed, each line led by ``prefix``.
    """
    import struct  # local import so the block does not rely on file-level imports

    List = {}
    # Every element starts with an 8-byte header: FourCC id + uint32 size.
    if len(data) < cursor + 8:
        return List
    tag = data[cursor:cursor + 4]
    dwSize = struct.unpack_from('<I', data, cursor + 4)[0]
    if dwSize == 0:
        # Zero-sized elements are reported as "nothing parsed".
        return {}
    cursor += 8
    end = cursor + dwSize

    # b'' literals compare equal to str on Python 2 and to bytes on Python 3.
    if tag in (b'LIST', b'RIFF'):
        # Containers carry an extra 4-byte type FourCC after the size field.
        if len(data) < cursor + 4:
            return {}
        List['dwList'] = tag
        List['dwSize'] = dwSize
        List['dwFourCC'] = data[cursor:cursor + 4]
        List['data'] = data[cursor + 4:end]
    else:
        List['dwSize'] = dwSize
        List['dwFourCC'] = tag
        List['data'] = data[cursor:end]

    # RIFF elements are word-aligned: an odd-sized payload is followed by
    # one pad byte that is not counted in dwSize.
    List['len'] = 8 + dwSize + (dwSize & 1)

    def show(value):
        # Render byte tags as text so the output reads the same on 2 and 3.
        return value if isinstance(value, str) else value.decode('latin-1')

    if 'dwList' in List:
        print('%s dwList: %s' % (prefix, show(List['dwList'])))
    print('%s dwSize: %s' % (prefix, dwSize))
    print('%s dwFourCC: %s' % (prefix, show(List['dwFourCC'])))
    return List
def recursiveReadList(data, cursor, prefix=''):
    """Print every RIFF element in ``data`` starting at ``cursor``.

    Elements are read one after another with ``readList``.  LIST/RIFF
    containers are descended into, with one more tab added to ``prefix``
    per nesting level; a container whose type is 'movi' is listed but not
    descended into (presumably because it holds the bulk stream data).
    Iteration stops at the first position where ``readList`` returns no
    'len' entry.
    """
    # b'' literals compare equal to str on Python 2 and to bytes on Python 3.
    listType = (b'LIST', b'RIFF')
    while True:
        print('')  # blank separator line before each element
        List = readList(data, cursor, prefix)
        if 'len' not in List:
            break
        if List.get('dwList') in listType and List['dwFourCC'] != b'movi':
            # The payload is a self-contained sequence of elements.
            recursiveReadList(List['data'], 0, prefix + '\t')
        cursor += List['len']
def main(avi='../jpm.avi'):
    """Dump the RIFF structure of the AVI file at path ``avi``.

    The whole file is read into memory, its size is printed, and the
    element tree is then printed by ``recursiveReadList``.
    """
    # The context manager closes the file even if reading fails.
    with open(avi, 'rb') as f:
        data = f.read()
    print('len(data)= %d' % len(data))
    recursiveReadList(data, 0)
# Run the dump only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
经过对比发现，文件开头的 1749322 个字节被度娘替换掉了，整个文件也无法恢复成最初的样子了（至少我不会）。