Chromium资源文件.pak解包方法(python)

最新推荐文章于 2020-12-19 16:23:25 发布

dzhjsofo

最新推荐文章于 2020-12-19 16:23:25 发布

阅读量3.3k

点赞数

分类专栏： chromium

chromium 专栏收录该内容

28 篇文章 0 订阅

订阅专栏

[python]view plaincopyprint? 
   
 import collections  
 import struct  
 import sys  
 def ReadFile(filename, encoding):
   """Returns the contents of `filename`, read according to `encoding`.

   An `encoding` of 0 opens the file with mode 'rb'; any other value opens
   it with mode 'rU'. The data is returned as read when `encoding` is 0 or 1;
   otherwise `encoding` is passed to `decode()` on the data first.
   """
   if encoding == 0:
     open_mode = 'rb'
   else:
     open_mode = 'rU'
   with open(filename, open_mode) as handle:
     contents = handle.read()
   if encoding in (0, 1):
     return contents
   return contents.decode(encoding)
   
 # Version number the header of a .pak file is compared against.
 PACK_FILE_VERSION = 4
 # Header size in bytes: two uint32s (file version, number of entries) and
 # one uint8 (encoding of text resources), i.e. the packed "<IIB" layout.
 HEADER_LENGTH = struct.calcsize("<IIB")
 class WrongFileVersion(Exception):
   """Raised when a .pak header version differs from PACK_FILE_VERSION."""


 # Return type of UnpackDataPack: the id -> raw data mapping plus the
 # text-encoding byte taken from the file header.
 DataPackContents = collections.namedtuple(
     'DataPackContents', 'resources encoding')


 def UnpackDataPack(input_file):
   """Unpacks a data pack file, writing every resource to '<id>.png'.

   Each resource is written to the current working directory and is also
   collected in a dictionary keyed by resource id.

   Args:
     input_file: path of the .pak file to read.

   Returns:
     A DataPackContents(resources, encoding) tuple: `resources` maps each
     resource id to its raw data, `encoding` is the encoding byte read from
     the file header.

   Raises:
     WrongFileVersion: the header version is not PACK_FILE_VERSION.
     struct.error: the file is too short to hold the header or the index.
   """
   data = ReadFile(input_file, 0)

   # Read the header.
   version, num_entries, encoding = struct.unpack(
       "<IIB", data[:HEADER_LENGTH])
   if version != PACK_FILE_VERSION:
     # A single parenthesised argument prints identically under Python 2
     # and Python 3; the double space matches the original output.
     print("Wrong file version in  " + input_file)
     raise WrongFileVersion(input_file)

   resources = {}
   if num_entries == 0:
     return DataPackContents(resources, encoding)

   # Read the index and data. Each index entry is a uint16 id followed by a
   # uint32 offset. A resource ends where the following entry's offset
   # begins, so the loop reads num_entries + 1 index entries in total.
   # unpack_from reads in place instead of re-slicing the remaining buffer
   # on every iteration.
   index_entry = struct.Struct("<HI")
   for i in range(num_entries):
     position = HEADER_LENGTH + i * index_entry.size
     resource_id, offset = index_entry.unpack_from(data, position)
     _, next_offset = index_entry.unpack_from(
         data, position + index_entry.size)
     payload = data[offset:next_offset]
     resources[resource_id] = payload
     with open('{0}.png'.format(resource_id), 'wb') as out_file:
       out_file.write(payload)
   return DataPackContents(resources, encoding)
 def main():
   """Unpacks the .pak file named by the first command-line argument.

   Does nothing when no argument was given.
   """
   args = sys.argv[1:]
   if not args:
     return
   UnpackDataPack(args[0])
   
   
 # Run main() only when this file is executed directly as a script.
 if __name__ == '__main__':  
   main()  

Chromium 的资源文件在 Linux 下以 .pak 格式打包，文件格式很简单，并没有压缩。一般没有解包的必要，因为编译时会根据源文件的变化自动重新生成。资源内容只在运行时由 Chromium 根据资源 ID 获取，所以 .pak 文件中只存储了资源的 ID，并没有存储资源文件类型，因此解包时只能默认保存为 png 格式。

#python unpack.py chrome_100_percent.pak
就可以解包chrome_100_percent.pak中的png图片资源了。

如果非要获取资源文件类型的话，只能从内容判断了，根据不同文件的文件头可以大致分辨类型：

[python]view plaincopyprint? 
   
 import collections  
 import struct  
 import sys  
 def ReadFile(filename, encoding):
   """Returns the contents of `filename`, read according to `encoding`.

   An `encoding` of 0 opens the file with mode 'rb'; any other value opens
   it with mode 'rU'. The data is returned as read when `encoding` is 0 or 1;
   otherwise `encoding` is passed to `decode()` on the data first.
   """
   if encoding == 0:
     open_mode = 'rb'
   else:
     open_mode = 'rU'
   with open(filename, open_mode) as handle:
     contents = handle.read()
   if encoding in (0, 1):
     return contents
   return contents.decode(encoding)
   
 # Version number the header of a .pak file is compared against.
 PACK_FILE_VERSION = 4
 # Header size in bytes: two uint32s (file version, number of entries) and
 # one uint8 (encoding of text resources), i.e. the packed "<IIB" layout.
 HEADER_LENGTH = struct.calcsize("<IIB")
 class WrongFileVersion(Exception):
   """Raised when a .pak header version differs from PACK_FILE_VERSION."""


 # Return type of UnpackDataPack: the id -> raw data mapping plus the
 # text-encoding byte taken from the file header.
 DataPackContents = collections.namedtuple(
     'DataPackContents', 'resources encoding')

 # File extension chosen from the first byte of a resource.
 _FILE_TYPE_BY_FIRST_BYTE = {
     b'<': 'html',
     b'\x89': 'png',
     b'/': 'js',
 }


 def _GuessFileType(payload):
   """Returns the extension for `payload` by its first byte, else 'bin'."""
   return _FILE_TYPE_BY_FIRST_BYTE.get(payload[:1], 'bin')


 def UnpackDataPack(input_file):
   """Unpacks a data pack file, writing every resource to '<id>.<type>'.

   Each resource is written to the current working directory, with an
   extension guessed from its first byte ('html', 'png', 'js' or 'bin'),
   and is also collected in a dictionary keyed by resource id.

   Args:
     input_file: path of the .pak file to read.

   Returns:
     A DataPackContents(resources, encoding) tuple: `resources` maps each
     resource id to its raw data, `encoding` is the encoding byte read from
     the file header.

   Raises:
     WrongFileVersion: the header version is not PACK_FILE_VERSION.
     struct.error: the file is too short to hold the header or the index.
   """
   data = ReadFile(input_file, 0)

   # Read the header.
   version, num_entries, encoding = struct.unpack(
       "<IIB", data[:HEADER_LENGTH])
   if version != PACK_FILE_VERSION:
     # A single parenthesised argument prints identically under Python 2
     # and Python 3; the double space matches the original output.
     print("Wrong file version in  " + input_file)
     raise WrongFileVersion(input_file)

   resources = {}
   if num_entries == 0:
     return DataPackContents(resources, encoding)

   # Read the index and data. Each index entry is a uint16 id followed by a
   # uint32 offset. A resource ends where the following entry's offset
   # begins, so the loop reads num_entries + 1 index entries in total.
   # unpack_from reads in place instead of re-slicing the remaining buffer
   # on every iteration.
   index_entry = struct.Struct("<HI")
   for i in range(num_entries):
     position = HEADER_LENGTH + i * index_entry.size
     resource_id, offset = index_entry.unpack_from(data, position)
     _, next_offset = index_entry.unpack_from(
         data, position + index_entry.size)
     payload = data[offset:next_offset]
     resources[resource_id] = payload

     # Diagnostic output kept from the original: the numeric value of the
     # resource's first byte. Skipped for an empty resource, which has no
     # first byte to report.
     first_byte = payload[:1]
     if first_byte:
       print(ord(first_byte))

     # The type is sniffed from the resource's own data only, so an empty
     # resource is saved as 'bin' rather than typed by its neighbour's data.
     out_name = '{0}.{1}'.format(resource_id, _GuessFileType(payload))
     with open(out_name, 'wb') as out_file:
       out_file.write(payload)
   return DataPackContents(resources, encoding)
 def main():
   """Unpacks the .pak file named by the first command-line argument.

   Does nothing when no argument was given.
   """
   args = sys.argv[1:]
   if not args:
     return
   UnpackDataPack(args[0])
   
   
 # Run main() only when this file is executed directly as a script.
 if __name__ == '__main__':  
   main()  

这里直接判断每个资源内容的第一个字节：如果是 '<'，则应该是 html 文件；如果是 '/'，则应该是 js 文件；如果是 PNG 文件头 '\x89PNG' 的首字节 '\x89'，则应该是 png 文件；其余情况一律保存为 bin 文件。

dzhjsofo

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Chromium资源文件.pak解包方法(python)

[python] view plaincopyprint?import collections import struct import sys def ReadFile(filename, encoding): mode = 'rb' if encoding == 0 else 'rU' with open(filename,
复制链接

扫一扫