使用 python rubymarshal 解析 .rvdata 数据文件

一、 代码

from rubymarshal.reader import loads
#from rubymarshal.writer import writes
from rubymarshal.classes import RubyObject


class ParseRVDataFile:
    """
    Parse a ``.rvdata`` file with ``rubymarshal`` and print its content.

    The file is cut into chunks at every Marshal header and each chunk is
    decoded with ``loads``. The decoded content is kept in ``self.result``.
    """

    def __init__(self, file_path):
        """
        Parse the file right away and keep the outcome.

        :param file_path: path of the .rvdata file to parse
        """

        # Decoded content: a single object, or a list with one entry per chunk.
        self.result = self.parse_rvdata_file(file_path)

    def parse_rvdata_file(self, file_path):
        """
        Read a .rvdata file and decode every Marshal chunk found in it.

        :param file_path: path of the file to read
        :return: the decoded object when the file yields exactly one chunk,
            otherwise a list with one decoded object per chunk (an empty
            list when the file contains no chunk at all)
        """

        with open(file=file_path, mode='rb') as file:
            byte_text = file.read()

        # Every chunk is re-prefixed with the header that split() removed.
        # NOTE(review): the same two bytes may also occur inside a payload, in
        # which case this split cuts an object in half -- confirm for the files used.
        header = b'\x04\x08'
        chunks = [header + part for part in byte_text.split(header) if part]

        if len(chunks) == 1:
            return loads(chunks[0])
        return [loads(chunk) for chunk in chunks]

    @staticmethod
    def _is_ruby_object(value):
        """Return True only when ``value`` is a direct ``RubyObject`` instance."""
        # Exact type match on purpose: the original checks compared types, so
        # instances of subclasses (if any) keep taking the non-object paths.
        return type(value) is RubyObject

    @staticmethod
    def _decode_items(items):
        """Return a copy of ``items`` with every non-empty bytes element decoded."""
        # Only bytes are decoded; str, bool, float, ... have no usable
        # ``decode`` and are passed through unchanged instead of crashing.
        return [item.decode() if isinstance(item, bytes) and item else item for item in items]

    def _print_attribute(self, name, value):
        """
        Print one attribute of a decoded object.

        bytes are decoded, non-empty lists of plain values are printed with
        their bytes decoded, and a RubyObject value has its attribute dict
        printed. Empty lists, lists of RubyObject and any other type print
        nothing.
        """
        if isinstance(value, bytes):
            print(name, '\t', value.decode())
        elif isinstance(value, list):
            # ``value and`` guards the [0] lookup against empty lists.
            if value and not self._is_ruby_object(value[0]):
                print(name, '\t', self._decode_items(value))
        elif self._is_ruby_object(value):
            print(value.attributes)

    def _print_list_entry(self, entry):
        """Print the readable parts of one element of a list result."""
        if isinstance(entry, bytes):
            print(entry.decode())
        elif isinstance(entry, list):
            for item in entry:
                if isinstance(item, bytes):
                    print(item.decode())
                elif isinstance(item, list):
                    # One more nesting level: only its bytes are printed.
                    for leaf in item:
                        if isinstance(leaf, bytes):
                            print(leaf.decode())
                elif self._is_ruby_object(item):
                    # Previously an unreachable branch: objects inside a list
                    # now get their attributes printed as intended.
                    print(item.attributes)
        elif self._is_ruby_object(entry):
            for name, value in entry.attributes.items():
                self._print_attribute(name, value)

    def get_data(self, result):
        """
        Print a readable view of a parse result (not every shape is covered).

        :param result: a value returned by ``parse_rvdata_file``: either a list
            of decoded chunks or a single object exposing ``ruby_class_name``
            and ``attributes``
        :return: None, the output is only printed
        """

        print(result)

        if isinstance(result, list):
            for data in result:
                print(data)
                self._print_list_entry(data)
            return

        print('Class Name:', result.ruby_class_name, '\n------All Attributes:------')
        for key, value in result.attributes.items():
            if isinstance(value, bytes):
                self._print_attribute(key, value)
            elif isinstance(value, list):
                if value and self._is_ruby_object(value[0]):
                    # A list of objects: print the attribute name, then each object.
                    print(key)
                    for obj in value:
                        print(obj.attributes)
                else:
                    self._print_attribute(key, value)
            elif self._is_ruby_object(value):
                # A nested object: print its attributes one level deep.
                for name, nested in value.attributes.items():
                    self._print_attribute(name, nested)


if __name__ == '__main__':

    # Path of the .rvdata file to inspect
    file_path = r'D:\MyDownloads\Download\Metempsyc\Metempsyc\Saves\Save13.rvdata'

    data = ParseRVDataFile(file_path)
    # get_data prints its report itself and returns None, so it is not
    # wrapped in print() (that only appended a stray "None" line).
    data.get_data(data.result)

    # A file saved after modification can be checked by loading it again:
    # new_file_path = r''
    # with open(new_file_path, 'rb') as f:
    #   print(loads(f.read()))

二、如果解析过程出错,可将通过 pip install rubymarshal 安装的库中的 reader.py 文件修改如下:

import io
import re

from rubymarshal.classes import (
    UsrMarshal,
    Symbol,
    UserDef,
    Extended,
    Module,
    RubyString,
    RubyObject,
    registry as global_registry,
)
from rubymarshal.constants import (
    TYPE_NIL,
    TYPE_TRUE,
    TYPE_FALSE,
    TYPE_FIXNUM,
    TYPE_IVAR,
    TYPE_STRING,
    TYPE_SYMBOL,
    TYPE_ARRAY,
    TYPE_HASH,
    TYPE_FLOAT,
    TYPE_BIGNUM,
    TYPE_REGEXP,
    TYPE_USRMARSHAL,
    TYPE_SYMLINK,
    TYPE_LINK,
    TYPE_DATA,
    TYPE_OBJECT,
    TYPE_STRUCT,
    TYPE_MODULE,
    TYPE_CLASS,
    TYPE_USERDEF,
    TYPE_EXTENDED,
)
from rubymarshal.utils import read_ushort, read_sbyte, read_ubyte

__author__ = "Matthieu Gallet"


class Reader:
    """
    Decoder for a Ruby Marshal byte stream.

    This is a locally patched copy of the library reader. On top of the
    regular type tokens it tolerates a zero-byte token, hashes keyed by
    lists, and object links pointing past the objects decoded so far
    (the patched spots are marked "local patch" below).

    ``symbols`` and ``objects`` are the tables that symlink and link tokens
    index into.
    """

    def __init__(self, fd, registry=None):
        """
        :param fd: binary file-like object to read from
        :param registry: mapping from Ruby class names to Python classes; the
            module-level registry is used when it is omitted or empty
        """
        self.symbols = []
        self.objects = []
        self.fd = fd
        self.registry = registry or global_registry

    def read(self, token=None):
        """
        Decode and return the next value of the stream.

        :param token: one-byte type token already consumed by the caller;
            when None the token is read from ``fd``
        :return: the decoded Python value
        :raises ValueError: on an unknown token, an invalid class name or a
            registry entry of the wrong base class
        """
        if token is None:
            token = self.fd.read(1)

        # From https://docs.ruby-lang.org/en/2.1.0/marshal_rdoc.html:
        # The stream contains only one copy of each object for all objects except
        # true, false, nil, Fixnums and Symbols.
        object_index = None
        if token in (
            TYPE_IVAR,
            #TYPE_EXTENDED, TYPE_UCLASS, ????
            TYPE_CLASS,
            TYPE_MODULE,
            TYPE_FLOAT,
            TYPE_BIGNUM,
            TYPE_REGEXP,
            TYPE_ARRAY,
            TYPE_HASH,
            TYPE_STRUCT,
            TYPE_OBJECT,
            TYPE_DATA,
            TYPE_USRMARSHAL,
        ):
            # Reserve the slot before decoding, so values nested inside this
            # one are registered after it; the slot is filled at the end.
            self.objects.append(None)
            object_index = len(self.objects)

        result = None
        if token == TYPE_NIL:
            pass

        # Local patch: a zero byte where a type token is expected decodes to
        # an empty string instead of raising "token ... is not recognized".
        elif token == b'\x00':
            result = ''

        elif token == TYPE_TRUE:
            result = True
        elif token == TYPE_FALSE:
            result = False
        elif token == TYPE_IVAR:
            sub_token = self.fd.read(1)
            result = self.read(sub_token)
            flags = None
            if sub_token == TYPE_REGEXP:
                options = ord(self.fd.read(1))
                flags = 0
                if options & 1:
                    flags |= re.IGNORECASE
                if options & 4:
                    flags |= re.MULTILINE
            attributes = self.read_attributes()
            if sub_token in (TYPE_STRING, TYPE_REGEXP):
                encoding = self._get_encoding(attributes)
                try:
                    result = result.decode(encoding)
                except UnicodeDecodeError:
                    # Fall back when the declared encoding does not fit the bytes.
                    result = result.decode("unicode-escape")
            # string instance attributes are discarded
            if attributes and sub_token == TYPE_STRING:
                result = RubyString(result, attributes)
            if sub_token == TYPE_REGEXP:
                result = re.compile(str(result), flags)
            elif attributes:
                result.set_attributes(attributes)
        elif token == TYPE_STRING:
            size = self.read_long()
            result = self.fd.read(size)
        elif token == TYPE_SYMBOL:
            result = self.read_symreal()
        elif token == TYPE_FIXNUM:
            result = self.read_long()
        elif token == TYPE_ARRAY:
            num_elements = self.read_long()
            result = [self.read() for _ in range(num_elements)]
        elif token == TYPE_HASH:
            num_elements = self.read_long()
            result = {}

            # Local patch: counter used to name entries whose key is a list.
            list_key_count = 0

            for _ in range(num_elements):
                key = self.read()
                value = self.read()

                # Local patch: a list is unhashable and cannot be a dict key,
                # so the (key, value) pair is stored under a generated
                # 'key_listN' name; 'value' holds the value of the latest
                # such pair. Better done in a subclass than in the library file.
                if isinstance(key, list):
                    list_key_count += 1
                    result['key_list'+str(list_key_count)] = ((key, value))
                    result['value'] = value
                else:
                    result[key] = value
        elif token == TYPE_FLOAT:
            size = self.read_long()
            floatn = self.fd.read(size)
            # Only the text before the first NUL byte is the number.
            floatn = floatn.split(b"\0")
            result = float(floatn[0].decode("utf-8"))
        elif token == TYPE_BIGNUM:
            sign = 1 if self.fd.read(1) == b"+" else -1
            num_elements = self.read_long()
            result = 0
            factor = 1
            # Little-endian sequence of 16-bit words.
            for _ in range(num_elements):
                result += self.read_short() * factor
                factor *= 2 ** 16
            result *= sign
        elif token == TYPE_REGEXP:
            size = self.read_long()
            result = self.fd.read(size)
        elif token == TYPE_USRMARSHAL:
            class_symbol = self.read()
            if not isinstance(class_symbol, Symbol):
                raise ValueError("invalid class name: %r" % class_symbol)
            class_name = class_symbol.name
            attr_list = self.read()
            python_class = self.registry.get(class_name, UsrMarshal)
            if not issubclass(python_class, UsrMarshal):
                raise ValueError(
                    "invalid class mapping for %r: %r should be a subclass of %r."
                    % (class_name, python_class, UsrMarshal)
                )
            result = python_class(class_name)
            result.marshal_load(attr_list)
        elif token == TYPE_SYMLINK:
            result = self.read_symlink()
        elif token == TYPE_LINK:
            link_id = self.read_long()

            # NOTE(review): TYPE_LINK is not in the tuple above, so
            # object_index is always None here and this check never fires.
            if object_index and link_id >= object_index:
                raise ValueError(
                    "invalid link destination: %d should be lower than %d."
                    % (link_id, object_index)
                )

            # Local patch: a link past the end of the object table decodes to
            # an empty string instead of raising IndexError.
            if link_id >= len(self.objects):
                result = ''
            else:
                result = self.objects[link_id]
        elif token == TYPE_USERDEF:
            class_symbol = self.read()
            private_data = self.read(TYPE_STRING)
            if not isinstance(class_symbol, Symbol):
                raise ValueError("invalid class name: %r" % class_symbol)
            class_name = class_symbol.name
            python_class = self.registry.get(class_name, UserDef)
            if not issubclass(python_class, UserDef):
                raise ValueError(
                    "invalid class mapping for %r: %r should be a subclass of %r."
                    % (class_name, python_class, UserDef)
                )
            result = python_class(class_name)
            # noinspection PyProtectedMember
            result._load(private_data)
        elif token == TYPE_MODULE:
            data = self.read(TYPE_STRING)
            module_name = data.decode()
            result = Module(module_name, None)
        elif token == TYPE_OBJECT:
            class_symbol = self.read()
            assert isinstance(class_symbol, Symbol)
            class_name = class_symbol.name
            python_class = self.registry.get(class_name, RubyObject)
            if not issubclass(python_class, RubyObject):
                raise ValueError(
                    "invalid class mapping for %r: %r should be a subclass of %r."
                    % (class_name, python_class, RubyObject)
                )
            attributes = self.read_attributes()
            result = python_class(class_name, attributes)
        elif token == TYPE_EXTENDED:
            class_name = self.read(TYPE_STRING)
            result = Extended(class_name, None)
        elif token == TYPE_CLASS:
            data = self.read(TYPE_STRING)
            class_name = data.decode()
            if class_name in self.registry:
                result = self.registry[class_name]
            else:
                # Unknown class: build a RubyObject subclass named after the
                # last segment of the Ruby class path.
                result = type(
                    class_name.rpartition(":")[2],
                    (RubyObject,),
                    {"ruby_class_name": class_name},
                )
        else:
            raise ValueError("token %s is not recognized" % token)
        if object_index is not None:
            # Fill the slot reserved at the top of this call.
            self.objects[object_index - 1] = result
        return result

    @staticmethod
    def _get_encoding(attrs):
        """
        Pick the text encoding described by a string's attributes.

        Returns "utf-8" when attribute "E" is True, the decoded value of
        attribute "encoding" when present, and "latin1" otherwise.
        """
        # NOTE(review): read_attributes stores keys exactly as decoded; if
        # those keys are Symbol objects that do not compare equal to plain
        # strings, neither lookup below can match -- confirm.
        encoding = "latin1"
        if attrs.get("E") is True:
            encoding = "utf-8"
        elif "encoding" in attrs:
            encoding = attrs["encoding"].decode()
        return encoding

    def read_attributes(self):
        """
        Read a counted list of name/value pairs.

        :return: dict mapping every decoded name to its decoded value
        """
        attr_count = self.read_long()
        attrs = {}
        for _ in range(attr_count):
            attr_name = self.read()
            attr_value = self.read()
            # The name is used as the key exactly as decoded. The earlier
            # special case compared a type object with the string
            # '<class int>', which is never equal, so this was already the
            # only path taken.
            attrs[attr_name] = attr_value
        return attrs

    def read_short(self):
        """Read one unsigned 16-bit value via ``read_ushort``."""
        return read_ushort(self.fd)

    def read_long(self):
        """
        Decode one packed integer.

        The first signed byte is 0 for zero, holds small values directly with
        an offset of 5, and otherwise gives the number of little-endian
        payload bytes (negative for negative numbers).
        """
        length = read_sbyte(self.fd)
        if length == 0:
            return 0
        if 5 < length < 128:
            return length - 5
        elif -129 < length < -5:
            return length + 5
        result = 0
        factor = 1
        for _ in range(abs(length)):
            result += read_ubyte(self.fd) * factor
            factor *= 256
        if length < 0:
            # Negative numbers are stored as the complement over the payload width.
            result = result - factor
        return result

    def read_symbol(self):
        """
        Read a symbol or symlink, skipping any leading ivar tokens.

        :raises ValueError: on any other token, or on a symlink that follows
            an ivar token
        """
        ivar = 0
        while True:
            token = self.fd.read(1)
            if token == TYPE_IVAR:
                ivar = 1
                continue
            elif token == TYPE_SYMBOL:
                return self.read_symreal()
            elif token == TYPE_SYMLINK:
                if ivar:
                    raise ValueError("dump format error (symlink with encoding)")
                return self.read_symlink()
            raise ValueError("error while reading symbol with token %r" % token)

    def read_symlink(self):
        """Return the already decoded symbol referenced by the next index."""
        symlink_id = self.read_long()
        return self.symbols[symlink_id]

    def read_symreal(self):
        """Read a length-prefixed UTF-8 symbol and add it to the symbol table."""
        size = self.read_long()
        result = self.fd.read(size)
        result = Symbol(result.decode("utf-8"))
        # A fallback to "iso-8859-1" was tried at this spot and left disabled.
        self.symbols.append(result)
        return result


def load(fd, registry=None):
    """
    Decode one Marshal dump from a binary stream.

    :param fd: binary file-like object positioned on the two header bytes
    :param registry: optional class registry handed to ``Reader``
    :return: the value decoded by ``Reader.read``
    :raises AssertionError: when the stream does not start with the bytes
        0x04 0x08
    """
    # Checked explicitly rather than with ``assert``: assert statements are
    # removed under ``python -O``, which would also drop the reads and leave
    # the header in the stream. AssertionError is kept so callers see the
    # same exception type as before.
    for expected in (b"\x04", b"\x08"):
        actual = fd.read(1)
        if actual != expected:
            raise AssertionError(
                "invalid Marshal header byte: expected %r, got %r" % (expected, actual)
            )

    loader = Reader(fd, registry=registry)
    return loader.read()


def loads(byte_text, registry=None):
    """
    Decode one Marshal dump held in memory.

    :param byte_text: bytes of the dump, header included
    :param registry: optional class registry passed on to ``load``
    :return: whatever ``load`` returns for these bytes
    """
    stream = io.BytesIO(byte_text)
    return load(stream, registry=registry)

注:
1. 该解析函数的解析可能还不完善;
2. 最好不要直接修改库文件,而是编写一个继承 reader.py 中 Reader 类的子类。

使用实例,比如存档格式为 .rvdata 的一个单机游戏:
在这里插入图片描述

假如有一个初始存档进度的文件 Save30.rvdata
在这里插入图片描述

游戏界面拥有 1000 金币
在这里插入图片描述
游戏的事件记录
在这里插入图片描述

角色装备也没几个
在这里插入图片描述

而我想修改游戏数据:
首先,更改读取的.rvdata文件路径为
在这里插入图片描述
执行完代码的部分结果
在这里插入图片描述

在这里插入图片描述
在这里插入图片描述
这里面有我们认识的编码,也有不认识的编码,我们可以根据规律和认识的编码部分对其进行修改
然后,我对一些数据作如下修改:

from rubymarshal.reader import loads
from rubymarshal.writer import writes
from rubymarshal.classes import Symbol


if __name__ == '__main__':

    # Path of the save file: read here and overwritten in place at the end.
    # NOTE: keep a copy of the original save before running this script.
    file_path = r'D:\MyDownloads\Download\Metempsyc\Metempsyc\Saves\Save30.rvdata'

    with open(file_path, 'rb') as fd:
        text = fd.read()

    # Decoding the whole file with loads() only yields its first object, so
    # the raw bytes are cut at every Marshal header instead. split() drops
    # the separator, so it is put back in front of every chunk but the
    # first one (the part before the first header); joining the chunks then
    # reproduces the file.
    marshal_header = b'\x04\x08'
    content = text.split(marshal_header)
    content[1:] = [marshal_header + chunk for chunk in content[1:]]

    # In this save, chunk 8 is the Game_Variables object and chunk 11 is the
    # object that carries gold / armors / weapons / items. Both get the same
    # inventory values; chunk 8 additionally gets three entries of @data
    # (event records) changed and is printed before and after.
    for index in (8, 11):
        result = loads(content[index])

        if index == 8:
            print("这里是 content[8]:\n", result.ruby_class_name, result)

            result.attributes[Symbol("@data")][12] = 1314
            result.attributes[Symbol("@data")][13] = 1314
            result.attributes[Symbol("@data")][14] = 0

        # The dict literals are rebuilt on every pass, so the two objects
        # never share a container.
        result.attributes[Symbol("@gold")] = 999999999
        result.attributes[Symbol("@armors")] = {
            20: 1, 30: 4, 32: 1, 34: 1, 62: 1, 64: 1, 66: 1, 79: 1, 81: 3, 83: 1, 85: 1, 87: 1, 115: 4, 116: 4, 117: 4
        }
        result.attributes[Symbol("@weapons")] = {
            32: 2, 68: 2, 70: 1, 72: 1, 74: 1, 120: 1, 122: 1, 168: 1, 170: 1, 172: 1, 228: 4
        }
        result.attributes[Symbol("@items")] = {
            26: 99, 28: 99, 29: 99, 30: 99, 86: 99, 88: 1, 129: 1, 143: 1, 159: 99, 162: 1, 163: 99, 164: 99
        }

        if index == 8:
            print("这里是 content[8] 修改后的:\n", result)

        # Encode the modified object back into its chunk.
        content[index] = writes(result)

    # Only the two chunks are re-encoded; the joined bytes are written as
    # they are, without passing the whole file through writes() again.
    with open(file_path, 'wb') as f:
        f.write(b''.join(content))


        

执行结果
在这里插入图片描述

在这里插入图片描述

在这里插入图片描述
最后,去看看存档的数据
在这里插入图片描述

在这里插入图片描述

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值