一、 代码
from rubymarshal.reader import loads
#from rubymarshal.writer import writes
from rubymarshal.classes import RubyObject
class ParseRVDataFile:
    """Parse a ``.rvdata`` file with ``rubymarshal`` and pretty-print it.

    The file is treated as a concatenation of Marshal dumps, each one
    starting with the two header bytes ``b'\\x04\\x08'``.
    """

    # Header bytes used to cut the file into individual dumps.
    MARSHAL_HEADER = b'\x04\x08'

    def __init__(self, file_path):
        """Parse *file_path* immediately and keep the outcome.

        :param file_path: path of the .rvdata file to parse
        """
        # Parse result: a single decoded object or a list of them.
        self.result = self.parse_rvdata_file(file_path)

    def parse_rvdata_file(self, file_path):
        """Read the file, split it into dumps and decode every dump.

        :param file_path: path of the file to read
        :return: the decoded object when the file holds exactly one dump,
            otherwise a list with one decoded object per dump (an empty
            list for an empty file)
        """
        with open(file=file_path, mode='rb') as file:
            byte_text = file.read()
        header = self.MARSHAL_HEADER
        # NOTE(review): splitting on the raw header bytes is a heuristic;
        # the same two bytes may also occur inside a dump's payload.
        chunks = [header + part for part in byte_text.split(header) if part]
        if len(chunks) == 1:
            return loads(chunks[0])
        return [loads(chunk) for chunk in chunks]

    def get_data(self, result):
        """Print a readable view of a parse result (best effort).

        Byte strings are shown decoded as UTF-8 and object attributes are
        listed one per line. Values of other kinds are only shown through
        the leading raw ``print``.

        :param result: value produced by :meth:`parse_rvdata_file`
        :return: None; everything is written to stdout
        """
        if isinstance(result, list):
            print(result)
            for item in result:
                print(item)
                self._print_chunk(item)
            return

        print(result)
        if isinstance(result, bytes):
            print(self._to_text(result))
            return
        if not isinstance(result, RubyObject):
            # Nothing with attributes to expand; the raw print is all we have.
            return
        print('Class Name:', result.ruby_class_name, '\n------All Attributes:------')
        for key, value in result.attributes.items():
            if isinstance(value, RubyObject):
                # Expand one extra level for directly nested objects.
                for name, inner in value.attributes.items():
                    self._print_attribute(name, inner)
            elif self._is_object_list(value):
                print(key)
                for obj in value:
                    print(obj.attributes)
            else:
                self._print_attribute(key, value)

    def _print_chunk(self, item):
        """Print the decodable parts of one element of a multi-dump result."""
        if isinstance(item, list):
            for element in item:
                if isinstance(element, list):
                    for leaf in element:
                        if isinstance(leaf, bytes):
                            print(self._to_text(leaf))
                elif isinstance(element, bytes):
                    print(self._to_text(element))
        elif isinstance(item, bytes):
            print(self._to_text(item))
        elif isinstance(item, RubyObject):
            for name, value in item.attributes.items():
                self._print_attribute(name, value)

    def _print_attribute(self, name, value):
        """Print one attribute; empty lists and lists of objects are skipped."""
        if isinstance(value, list):
            if value and not isinstance(value[0], RubyObject):
                print(name, '\t', [
                    self._to_text(entry) if isinstance(entry, bytes) and entry else entry
                    for entry in value
                ])
        elif isinstance(value, bytes):
            print(name, '\t', self._to_text(value))
        elif isinstance(value, RubyObject):
            print(value.attributes)

    @staticmethod
    def _is_object_list(value):
        """Tell whether *value* is a non-empty list whose first item is an object."""
        return isinstance(value, list) and bool(value) and isinstance(value[0], RubyObject)

    @staticmethod
    def _to_text(raw):
        """Decode bytes as UTF-8, substituting undecodable bytes instead of raising."""
        return raw.decode('utf-8', errors='replace')
if __name__ == '__main__':
    # Path of the .rvdata file to inspect.
    file_path = r'D:\MyDownloads\Download\Metempsyc\Metempsyc\Saves\Save13.rvdata'
    data = ParseRVDataFile(file_path)
    # get_data prints by itself and returns None, so its result is not printed.
    data.get_data(data.result)
    # A re-saved file can be checked by decoding it again:
    # new_file_path = r''
    # with open(new_file_path, 'rb') as f:
    #     print(loads(f.read()))
二、如果解析过程出错,可以把通过 pip install rubymarshal 安装的 rubymarshal 库中的 reader.py 文件修改为如下内容:
import io
import re
from rubymarshal.classes import (
UsrMarshal,
Symbol,
UserDef,
Extended,
Module,
RubyString,
RubyObject,
registry as global_registry,
)
from rubymarshal.constants import (
TYPE_NIL,
TYPE_TRUE,
TYPE_FALSE,
TYPE_FIXNUM,
TYPE_IVAR,
TYPE_STRING,
TYPE_SYMBOL,
TYPE_ARRAY,
TYPE_HASH,
TYPE_FLOAT,
TYPE_BIGNUM,
TYPE_REGEXP,
TYPE_USRMARSHAL,
TYPE_SYMLINK,
TYPE_LINK,
TYPE_DATA,
TYPE_OBJECT,
TYPE_STRUCT,
TYPE_MODULE,
TYPE_CLASS,
TYPE_USERDEF,
TYPE_EXTENDED,
)
from rubymarshal.utils import read_ushort, read_sbyte, read_ubyte
__author__ = "Matthieu Gallet"
class Reader:
    """Decode a Ruby Marshal byte stream into Python objects.

    Two back-reference tables are filled while decoding: ``symbols`` serves
    symlink tokens and ``objects`` serves object-link tokens.
    """

    def __init__(self, fd, registry=None):
        """Create a reader over *fd*.

        :param fd: binary file-like object; ``load`` hands it over after
            consuming the two header bytes
        :param registry: class registry used to map Ruby class names to
            Python classes; the library-wide registry is used when omitted
        """
        self.symbols = []
        self.objects = []
        self.fd = fd
        self.registry = registry or global_registry

    def read(self, token=None):
        """Decode and return the next value from the stream.

        :param token: type byte that was already consumed by the caller;
            when None the type byte is read from the stream
        :return: the decoded Python value
        :raises ValueError: on an unknown type byte, an invalid class name
            or a registry entry of the wrong base class
        """
        if token is None:
            token = self.fd.read(1)

        # From https://docs.ruby-lang.org/en/2.1.0/marshal_rdoc.html:
        # The stream contains only one copy of each object for all objects except
        # true, false, nil, Fixnums and Symbols.
        object_index = None
        if token in (
            TYPE_IVAR,
            # TYPE_EXTENDED, TYPE_UCLASS, ????
            TYPE_CLASS,
            TYPE_MODULE,
            TYPE_FLOAT,
            TYPE_BIGNUM,
            TYPE_REGEXP,
            TYPE_ARRAY,
            TYPE_HASH,
            TYPE_STRUCT,
            TYPE_OBJECT,
            TYPE_DATA,
            TYPE_USRMARSHAL,
        ):
            # Reserve the slot before decoding children so that nested
            # values get higher indices than their container.
            self.objects.append(None)
            object_index = len(self.objects)

        result = None
        if token == TYPE_NIL:
            pass
        elif token == b'\x00':
            # Local patch: a NUL type byte is decoded as an empty string
            # instead of being rejected.
            result = ''
        elif token == TYPE_TRUE:
            result = True
        elif token == TYPE_FALSE:
            result = False
        elif token == TYPE_IVAR:
            sub_token = self.fd.read(1)
            result = self.read(sub_token)
            flags = None
            if sub_token == TYPE_REGEXP:
                options = ord(self.fd.read(1))
                flags = 0
                if options & 1:
                    flags |= re.IGNORECASE
                if options & 4:
                    flags |= re.MULTILINE
            attributes = self.read_attributes()
            if sub_token in (TYPE_STRING, TYPE_REGEXP):
                encoding = self._get_encoding(attributes)
                try:
                    result = result.decode(encoding)
                except UnicodeDecodeError:
                    # Local patch: fall back instead of failing the whole parse.
                    result = result.decode("unicode-escape")
            # string instance attributes are discarded
            if attributes and sub_token == TYPE_STRING:
                result = RubyString(result, attributes)
            if sub_token == TYPE_REGEXP:
                result = re.compile(str(result), flags)
            elif attributes:
                result.set_attributes(attributes)
        elif token == TYPE_STRING:
            size = self.read_long()
            result = self.fd.read(size)
        elif token == TYPE_SYMBOL:
            result = self.read_symreal()
        elif token == TYPE_FIXNUM:
            result = self.read_long()
        elif token == TYPE_ARRAY:
            num_elements = self.read_long()
            result = [self.read() for _ in range(num_elements)]
        elif token == TYPE_HASH:
            num_elements = self.read_long()
            result = {}
            list_key_count = 0
            for _ in range(num_elements):
                key = self.read()
                value = self.read()
                if isinstance(key, list):
                    # Local patch: a list cannot be a dict key, so the pair
                    # is stored under a synthetic, numbered key.
                    list_key_count += 1
                    result['key_list' + str(list_key_count)] = (key, value)
                    # NOTE(review): every list-keyed entry overwrites this
                    # single 'value' slot; kept as in the original patch.
                    result['value'] = value
                else:
                    result[key] = value
        elif token == TYPE_FLOAT:
            size = self.read_long()
            floatn = self.fd.read(size)
            floatn = floatn.split(b"\0")
            result = float(floatn[0].decode("utf-8"))
        elif token == TYPE_BIGNUM:
            sign = 1 if self.fd.read(1) == b"+" else -1
            num_elements = self.read_long()
            result = 0
            factor = 1
            # The magnitude is accumulated from 16-bit words, lowest first.
            for _ in range(num_elements):
                result += self.read_short() * factor
                factor *= 2 ** 16
            result *= sign
        elif token == TYPE_REGEXP:
            size = self.read_long()
            result = self.fd.read(size)
        elif token == TYPE_USRMARSHAL:
            class_symbol = self.read()
            if not isinstance(class_symbol, Symbol):
                raise ValueError("invalid class name: %r" % class_symbol)
            class_name = class_symbol.name
            attr_list = self.read()
            python_class = self.registry.get(class_name, UsrMarshal)
            if not issubclass(python_class, UsrMarshal):
                raise ValueError(
                    "invalid class mapping for %r: %r should be a subclass of %r."
                    % (class_name, python_class, UsrMarshal)
                )
            result = python_class(class_name)
            result.marshal_load(attr_list)
        elif token == TYPE_SYMLINK:
            result = self.read_symlink()
        elif token == TYPE_LINK:
            link_id = self.read_long()
            if object_index and link_id >= object_index:
                raise ValueError(
                    "invalid link destination: %d should be lower than %d."
                    % (link_id, object_index)
                )
            if link_id >= len(self.objects):
                # Local patch: an out-of-range link yields an empty string
                # instead of an IndexError.
                result = ''
            else:
                result = self.objects[link_id]
        elif token == TYPE_USERDEF:
            class_symbol = self.read()
            private_data = self.read(TYPE_STRING)
            if not isinstance(class_symbol, Symbol):
                raise ValueError("invalid class name: %r" % class_symbol)
            class_name = class_symbol.name
            python_class = self.registry.get(class_name, UserDef)
            if not issubclass(python_class, UserDef):
                raise ValueError(
                    "invalid class mapping for %r: %r should be a subclass of %r."
                    % (class_name, python_class, UserDef)
                )
            result = python_class(class_name)
            # noinspection PyProtectedMember
            result._load(private_data)
        elif token == TYPE_MODULE:
            data = self.read(TYPE_STRING)
            module_name = data.decode()
            result = Module(module_name, None)
        elif token == TYPE_OBJECT:
            class_symbol = self.read()
            assert isinstance(class_symbol, Symbol)
            class_name = class_symbol.name
            python_class = self.registry.get(class_name, RubyObject)
            if not issubclass(python_class, RubyObject):
                raise ValueError(
                    "invalid class mapping for %r: %r should be a subclass of %r."
                    % (class_name, python_class, RubyObject)
                )
            attributes = self.read_attributes()
            result = python_class(class_name, attributes)
        elif token == TYPE_EXTENDED:
            class_name = self.read(TYPE_STRING)
            result = Extended(class_name, None)
        elif token == TYPE_CLASS:
            data = self.read(TYPE_STRING)
            class_name = data.decode()
            if class_name in self.registry:
                result = self.registry[class_name]
            else:
                # Unknown class: build a RubyObject subclass on the fly,
                # named after the last "::"-separated component.
                result = type(
                    class_name.rpartition(":")[2],
                    (RubyObject,),
                    {"ruby_class_name": class_name},
                )
        else:
            raise ValueError("token %s is not recognized" % token)

        if object_index is not None:
            # Fill the slot reserved above with the finished value.
            self.objects[object_index - 1] = result
        return result

    @staticmethod
    def _get_encoding(attrs):
        """Return the codec name announced by a string's instance variables.

        ``read_attributes`` stores keys exactly as decoded, so the ``E`` and
        ``encoding`` markers may be keyed by objects carrying a ``name``
        attribute rather than by plain ``str``. Keys are therefore compared
        by name; otherwise the markers would be missed and every string
        would fall back to latin1.

        :param attrs: attribute mapping read after the string payload
        :return: ``"utf-8"`` when ``E`` is True, the decoded ``encoding``
            value when present, ``"latin1"`` otherwise
        """
        by_name = {getattr(key, "name", key): value for key, value in attrs.items()}
        encoding = "latin1"
        if by_name.get("E") is True:
            encoding = "utf-8"
        elif "encoding" in by_name:
            encoding = by_name["encoding"].decode()
        return encoding

    def read_attributes(self):
        """Read a counted list of name/value pairs.

        The previous type test compared a type object with the string
        ``'<class int>'`` and was therefore never true, so keys were always
        stored as decoded. That behaviour is kept without the dead branch.

        :return: dict mapping each decoded attribute name to its value
        """
        attr_count = self.read_long()
        attrs = {}
        for _ in range(attr_count):
            attr_name = self.read()
            attr_value = self.read()
            attrs[attr_name] = attr_value
        return attrs

    def read_short(self):
        """Read one unsigned 16-bit value from the stream."""
        return read_ushort(self.fd)

    def read_long(self):
        """Decode a Marshal variable-length integer.

        The first signed byte is either the value itself (0, or offset by
        +/-5 for small numbers) or the count of little-endian payload bytes
        that follow, with a negative count marking a negative number.
        """
        length = read_sbyte(self.fd)
        if length == 0:
            return 0
        if 5 < length < 128:
            return length - 5
        elif -129 < length < -5:
            return length + 5
        result = 0
        factor = 1
        for _ in range(abs(length)):
            result += read_ubyte(self.fd) * factor
            factor *= 256
        if length < 0:
            # Negative numbers are stored as an offset from 256 ** byte_count.
            result = result - factor
        return result

    def read_symbol(self):
        """Read a symbol or symlink, skipping a leading IVAR marker.

        :raises ValueError: on a symlink that follows an IVAR marker or on
            any other type byte
        """
        ivar = 0
        while True:
            token = self.fd.read(1)
            if token == TYPE_IVAR:
                ivar = 1
                continue
            elif token == TYPE_SYMBOL:
                return self.read_symreal()
            elif token == TYPE_SYMLINK:
                if ivar:
                    raise ValueError("dump format error (symlink with encoding)")
                return self.read_symlink()
            raise ValueError("error while reading symbol with token %r" % token)

    def read_symlink(self):
        """Return the previously read symbol referenced by the next index."""
        symlink_id = self.read_long()
        return self.symbols[symlink_id]

    def read_symreal(self):
        """Read a symbol body, register it for later symlinks and return it."""
        size = self.read_long()
        result = self.fd.read(size)
        result = Symbol(result.decode("utf-8"))
        self.symbols.append(result)
        return result
def load(fd, registry=None):
    """Decode one Marshal dump from a binary file-like object.

    The two header bytes are always consumed and checked. They used to be
    read inside ``assert`` statements, which ``python -O`` strips, leaving
    the header in the stream to be decoded as data.

    :param fd: binary file-like object positioned at the start of a dump
    :param registry: optional class registry forwarded to ``Reader``
    :return: the decoded Python value
    :raises ValueError: when the stream does not start with ``b"\\x04\\x08"``
        (previously an ``AssertionError``)
    """
    header = fd.read(2)
    if header != b"\x04\x08":
        raise ValueError("unsupported Marshal header: %r" % header)
    loader = Reader(fd, registry=registry)
    return loader.read()
def loads(byte_text, registry=None):
    """Decode one Marshal dump held in memory.

    :param byte_text: bytes of the dump, header included
    :param registry: optional class registry forwarded to ``load``
    :return: whatever ``load`` returns for the wrapped stream
    """
    stream = io.BytesIO(byte_text)
    return load(stream, registry=registry)
注:
1. 该解析函数解析的可能不完善;
2. 最好可以写一个继承 reader.py 文件中 Reader 类的类。
使用实例,比如存档格式为 .rvdata 的一个单机游戏:
假如有一个初始存档进度的文件 Save30.rvdata
游戏界面拥有 1000 金币
游戏的事件记录
角色装备也没几个
而我想修改游戏数据:
首先,把代码中读取的 .rvdata 文件路径改为 Save30.rvdata 的路径,然后运行代码。
执行代码后的部分输出结果如下:
这里面有我们认识的编码,也有不认识的编码,我们可以根据规律和认识的编码部分对其进行修改
然后,我对一些数据作如下修改:
from rubymarshal.reader import loads
from rubymarshal.writer import writes
from rubymarshal.classes import Symbol
# Two header bytes that start every dump inside the save file.
MARSHAL_HEADER = b'\x04\x08'

# Replacement inventories written into the save.
# NOTE(review): keys are presumably item IDs and values quantities -- confirm
# against the game's data before reusing these numbers.
NEW_ARMORS = {
    20: 1, 30: 4, 32: 1, 34: 1, 62: 1, 64: 1, 66: 1, 79: 1, 81: 3, 83: 1, 85: 1, 87: 1, 115: 4, 116: 4, 117: 4
}
NEW_WEAPONS = {
    32: 2, 68: 2, 70: 1, 72: 1, 74: 1, 120: 1, 122: 1, 168: 1, 170: 1, 172: 1, 228: 4
}
NEW_ITEMS = {
    26: 99, 28: 99, 29: 99, 30: 99, 86: 99, 88: 1, 129: 1, 143: 1, 159: 99, 162: 1, 163: 99, 164: 99
}


def apply_inventory_overrides(obj):
    """Set the gold and inventory attributes on a decoded object, in place."""
    obj.attributes[Symbol("@gold")] = 999999999
    # Each object gets its own dict so later edits cannot leak between them.
    obj.attributes[Symbol("@armors")] = dict(NEW_ARMORS)
    obj.attributes[Symbol("@weapons")] = dict(NEW_WEAPONS)
    obj.attributes[Symbol("@items")] = dict(NEW_ITEMS)


if __name__ == '__main__':
    # Path of the .rvdata save; it is read here and overwritten at the end.
    file_path = r'D:\MyDownloads\Download\Metempsyc\Metempsyc\Saves\Save30.rvdata'
    with open(file_path, 'rb') as fd:
        text = fd.read()

    # Decoding the whole file at once only yields its first dump, so the
    # file is cut at every header instead. split() drops the separator,
    # hence it is put back on every part except the leading one.
    content = text.split(MARSHAL_HEADER)
    content[1:] = [MARSHAL_HEADER + part for part in content[1:]]

    # The indices below were found by inspecting this particular save.
    if len(content) <= 11:
        raise SystemExit('unexpected save layout: fewer than 12 Marshal chunks')

    # content[8]: the Game_Variables dump in the inspected save.
    result = loads(content[8])
    print("这里是 content[8]:\n", result.ruby_class_name, result)
    variables = result.attributes[Symbol("@data")]
    variables[12] = 1314
    variables[13] = 1314
    variables[14] = 0
    # NOTE(review): the original script also wrote the inventory attributes
    # onto this object before moving on to content[11]; kept so the bytes
    # written are unchanged.
    apply_inventory_overrides(result)
    print("这里是 content[8] 修改后的:\n", result)
    content[8] = writes(result)

    # content[11]: the dump that also carries gold / armors / weapons / items.
    result = loads(content[11])
    apply_inventory_overrides(result)
    content[11] = writes(result)

    # Stitch the dumps back together and overwrite the original save.
    with open(file_path, 'wb') as f:
        f.write(b''.join(content))
执行结果
最后,去看看存档的数据