Prologue
用到别人的框架(前篇),代码是用 lua 写的,需要将数据处理成 `.t7` 文件格式。这里记下 `.mat` 和 `.t7` 互转的方法。
.mat → .t7
step 1
python 用 `np.load()` 从 .npy 文件中读出 np.ndarray,再用 `scipy.io.savemat()` 将 np.ndarray 存成 .mat 文件
- numpy.load
- scipy.io.savemat
- `savemat()` 的第二个参数必须是 `dict` 类型
# save_as_mat.py
"""Bundle the image / text / label `.npy` arrays into a single `.mat` file."""
from os.path import join

import numpy as np
from scipy.io import savemat

# Directory that holds the three `.npy` files.
# NOTE(review): the original snippet used DATA_P without defining it, which
# raises NameError. '.' is only a placeholder -- point it at the real data dir.
DATA_P = '.'

# read ndarray from `.npy`
img = np.load(join(DATA_P, 'image_feature.npy'))
txt = np.load(join(DATA_P, 'text_feature.npy'))
lab = np.load(join(DATA_P, 'label.npy'))

# savemat() takes a dict of variable-name -> array; the keys used here are the
# names the Lua side reads back (data.img / data.txt / data.lab).
data = {'img': img, 'txt': txt, 'lab': lab}

# save as `.mat`
savemat('all_data.mat', data)
step 2
lua 用 `matio.load()` 读上述 .mat 文件,读出来的是 `torch.*Tensor`
-- save_as_t7.lua
local matio = require('matio')

-- Load every variable stored in the .mat file; the result is a table indexed
-- by variable name (printed below).
local data = matio.load('all_data.mat')
print(data)

-- Keep the tensors in locals instead of leaking globals. Code that continues
-- this script must live in the same file/chunk to see them.
-- The asserts fail early with a readable message if a variable is missing.
local img = assert(data.img, "all_data.mat has no 'img' variable") -- FloatTensor
local txt = assert(data.txt, "all_data.mat has no 'txt' variable") -- IntTensor
local lab = assert(data.lab, "all_data.mat has no 'lab' variable") -- LongTensor

print(img:size()) -- (2866, 1024)
print(txt:size()) -- (2866, 201)
print(lab:size()) -- (1, 2866)
运行结果
{
txt : IntTensor - size: 2866x201
lab : LongTensor - size: 1x2866
img : FloatTensor - size: 2866x1024
}
2866
1024
[torch.LongStorage of size 2]
2866
201
[torch.LongStorage of size 2]
1
2866
[torch.LongStorage of size 2]
step 3
lua 用 `torch.save()` 将 image 和 text 的 feature 按类别各存成一个 .t7 文件(承接上面那份 lua 代码)
- File.lua 里的 `torch.save`
-- save_as_t7.lua (continue from above)
-- Splits the image and text features by class label and writes one .t7 file
-- per class for each modality. Relies on `img`, `txt` and `lab` defined by the
-- first half of this script.
require('torch')

-- The labels were loaded as a 1 x N matrix; flatten them to a length-N vector
-- (Lua indices start at 1). nElement() keeps this valid even if `lab` is
-- already one-dimensional.
lab = lab:reshape(lab:nElement())

local n_samples = lab:size(1)
assert(img:size(1) == n_samples and txt:size(1) == n_samples,
       'img, txt and lab must hold the same number of samples')

-- Derive the label range and feature widths from the data instead of
-- hard-coding them (for the data in this note: labels 0..9, widths 201 / 1024).
local c_min, c_max = torch.min(lab), torch.max(lab)
local txt_dim, img_dim = txt:size(2), img:size(2)

-- Count the samples of every class in a single pass over the labels.
local c_cnt = {}
for c = c_min, c_max do
    c_cnt[c] = 0
end
for j = 1, n_samples do
    local c = lab[j]
    c_cnt[c] = c_cnt[c] + 1
end
for c = c_min, c_max do
    print(string.format('%d: %d', c, c_cnt[c]))
end

-- Save one .t7 file per class.
-- NOTE: the data/image and data/text directories are assumed to exist already.
for c = c_min, c_max do
    if c_cnt[c] > 0 then
        local tx = torch.IntTensor(c_cnt[c], txt_dim)
        local im = torch.FloatTensor(c_cnt[c], img_dim)
        local pos = 1 -- next free row in tx / im
        for i = 1, n_samples do
            if lab[i] == c then
                tx[pos] = txt[i]
                im[pos] = img[i]
                pos = pos + 1
            end
        end
        -- write to .t7
        torch.save(string.format('data/image/%d.t7', c), im)
        torch.save(string.format('data/text/%d.t7', c), tx)
    else
        -- A label value inside [c_min, c_max] that never occurs: nothing to save.
        print(string.format('class %d has no samples, skipped', c))
    end
end
.t7 → .mat
(用上面刚保存的 .t7 文件)
step 1
lua 用 `torch.load()` 从 .t7 文件读数据,再用 `matio.save()` 存成 .mat
- File.lua 里的 `torch.load`
-- t7_to_mat.lua
-- Reads one tensor from a .t7 file and writes it back out as a .mat file.
require('torch')
local matio = require('matio')

-- read `.t7` (kept in a local rather than a global)
local t = torch.load('data/text/1.t7')
-- NOTE(review): the recorded run printed 360 x 1024, which is the image
-- feature width (text features are 201 wide) -- confirm which file was
-- actually loaded when that output was captured.
print(t:size())

-- save as `.mat`; the Python check reads the tensor back under the key 'x'
matio.save('data/text/1.mat', t)
结果
360
1024
[torch.LongStorage of size 2]
step 2: testing
python 测试下保存的 .mat
# test.py
"""Sanity-check the `.mat` file written by t7_to_mat.lua."""
from scipy.io import loadmat

# Same path that t7_to_mat.lua writes to (the original read a bare '1.mat',
# which only works when run from inside data/text/).
a = loadmat('data/text/1.mat')
print(a)
# The tensor itself is stored under the key 'x'; the other keys are metadata.
print('a shape:', a['x'].shape)
结果
{
'__header__': b'MATLAB 5.0 MAT-file, Platform: x86_64-redhat-linux-gnu,Created By: libmatio v1.5.3 on Fri Mar 29 18:54:21 2019',
'__version__': '1.0',
'__globals__': [],
'x': array([[2.0444584e-03, 1.6663823e-02, 9.6312715e-03, ..., 4.1695127e+00,
1.2398216e+00, 2.1180835e+00],
[7.4268819e-43, 1.6255062e-43, 7.8472714e-44, ..., 1.2926978e-40,
5.9695315e-43, 1.2471556e-43],
[5.6051939e-45, 1.6955711e-43, 3.5733111e-43, ..., 1.4012985e-42,
7.0064923e-45, 1.1210388e-44],
...,
[1.4573504e-43, 1.1210388e-44, 1.0369609e-43, ..., 7.4591117e-42,
5.4650640e-43, 6.3106075e-41],
[2.8025969e-45, 1.1210388e-44, 1.6228438e-41, ..., 1.2611686e-44,
9.2843030e-41, 2.5223372e-44],
[1.9618179e-44, 3.8073279e-42, 8.4077908e-45, ..., 9.6689594e-43,
1.2051167e-43, 4.2038954e-45]], dtype=float32)
}
a shape: (360, 1024)