前言
UCI 数据集中有很多数据是以原始文本格式提供的,需要下载后转换为 .mat 格式。本文介绍 optdigits 数据集的转换方法。
提示:以下是本篇文章正文内容,下面案例可供参考
一、原始数据情况
原始数据有四个文件
optdigits-orig.tra Training 1934
optdigits-orig.cv Validation 946
optdigits-orig.wdep Writer-dependent 943
optdigits-orig.windep Writer-independent 1797
前三个合并为训练集,个数为3823.
二、使用步骤
1.引入数据
代码如下:
% Load optdigits-orig.wdep into train1 / train1cls and preview six samples.
% NOTE: this section reads the writer-dependent file (optdigits-orig.wdep),
% not optdigits-orig.tra; the resulting split is stored as train1.
fileID = fopen('optdigits-orig.wdep','r');
if fileID == -1
    error('Cannot open optdigits-orig.wdep');
end
formatSpec = '%s';
data = textscan(fileID,formatSpec);   % read the file as whitespace-delimited tokens
fclose(fileID);                       % everything needed is in memory; release the handle
data{1}(1:66)=[];                     % drop the first 66 tokens (presumably the header text - verify against the file)
data = char(data{1});                 % one token per row, blank-padded to a common width

% Define important parameters
nx = 32; ny = 32; nxny = nx*ny;
% Each record is ny bitmap rows followed by one label row.
if mod(size(data,1), nx + 1) ~= 0
    error('optdigits-orig.wdep: %d rows is not a multiple of %d; check the header skip count', ...
        size(data,1), nx + 1);
end
nNumbers = size(data,1)/ (nx + 1);

% Initialize the output: one flattened bitmap per column, one label per row
trainingMatrix = zeros(nxny, nNumbers);
output = zeros(nNumbers, 1);

% Subdivide data into images
for i = 1:nNumbers
    startingLocation = ((i-1)*(nx+1))+1;
    endingLocation = startingLocation + ny - 1;
    outputLocation = endingLocation +1;
    % Flatten the character bitmap column-major into an nxny-by-1 vector.
    bitmap = reshape(data(startingLocation:endingLocation, :), nxny, 1);
    if any(bitmap < '0' | bitmap > '9')
        error('optdigits-orig.wdep: non-digit character in bitmap of sample %d', i);
    end
    % Character arithmetic converts digit characters to numbers without
    % passing file contents through str2num (which evaluates its input).
    trainingMatrix(:,i) = bitmap - '0';
    output(i) = str2double(data(outputLocation, :));
end

% Visualize some of the images
figure('Color', 'White');
for i = 1:6
    subplot(2,3,i)
    imagesc(reshape(trainingMatrix(:,i),32,32));
    axis equal; axis tight;
    set(gca,'xTickLabel', [], 'yTickLabel', []);
    colormap gray;
end

train1=trainingMatrix;
train1cls=output;
% Load optdigits-orig.tra into train2 / train2cls.
% NOTE: this section reads the training file (optdigits-orig.tra),
% not optdigits-orig.cv; the resulting split is stored as train2.
fileID = fopen('optdigits-orig.tra','r');
if fileID == -1
    error('Cannot open optdigits-orig.tra');
end
formatSpec = '%s';
data = textscan(fileID,formatSpec);   % read the file as whitespace-delimited tokens
fclose(fileID);                       % everything needed is in memory; release the handle
data{1}(1:65)=[];                     % drop the first 65 tokens (presumably the header text - verify against the file)
data = char(data{1});                 % one token per row, blank-padded to a common width

% Define important parameters
nx = 32; ny = 32; nxny = nx*ny;
% Each record is ny bitmap rows followed by one label row.
if mod(size(data,1), nx + 1) ~= 0
    error('optdigits-orig.tra: %d rows is not a multiple of %d; check the header skip count', ...
        size(data,1), nx + 1);
end
nNumbers = size(data,1)/ (nx + 1);

% Initialize the output: one flattened bitmap per column, one label per row
trainingMatrix = zeros(nxny, nNumbers);
output = zeros(nNumbers, 1);

% Subdivide data into images
for i = 1:nNumbers
    startingLocation = ((i-1)*(nx+1))+1;
    endingLocation = startingLocation + ny - 1;
    outputLocation = endingLocation +1;
    % Flatten the character bitmap column-major into an nxny-by-1 vector.
    bitmap = reshape(data(startingLocation:endingLocation, :), nxny, 1);
    if any(bitmap < '0' | bitmap > '9')
        error('optdigits-orig.tra: non-digit character in bitmap of sample %d', i);
    end
    % Character arithmetic converts digit characters to numbers without
    % passing file contents through str2num (which evaluates its input).
    trainingMatrix(:,i) = bitmap - '0';
    output(i) = str2double(data(outputLocation, :));
end

train2=trainingMatrix;
train2cls=output;
% Load optdigits-orig.cv into train3 / train3cls.
% NOTE: this section reads the validation file (optdigits-orig.cv),
% not optdigits-orig.wdep; the resulting split is stored as train3.
fileID = fopen('optdigits-orig.cv','r');
if fileID == -1
    error('Cannot open optdigits-orig.cv');
end
formatSpec = '%s';
data = textscan(fileID,formatSpec);   % read the file as whitespace-delimited tokens
fclose(fileID);                       % everything needed is in memory; release the handle
data{1}(1:65)=[];                     % drop the first 65 tokens (presumably the header text - verify against the file)
data = char(data{1});                 % one token per row, blank-padded to a common width

% Define important parameters
nx = 32; ny = 32; nxny = nx*ny;
% Each record is ny bitmap rows followed by one label row.
if mod(size(data,1), nx + 1) ~= 0
    error('optdigits-orig.cv: %d rows is not a multiple of %d; check the header skip count', ...
        size(data,1), nx + 1);
end
nNumbers = size(data,1)/ (nx + 1);

% Initialize the output: one flattened bitmap per column, one label per row
trainingMatrix = zeros(nxny, nNumbers);
output = zeros(nNumbers, 1);

% Subdivide data into images
for i = 1:nNumbers
    startingLocation = ((i-1)*(nx+1))+1;
    endingLocation = startingLocation + ny - 1;
    outputLocation = endingLocation +1;
    % Flatten the character bitmap column-major into an nxny-by-1 vector.
    bitmap = reshape(data(startingLocation:endingLocation, :), nxny, 1);
    if any(bitmap < '0' | bitmap > '9')
        error('optdigits-orig.cv: non-digit character in bitmap of sample %d', i);
    end
    % Character arithmetic converts digit characters to numbers without
    % passing file contents through str2num (which evaluates its input).
    trainingMatrix(:,i) = bitmap - '0';
    output(i) = str2double(data(outputLocation, :));
end

train3=trainingMatrix;
train3cls=output;
% Build the combined training set from the three loaded splits.
% train1..train3 hold one sample per column, so they are joined side by side
% (in that order) and then transposed to give one sample per row.
% The article lists the three source files as 1934 + 946 + 943 = 3823 samples.
tradata = [train1, train2, train3]';
% Labels are column vectors; stack them in the same order as the samples.
letclasstra = vertcat(train1cls, train2cls, train3cls);
% Load optdigits-orig.windep (writer-independent split) as the test set:
% testdata has one sample per row, letclasstest the matching labels.
fileID = fopen('optdigits-orig.windep','r');
if fileID == -1
    error('Cannot open optdigits-orig.windep');
end
formatSpec = '%s';
data = textscan(fileID,formatSpec);   % read the file as whitespace-delimited tokens
fclose(fileID);                       % everything needed is in memory; release the handle
data{1}(1:66)=[];                     % drop the first 66 tokens (presumably the header text - verify against the file)
data = char(data{1});                 % one token per row, blank-padded to a common width

% Define important parameters
nx = 32; ny = 32; nxny = nx*ny;
% Each record is ny bitmap rows followed by one label row.
if mod(size(data,1), nx + 1) ~= 0
    error('optdigits-orig.windep: %d rows is not a multiple of %d; check the header skip count', ...
        size(data,1), nx + 1);
end
nNumbers = size(data,1)/ (nx + 1);

% Initialize the output: one flattened bitmap per column, one label per row
trainingMatrix = zeros(nxny, nNumbers);
output = zeros(nNumbers, 1);

% Subdivide data into images
for i = 1:nNumbers
    startingLocation = ((i-1)*(nx+1))+1;
    endingLocation = startingLocation + ny - 1;
    outputLocation = endingLocation +1;
    % Flatten the character bitmap column-major into an nxny-by-1 vector.
    bitmap = reshape(data(startingLocation:endingLocation, :), nxny, 1);
    if any(bitmap < '0' | bitmap > '9')
        error('optdigits-orig.windep: non-digit character in bitmap of sample %d', i);
    end
    % Character arithmetic converts digit characters to numbers without
    % passing file contents through str2num (which evaluates its input).
    trainingMatrix(:,i) = bitmap - '0';
    output(i) = str2double(data(outputLocation, :));
end

testdata=trainingMatrix;
testdata=testdata';                   % one sample per row
letclasstest=output;
2.按类标重组数据
代码如下
% Rearrange the training and test samples so they are grouped by class label,
% then save them both as row-per-sample matrices and as 32x32xN image stacks.
%
% Labels are shifted by +1 so that the loop below can use them as 1-based
% class indices 1..ndigit (presumably digits 0..9 become classes 1..10).
letclasstra=letclasstra+1;
letclasstest=letclasstest+1;

ndigit=10;
trcls=[]; tscls=[]; trdata=[]; tsdata=[];
% Preallocate the per-class bookkeeping so stale values from an earlier run
% cannot survive in the workspace.
trndata=zeros(1,ndigit); tsndata=zeros(1,ndigit);   % samples per class
tr=cell(1,ndigit); ts=cell(1,ndigit);               % per-class sample matrices

for j=1:ndigit
    % Training samples of class j, appended in their original order.
    index=find(letclasstra==j); trndata(j)=length(index);
    trcls=[trcls;letclasstra(index)];
    trdata=[trdata; tradata(index, :)]; tr{j}=tradata(index, :);
    % Test samples of class j, appended in their original order.
    index=find(letclasstest==j); tsndata(j)=length(index);
    tscls=[tscls;letclasstest(index)];
    tsdata=[tsdata; testdata(index, :)]; ts{j}=testdata(index, :);
end

save optdigits32a.mat trdata tsdata trcls tscls;

% Image-stack form: slice k is row k of trdata/tsdata reshaped to 32x32.
% The sample count is taken from the data instead of being hard-coded, so the
% reshape still works if the number of grouped samples differs from 3823/1797.
Xtr=reshape(trdata',[32 32 size(trdata,1)]);
Xts=reshape(tsdata',[32 32 size(tsdata,1)]);
save optdigits32a_mx.mat Xtr Xts trcls tscls;
总结
这样,就可以将 32×32 位图形式的原始文本数据转换为标准的 .mat 文件。