What is wrong with this code?
Error using network/train (line 340)
Inputs and targets have different numbers of samples
% Source of Data
filename='wdbc.data';
url=['https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/' filename];
% Read full data table (Use comma, space or tab delimiter depending on data
% source
file_destination = filename;
websave(file_destination,url);
T = readtable(file_destination, 'FileType','text', 'Delimiter', 'comma');
% ------------------------------------------------------------------------
% New Data Set (Mean)
% ------------------------------------------------------------------------
% Analyze Mean Datasets First
inwdbc_data=T(:,3:32);
inpwdbc_data=table2array(inwdbc_data);
outwdbc_data=zeros(size(inwdbc_data,1),3);
% Graphically plot the data as a box plot(Figure 1)
boxplot(inpwdbc_data(:,1:10),1)
title('Compare Mean Attributes Distributions')
xlabel('Cell Nucleus Attribute')
xticklabels({'radius','texture','perimeter','area','smoothness','compactness','concavity','concave pts','symmetry','fractal dimension'})
xtickangle(90)
ylabel('Mean Value')
% Normalize Mean Dataset to help identify and remove outliers (Figure 2)
FP.ymin=0.001; FP.ymax=1;
[Y,PS]=mapminmax(inpwdbc_data',FP);
boxplot(Y',1)
title('Normalized Mean Attributes Distributions')
xlabel('Cell Nucleus Attribute')
xticklabels({'radius','texture','perimeter','area','smoothness','compactness','concavity','concave pts','symmetry','fractal dimension'})
xtickangle(90)
ylabel('Normalised Mean Value')
% There are still quite a number of outliers with the dafault whisker size
% of 1.5 times of IQR. Increasing to a whisker size of 4 reduces the
% outliers and changing plot style allows for better visualisation
% Figure 3
whisk=2;
figure, boxplot(Y','Whisker',whisk,'PlotStyle','compact')
title('Normalised Std Deviation Attributes Distributions')
xlabel('Cell Nucleus Attribute')
xticklabels({'radius','texture','perimeter','area','smoothness','compactness','concavity','concave pts','symmetry','fractal dimension'})
xtickangle(90)
ylabel('Std Deviation Value')
% There are only 3 outliers left in attribute 4 (Area) with the whisker
% size of 4 times IQR.
% Figure 4
y_data=Y;
plot(y_data(:,2))
title('Normalised Mean Attributes Distributions (Reduced Outliers)')
xlabel('Cell Nucleus Attribute')
xticklabels({'radius','texture','perimeter','area','smoothness','compactness','concavity','concave pts','symmetry','fractal dimension'})
xtickangle(90)
ylabel('Mean Value')
%Now to find the instances that contain the outliers. An outlier is
%contained either above the upper whisker (quantile 3 + 4 * (quantile3 -
%quantile 1)) or below the lower whisker (Quantile 1 - 4 * (Quantile3 -
%Quantile 1))
% Find the first quartile (Quantile 1)
Q1 = quantile(y_data(:,2),0.25);
% Find the third quartile (Quantile 3)
Q3 = quantile(y_data(:,2),0.75);
% Find the lower whisker
lowerwhisker = Q1- whisk * (Q3 - Q1);
% Find the upper whisker
upperwhisker = Q3 + whisk * (Q3 - Q1);
meanValue=mean(y_data(:,2));
outliers= (y_data(:,2)>upperwhisker)|(y_data(:,2)
Noutliers=sum(outliers);
% Let us assign NaN (Not a Number) to the outlier(s)
y_data(outliers,2)=NaN;
% Let us plot the data with NaN (Figure 5)
plot(y_data(:,2))
title('Normalised Mean Attributes Distributions (Outliers Removed)')
xlabel('Cell Nucleus Attribute')
xticklabels({'radius','texture','perimeter','area','smoothness','compactness','concavity','concave pts','symmetry','fractal dimension'})
xtickangle(90)
ylabel('Mean Value')
% Now we replace the missing values (NaN), using linear interpolation and
% return the filled vector F and the logical vector TF. The value 1 (true)
% in entries of TF corresponds to the values of F that were filled
[F,TF] = fillmissing(y_data(:,2),'linear',1);
% Now we plot the attribute after filling the missing values (Figure 6)
plot(F)
title('Normalised Mean Attributes Distributions')
xlabel('Cell Nucleus Attribute')
xticklabels({'radius','texture','perimeter','area','smoothness','compactness','concavity','concave pts','symmetry','fractal dimension'})
xtickangle(90)
ylabel('Mean Value')
% Now set the target labels the output target variable outwdbc_data
for i=1:size(inwdbc_data,1)
if (strcmp(table2cell(T(i,2)), 'M'))
outwdbc_data(i,1)=1;
elseif (strcmp(table2cell(T(i,2)), 'B'))
outwdbc_data(i,2)=1;
else
outwdbc_data(i,3)=1;
end
end
% Assign all the attributes including the ones after outliers corrections
% as row vectors to the input patterns 'inputs'
inputs=[y_data(:,1),F,y_data(:,3:10)]';
% Assign the output target variables as row vectors to the output target
% patterns 'targets'
targets=outwdbc_data';
% Now we build a multi-layer perception (MLP) network.
% Let us fix the seed for initializing random number generation, so that
% same values are initialised in the repeated runs.
rng(0)
% Initialise the network
hiddenLayerSize=1;
net = patternnet(hiddenLayerSize, 'trainlm');
% Set up the division of data for training with cross validation
net.divideParam.trainRatio = 70/100;
net.divideParam.valRatio = 15/100;
net.divideParam.testRatio = 15/100;
% Using these settings, the input vectors and target vectors will be
% randomly divided, with 70% used for training, 15% for validation and 15%
% for testing
[net,tr]=train(net,inputs,targets);
outputs = net(inputs);
errors = gsubtract(targets,outputs);
performance = perform(net,targets,outputs);
% view the network diagram
view(net)
% Plot the training, validation and test performance
figure, plotperform(tr)
% Use the plot confusion function to plot the confusion matrix. It shows
% the various types of errors that occurred for the final trained network.
figure, plotconfusion(targets,outputs)