% ------------------------------------------------------------------ %
% -------------- Prepared by : Ismail M. El-Badawy ----------------- %
% ------------------------------------------------------------------ %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This code reads the F56F11.4 dna sequence and detect the period-3 %
% -------- behavior using singular value decomposition with -------- %
% ---------- frame size 'frmsz' to predict the locations ----------- %
% --------------- of protein-coding regions (exons) ---------------- %
% ---------------------- in the dna sequence ----------------------- %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This code is a simulation of the method proposed in the following paper:
% M. Akhtar, E. Ambikairajah, and J. Epps, 揇etection of period-3
% behavior in genomic sequences using singular value decomposition,�
% in Proc. IEEE Int. Conf. Emerging Technologies, 2005, pp. 13�17.
clear all;close all;clc
% Read dna sequence from FASTA format file downloaded from http://www.ncbi.nlm.nih.gov/ %
% ------------------------------------------------------------------------------------- %
sample_dna = fastaread('sequence_AF099922.fasta'); % Caenorhabditis elegans (Wormbase)
% Convert the sequence from structure array to string %
% --------------------------------------------------- %
sample_dna = struct2cell(sample_dna);
sample_dna = cell2mat(sample_dna(2));
% Take only the F56F11.4 sequence (positions from 7021 to 15020) %
% -------------------------------------------------------------- %
sample_dna = sample_dna(7021:15020);
% Actual locations of coding sequences (cds or exons) %
% --------------------------------------------------- %
cds = [928 1038 2527 2856 4113 4376 5464 5643 7254 7604];
for ctr = 1:2:length(cds)
actual_exons(cds(ctr):cds(ctr+1)) = 1;
end
actual_exons(cds(length(cds))+1:length(sample_dna)) = 0;
% Define the frame size for singular value decomposition (svd) %
% ------------------------------------------------------------ %
frmsz = 81;
if mod(frmsz,3) ~= 0
error('Frame size should be a multiple of 3')
end
% Convert string to numerical values (Binary Indicator Sequences) %
% --------------------------------------------------------------- %
[xA xT xC xG] = dna_binary(sample_dna);
xA = [zeros(1,floor(frmsz/2)) xA zeros(1,ceil(frmsz/2))]; % Zero padding from both sides
xT = [zeros(1,floor(frmsz/2)) xT zeros(1,ceil(frmsz/2))];
xC = [zeros(1,floor(frmsz/2)) xC zeros(1,ceil(frmsz/2))];
xG = [zeros(1,floor(frmsz/2)) xG zeros(1,ceil(frmsz/2))];
% Anti-notch Filter centered at angular frequency 2pi/3 %
% ----------------------------------------------------- %
R = 0.992;
theta = (2*pi)/3;
% Filter Coefficients %
% ------------------- %
b = [1 0 -1];
a = [1 -2*R*cos(theta) R^2];
% Filtering the signal (DNA Sequence) %
% ----------------------------------- %
uA = filter(b,a,xA);
uT = filter(b,a,xT);
uC = filter(b,a,xC);
uG = filter(b,a,xG);
% Detection of Period-3 Behavior using Singular Value Decomposition %
% ----------------------------------------------------------------- %
for n = 1:length(sample_dna)
sa = reshape(uA(n:n+frmsz-1),3,frmsz/3);
XA(n) = max(svd(sa));
st = reshape(uT(n:n+frmsz-1),3,frmsz/3);
XT(n) = max(svd(st));
sc = reshape(uC(n:n+frmsz-1),3,frmsz/3);
XC(n) = max(svd(sc));
sg = reshape(uG(n:n+frmsz-1),3,frmsz/3);
XG(n) = max(svd(sg));
end
X = XA + XT + XC + XG;
% Plot X Vs nucleotide positions %
% ------------------------------- %
plot(X/max(X))
hold on;
plot(actual_exons,':k','linewidth',1) % Show actual exonic regions
axis([0 8000 0 1.05])
xlabel('DNA Sequence (Nucleotide Position)')
ylabel('SVD')
title('Detection of Period-3 Behavior using SVD')