额。。。我们老师说做下比赛抵消一次作业。。。。就只好做做了。。。虽然做的很烂,有点还是当做笔记用一下吧。。。,主要是保存下怎么读取txt文档提取有用信息和做二重积分的写法,感觉这个写法还OK,能跑就是了
read.m
clear;
clc;
fid=fopen('NC_012920_1_cds.txt');
[yi,count]=fread(fid,Inf,'char'),
y=char(yi');
fclose(fid);
%判断第一个基因序列在哪,count总数
%变量解释
end_number=0; %中间变量,结束的元素号,每一段进行的标识
start_number=0; %中间变量,开始的元素号,每一段进行的标识
text_number=1; %文本号,中间变量,用来控制循环
text_count=1; %记录一共可能有几组数据的中间量
textstartField=0; %记录开始的元素号,行矩阵
textendField=0; %记录结束的元素号,行矩阵
while(count-end_number>1)
for m=text_number:1:count
if strcmp(y(m),'A')||strcmp(y(m),'T')||strcmp(y(m),'C')||strcmp(y(m),'G')
if strcmp(y(m+1),'A')||strcmp(y(m+1),'T')||strcmp(y(m+1),'C')||strcmp(y(m+1),'G')
if strcmp(y(m+2),'A')||strcmp(y(m+2),'T')||strcmp(y(m+2),'C')||strcmp(y(m+2),'G')
if strcmp(y(m+3),'A')||strcmp(y(m+3),'T')||strcmp(y(m+3),'C')||strcmp(y(m+3),'G')
start_number=m;
break;
end;
end;
end;
end;
end;
%判断结尾序列
for m=start_number:1:count
if (count-m<4)
if (~strcmp(y(m),'A'))&&(~strcmp(y(m),'T'))&&(~strcmp(y(m),'C'))&&(~strcmp(y(m),'G'))
end_number=m;
break;
end
end
if (~strcmp(y(m),'A'))&&(~strcmp(y(m),'T'))&&(~strcmp(y(m),'C'))&&(~strcmp(y(m),'G'))
if (~strcmp(y(m+1),'A'))&&(~strcmp(y(m+1),'T'))&&(~strcmp(y(m+1),'C'))&&(~strcmp(y(m+1),'G'))
if (~strcmp(y(m+2),'A'))&&(~strcmp(y(m+2),'T'))&&(~strcmp(y(m+2),'C'))&&(~strcmp(y(m+2),'G'))
end_number=m;
break;
end
end
end
end;
textstartField(text_count)=start_number;
textendField(text_count)=end_number;
text_count=text_count+1;
text_number=end_number+1;
end
%end
%clc
%textstartField
%textendField
%测试打印数据
%for x=1:1:text_count-1
%y(textstartField(x):textendField(x))
%end
%改这个选择哪一个txt文档中的哪一组数据
xx=1; %从第一组开始DNA,又是复用的x,不习惯可以换别的字母,最大为text_count-1
n=1; %转换数据列第一个为1
for k=textstartField(xx):1:textendField(xx)
if (~strcmp(y(k),'A'))&&(~strcmp(y(k),'T'))&&(~strcmp(y(k),'C'))&&(~strcmp(y(k),'G'))
n=n;
continue;
end;
if (strcmp(y(k),'A')==1)
DATA_A(n)=1;
else DATA_A(n)=0;
end
if (strcmp(y(k),'G')==1)
DATA_G(n)=1;
else DATA_G(n)=0;
end
if (strcmp(y(k),'C')==1)
DATA_C(n)=1;
else DATA_C(n)=0 ;
end
if (strcmp(y(k),'T')==1)
DATA_T(n)=1;
else DATA_T(n)=0;
end
n=n+1;
end
%计算功率谱和信噪比
pi=3.14;
N=length(DATA_A);
for k=1:1:N
n=N;
X1=0;X2=0;X3=0;X4=0;
while n>0
w=exp(-j*2*pi*n*k/N);
X1=w*DATA_A(n)+X1;
X2=w*DATA_T(n)+X2;
X3=w*DATA_C(n)+X3;
X4=w*DATA_G(n)+X4;
n=n-1;
end
p(k)=(abs(X1)^2+abs(X2)^2+abs(X3)^2+abs(X4)^2)
end
%绘图,这里复用了下X,影响不大,不习惯可以用别的符号做标识
xxx=1:1:length(DATA_A)/2;
plot(xxx,p(1:1:length(DATA_A)/2));
%figure;
%x2=1:1:length(DATA_A);
%plot(x2,p);
%AVG_E平均功率,R信噪比
max_number=fix(N/3);
if p(max_number)<p(max_number+1)
max_number=max_number+1;
end
if p(max_number)<p(max_number-1)
max_number=max_number-1;
end
%max_number
AVG_E=sum(p)/N;
R=p(max_number)/AVG_E
read2.m
clear;
clc;
fid=fopen('AB304259.1_n.txt');
[yi,count]=fread(fid,Inf,'char'),
y=char(yi');
fclose(fid);
%判断第一个基因序列在哪,count总数
%变量解释
end_number=0; %中间变量,结束的元素号,每一段进行的标识
start_number=0; %中间变量,开始的元素号,每一段进行的标识
text_number=1; %文本号,中间变量,用来控制循环
text_count=1; %记录一共可能有几组数据的中间量
textstartField=0; %记录开始的元素号,行矩阵
textendField=0; %记录结束的元素号,行矩阵
while(count-end_number>1)
for m=text_number:1:count
if strcmp(y(m),'A')||strcmp(y(m),'T')||strcmp(y(m),'C')||strcmp(y(m),'G')
if strcmp(y(m+1),'A')||strcmp(y(m+1),'T')||strcmp(y(m+1),'C')||strcmp(y(m+1),'G')
if strcmp(y(m+2),'A')||strcmp(y(m+2),'T')||strcmp(y(m+2),'C')||strcmp(y(m+2),'G')
if strcmp(y(m+3),'A')||strcmp(y(m+3),'T')||strcmp(y(m+3),'C')||strcmp(y(m+3),'G')
start_number=m;
break;
end;
end;
end;
end;
end;
%判断结尾序列
for m=start_number:1:count
if (count-m<4)
if (~strcmp(y(m),'A'))&&(~strcmp(y(m),'T'))&&(~strcmp(y(m),'C'))&&(~strcmp(y(m),'G'))
end_number=m;
break;
end
end
if (~strcmp(y(m),'A'))&&(~strcmp(y(m),'T'))&&(~strcmp(y(m),'C'))&&(~strcmp(y(m),'G'))
if (~strcmp(y(m+1),'A'))&&(~strcmp(y(m+1),'T'))&&(~strcmp(y(m+1),'C'))&&(~strcmp(y(m+1),'G'))
if (~strcmp(y(m+2),'A'))&&(~strcmp(y(m+2),'T'))&&(~strcmp(y(m+2),'C'))&&(~strcmp(y(m+2),'G'))
end_number=m;
break;
end
end
end
end;
textstartField(text_count)=start_number;
textendField(text_count)=end_number;
text_count=text_count+1;
text_number=end_number+1;
end
%end
%clc
%textstartField
%textendField
%测试打印数据
%for x=1:1:text_count-1
%y(textstartField(x):textendField(x))
%end
%改这个选择哪一个txt文档中的哪一组数据
xx=3; %从第一组开始DNA,又是复用的x,不习惯可以换别的字母,最大为text_count-1
n=1; %转换数据列第一个为1
for k=textstartField(xx):1:textendField(xx)
if (~strcmp(y(k),'A'))&&(~strcmp(y(k),'T'))&&(~strcmp(y(k),'C'))&&(~strcmp(y(k),'G'))
n=n;
continue;
end;
if (strcmp(y(k),'A')==1)
DATA(n)=0;
end
if (strcmp(y(k),'G')==1)
DATA(n)=1;
end
if (strcmp(y(k),'C')==1)
DATA(n)=2;
end
if (strcmp(y(k),'T')==1)
DATA(n)=3;
end
n=n+1;
end
%计算功率谱和信噪比
pi=3.14;
N=length(DATA);
for k=1:1:N
n=N;
X1=0;X2=0;X3=0;X4=0;
while n>0
w=exp(-j*2*pi*n*k/N);
X1=w*DATA(n)+X1;
%X2=w*DATA(n)+X2;
%X3=w*DATA(n)+X3;
%X4=w*DATA(n)+X4;
n=n-1;
end
%p(k)=(abs(X1)^2+abs(X2)^2+abs(X3)^2+abs(X4)^2)
p(k)=(abs(X1)^2)
end
%绘图,这里复用了下X,影响不大,不习惯可以用别的符号做标识
xxx=1:1:length(DATA)/2;
plot(xxx,p(1:1:length(DATA)/2));
%figure;
%x2=1:1:length(DATA_A);
%plot(x2,p);
%AVG_E平均功率,R信噪比
max_number=fix(N/3);
if p(max_number)<p(max_number+1)
max_number=max_number+1;
end
if p(max_number)<p(max_number-1)
max_number=max_number-1;
end
%max_number
AVG_E=sum(p)/N;
R=p(max_number)/AVG_E
read3.m
clear;
clc;
fid=fopen('abc.txt');
[yi,count]=fread(fid,Inf,'char'),
y=char(yi');
fclose(fid);
%判断第一个基因序列在哪,count总数
%变量解释
end_number=0; %中间变量,结束的元素号,每一段进行的标识
start_number=0; %中间变量,开始的元素号,每一段进行的标识
text_number=1; %文本号,中间变量,用来控制循环
text_count=1; %记录一共可能有几组数据的中间量
textstartField=0; %记录开始的元素号,行矩阵
textendField=0; %记录结束的元素号,行矩阵
while(count-end_number>1)
for m=text_number:1:count
if strcmp(y(m),'A')||strcmp(y(m),'T')||strcmp(y(m),'C')||strcmp(y(m),'G')
if strcmp(y(m+1),'A')||strcmp(y(m+1),'T')||strcmp(y(m+1),'C')||strcmp(y(m+1),'G')
if strcmp(y(m+2),'A')||strcmp(y(m+2),'T')||strcmp(y(m+2),'C')||strcmp(y(m+2),'G')
if strcmp(y(m+3),'A')||strcmp(y(m+3),'T')||strcmp(y(m+3),'C')||strcmp(y(m+3),'G')
start_number=m;
break;
end;
end;
end;
end;
end;
%判断结尾序列
for m=start_number:1:count
if (count-m<4)
if (~strcmp(y(m),'A'))&&(~strcmp(y(m),'T'))&&(~strcmp(y(m),'C'))&&(~strcmp(y(m),'G'))
end_number=m;
break;
end
end
if (~strcmp(y(m),'A'))&&(~strcmp(y(m),'T'))&&(~strcmp(y(m),'C'))&&(~strcmp(y(m),'G'))
if (~strcmp(y(m+1),'A'))&&(~strcmp(y(m+1),'T'))&&(~strcmp(y(m+1),'C'))&&(~strcmp(y(m+1),'G'))
if (~strcmp(y(m+2),'A'))&&(~strcmp(y(m+2),'T'))&&(~strcmp(y(m+2),'C'))&&(~strcmp(y(m+2),'G'))
end_number=m;
break;
end
end
end
end;
textstartField(text_count)=start_number;
textendField(text_count)=end_number;
text_count=text_count+1;
text_number=end_number+1;
end
%end
%clc
%textstartField
%textendField
%测试打印数据
%for x=1:1:text_count-1
%y(textstartField(x):textendField(x))
%end
%改这个选择哪一个txt文档中的哪一组数据
xx=1; %从第一组开始DNA,又是复用的x,不习惯可以换别的字母,最大为text_count-1
n=1; %转换数据列第一个为1
for k=textstartField(xx):1:textendField(xx)
if (~strcmp(y(k),'A'))&&(~strcmp(y(k),'T'))&&(~strcmp(y(k),'C'))&&(~strcmp(y(k),'G'))
n=n;
continue;
end;
if (strcmp(y(k),'A')==1)
DATA_A(n)=1;
else DATA_A(n)=0;
end
if (strcmp(y(k),'G')==1)
DATA_G(n)=1;
else DATA_G(n)=0;
end
if (strcmp(y(k),'C')==1)
DATA_C(n)=1;
else DATA_C(n)=0 ;
end
if (strcmp(y(k),'T')==1)
DATA_T(n)=1;
else DATA_T(n)=0;
end
n=n+1;
end
pi=3.14;
N=length(DATA_A); %总长度
M=24; %窗口大小
loopCount=1; %窗的记数位
while M*loopCount < N
for k=1:1:M
n=M;
X1=0;X2=0;X3=0;X4=0;
while n>0
w=exp(-j*2*pi*n*k/M);
X1=w*DATA_A(n+(loopCount-1)*M+1)+X1;
X2=w*DATA_T(n+(loopCount-1)*M+1)+X2;
X3=w*DATA_C(n+(loopCount-1)*M+1)+X3;
X4=w*DATA_G(n+(loopCount-1)*M+1)+X4;
n=n-1;
end
%p(k)=abs(X1); %test use
p(k)=(abs(X1)^2+abs(X2)^2+abs(X3)^2+abs(X4)^2)
end
%AVG_E平均功率,R信噪比
max_number=fix(M/3);
if p(max_number)<p(max_number+1)
max_number=max_number+1;
end
if p(max_number)<p(max_number-1)
max_number=max_number-1;
end
%max_number
AVG_E=sum(p)/M;
R(loopCount)=p(max_number)/AVG_E;
numberLoop=M*(loopCount-1); %中间一个窗口的偏移量
loopCount=loopCount +1;
end
N=length(DATA_A); %总长度
m=N-(loopCount-1)*M-1; %窗口大小
for k=1:1:m
n=m;
X1=0;X2=0;X3=0;X4=0;
while n>0
w=exp(-j*2*pi*n*k/m);
X1=w*DATA_A(n+(loopCount-1)*M+1)+X1;
X2=w*DATA_T(n+(loopCount-1)*M+1)+X2;
X3=w*DATA_C(n+(loopCount-1)*M+1)+X3;
X4=w*DATA_G(n+(loopCount-1)*M+1)+X4;
n=n-1;
end
%p(k)=abs(X1); %test use
p(k)=(abs(X1)^2+abs(X2)^2+abs(X3)^2+abs(X4)^2)
%AVG_E平均功率,R信噪比
max_number=fix(m/3);
if p(max_number)<p(max_number+1)
max_number=max_number+1;
end
if p(max_number)<p(max_number-1)
max_number=max_number-1;
end
%max_number
AVG_E=sum(p)/m;
R(loopCount)=p(max_number)/AVG_E;
end
x_label=1:M:M*length(R);
plot(x_label,R);