四种基本变量
一、两变量的相关性分析
相关系数及其检验:
1)Pearson相关系数(皮尔逊)
适用于定距、定比类型的变量,是运用最广的一种相关程度统计量。检验用 t 统计量:$t = \dfrac{r\sqrt{n-2}}{\sqrt{1-r^{2}}}$,其中 t 服从自由度为 (n-2) 的 t 分布。
相关系数的性质:$-1 \le r \le 1$;$r>0$ 表示正相关,$r<0$ 表示负相关;$|r|$ 越接近 1 线性相关越强,$r=0$ 表示不存在线性相关。
function coeff = myPearson(X , Y)
% myPearson  Pearson correlation coefficient of two numeric sequences.
%
% Inputs:
%   X : numeric sequence
%   Y : numeric sequence with the same number of elements as X
%
% Output:
%   coeff : correlation coefficient of X and Y.
%           * n >= 30      : the raw coefficient r is returned.
%           * 5 <= n < 30  : r is adjusted to r*(1 + (1-r^2)/(2*(n-4)))
%                            as a small-sample correction.
%           * n < 5        : r is returned and a notice is printed.
%
% Errors:
%   Raises an error when X and Y do not have the same number of elements.
n = numel(X);
if n ~= numel(Y)
    error('两个数值数列的维数不相等');
end

% Flatten both inputs to columns so that a row vector paired with a column
% vector is processed element-wise instead of being expanded into a matrix.
X = X(:);
Y = Y(:);

% Deviations from the respective means.
dx = X - sum(X)/n;
dy = Y - sum(Y)/n;

fenzi = sum(dx .* dy);
fenmu = sqrt(sum(dx.^2)) * sqrt(sum(dy.^2));
coef = fenzi / fenmu;   % NaN when either sequence has zero variance

if n >= 30
    % Large sample: no correction. n == 30 is handled here; it previously
    % matched neither range and was reported as "fewer than 5" samples.
    coeff = coef;
elseif n > 4
    coeff = coef*(1+(1-coef^2)/(2*(n-4)));   % small-sample correction
else
    coeff = coef;
    fprintf('数据长度小于5 %8.4f\n',coeff);
end
end % end of myPearson
2)Spearman等级相关系数(斯皮尔曼)
适用于度量定序变量与定序变量之间的相关。无相同等级(结)时,$r_s = 1 - \dfrac{6\sum d_i^{2}}{n(n^{2}-1)}$,其中 $d_i$ 为两变量对应等级之差。
function coeff = mySpearman(X , Y)
% mySpearman  Spearman rank correlation coefficient of two numeric sequences.
%
% Inputs:
%   X : numeric sequence
%   Y : numeric sequence with the same number of elements as X
%
% Output:
%   coeff : Spearman rank correlation of X and Y, computed as the Pearson
%           correlation of the two rank vectors. Tied values receive the
%           average of the ranks they span. Without ties this equals
%           1 - 6*sum(d.^2)/(N*(N^2-1)); with ties that shortcut is only
%           approximate, which is why it is not used here.
%           The result is NaN when either sequence is constant or when
%           fewer than two samples are supplied.
%
% Errors:
%   Raises an error when X and Y do not have the same number of elements.
N = numel(X);
if N ~= numel(Y)
    error('两个数值数列的维数不相等');
end

Xrank = spearmanAverageRank(X);
Yrank = spearmanAverageRank(Y);

% Pearson correlation of the ranks.
dx = Xrank - sum(Xrank)/N;
dy = Yrank - sum(Yrank)/N;
coeff = sum(dx .* dy) / sqrt(sum(dx.^2) * sum(dy.^2));
end % end of mySpearman

function ranks = spearmanAverageRank(v)
% Descending average ranks of v as a row vector: the largest value gets
% rank 1 and tied values share the mean of the ranks they occupy.
v = v(:).';
n = numel(v);
ranks = zeros(1 , n);
for i = 1 : n
    numGreater = sum(v > v(i));      % elements strictly larger than v(i)
    numTied = sum(v == v(i)) - 1;    % other elements equal to v(i)
    % mean(0:numTied) is numTied/2 for numTied >= 0. For a NaN entry
    % numTied is -1, the range is empty and the rank becomes NaN.
    ranks(i) = 1 + numGreater + mean(0 : numTied);
end
end % end of spearmanAverageRank
t检验
function ttest = myttest(X , Y)
% myttest  t-test for the significance of the correlation between X and Y.
%
% Inputs:
%   X : numeric sequence
%   Y : numeric sequence with the same number of elements as X
%
% Output:
%   ttest : the degrees of freedom (n - 2) when the correlation is judged
%           significant, i.e. |t| exceeds the tabulated critical value;
%           NaN when it is not significant or when the inputs contain NaN.
%
% Errors:
%   Raises an error when fewer than three samples are supplied.
%
% The correlation coefficient is obtained from myPearson, which is defined
% elsewhere in this file.
n = numel(Y);
if n <= 2
    % The check is on the sample size, not on the data values.
    error('计算相关系数的样本量不够');
end

% NaN never compares equal to anything, so missing data is detected with
% isnan; no test can be carried out in that case.
if any(isnan(X(:))) || any(isnan(Y(:)))
    ttest = NaN;
    return;
end

r2 = myPearson(X , Y);   % Pearson correlation coefficient
Z = n - 2;               % degrees of freedom, independent of orientation
tvalue = abs(r2/(sqrt((1-r2^2)/Z)));   % t statistic

% Critical values indexed by degrees of freedom 1..30.
% NOTE(review): these look like the two-sided 0.05 Student t table rounded
% to two decimals - confirm the intended significance level.
tcrit = [12.71 4.30 3.18 2.78 2.57 2.45 2.37 2.31 2.26 2.23 ...
          2.20 2.18 2.16 2.15 2.13 2.12 2.11 2.10 2.09 2.09 ...
          2.08 2.07 2.07 2.06 2.06 2.06 2.05 2.05 2.04 2.04];
if Z <= numel(tcrit)
    tnorm = tcrit(Z);
else
    tnorm = 1.96;        % value used for more than 30 degrees of freedom
end

if tvalue > tnorm
    ttest = Z;
else
    ttest = NaN;
end
end % end of myttest
卡方检验
SPSS(两列数据)-->分析-->描述统计(交叉表【行:因变量;列:自变量】)-->卡方
Eta检验
SPSS(两列数据)-->分析-->描述统计(交叉表【行:因变量;列:自变量】)-->相关性