R_MarinStats_Notes2

最新推荐文章于 2023-04-27 22:37:10 发布

weixin_30852367

最新推荐文章于 2023-04-27 22:37:10 发布

阅读量309

点赞数

原文链接：http://www.cnblogs.com/howlowl/p/8507114.html

版权

4.1 one-sample t-test and one-sample confidence interval for the mean using R

> help("t.test")
> boxplot(LungCap)
> #prove that mu < 8
> #0.08>0.05(one-sided); Ho: mu=8 isn't rejected 
> t.test(LungCap, mu=8, alternative = "less", conf.level = 0.95)

	One Sample t-test

data:  LungCap
t = -1.3842, df = 724, p-value = 0.08336
alternative hypothesis: true mean is less than 8
95 percent confidence interval:
     -Inf 8.025974
sample estimates:
mean of x 
 7.863148 

> t.test(LungCap, mu=8, alt = "less", conf = 0.95)

	One Sample t-test

data:  LungCap
t = -1.3842, df = 724, p-value = 0.08336
alternative hypothesis: true mean is less than 8
95 percent confidence interval:
     -Inf 8.025974
sample estimates:
mean of x 
 7.863148 

> t.test(LungCap)

	One Sample t-test

data:  LungCap
t = 79.535, df = 724, p-value < 2.2e-16
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
 7.669052 8.057243
sample estimates:
mean of x 
 7.863148 

> t.test(LungCap, mu=8, alt = "two.sided", conf = 0.95)

	One Sample t-test

data:  LungCap
t = -1.3842, df = 724, p-value = 0.1667
alternative hypothesis: true mean is not equal to 8
95 percent confidence interval:
 7.669052 8.057243
sample estimates:
mean of x 
 7.863148 

> t.test(LungCap, mu=8, alt = "two.sided", conf = 0.99)

	One Sample t-test

data:  LungCap
t = -1.3842, df = 724, p-value = 0.1667
alternative hypothesis: true mean is not equal to 8
99 percent confidence interval:
 7.607816 8.118479
sample estimates:
mean of x 
 7.863148 

> TEST <- t.test(LungCap, mu=8, alt = "two.sided", conf = 0.99)
> TEST

	One Sample t-test

data:  LungCap
t = -1.3842, df = 724, p-value = 0.1667
alternative hypothesis: true mean is not equal to 8
99 percent confidence interval:
 7.607816 8.118479
sample estimates:
mean of x 
 7.863148 

> attributes(TEST)
$names
[1] "statistic"   "parameter"   "p.value"     "conf.int"    "estimate"    "null.value"  "alternative"
[8] "method"      "data.name"  

$class
[1] "htest"

> TEST$conf.int
[1] 7.607816 8.118479
attr(,"conf.level")
[1] 0.99
> TEST$p.value
[1]

　　4.2 two-sample t-test in r

> ?t.test
> #Ho: mean lung cap of smokers is equal to non-smokers
> #assume non equal variances
> #paired = F means that two groups are independent 
> t.test(LungCap ~ Smoke, mu=0, alt="two.sided", conf=0.95, var.eq=F, paired=F) 

	Welch Two Sample t-test

data:  LungCap by Smoke
t = -3.6498, df = 117.72, p-value = 0.0003927
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.3501778 -0.4003548
sample estimates:
 mean in group no mean in group yes 
         7.770188          8.645455 

> t.test(LungCap ~ Smoke)

	Welch Two Sample t-test

data:  LungCap by Smoke
t = -3.6498, df = 117.72, p-value = 0.0003927
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.3501778 -0.4003548
sample estimates:
 mean in group no mean in group yes 
         7.770188          8.645455 

> t.test(LungCap[Smoke=="no"], LungCap[Smoke=="yes"])

	Welch Two Sample t-test

data:  LungCap[Smoke == "no"] and LungCap[Smoke == "yes"]
t = -3.6498, df = 117.72, p-value = 0.0003927
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.3501778 -0.4003548
sample estimates:
mean of x mean of y 
 7.770188  8.645455 

> t.test(LungCap ~ Smoke, mu=0, alt="two.sided", conf=0.95, var.eq=T, paired=F) 

	Two Sample t-test

data:  LungCap by Smoke
t = -2.7399, df = 723, p-value = 0.006297
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.5024262 -0.2481063
sample estimates:
 mean in group no mean in group yes 
         7.770188          8.645455 

> 
> #check if var of two groups is different
> boxplot(LungCap ~ Smoke)
> var(LungCap[Smoke=="yes"])
[1] 3.545292
> var(LungCap[Smoke=="no"])
[1] 7.431694
> #levene's test
> #Ho: population variances are equal
> install.packages(car)
Error in install.packages : object 'car' not found
> 
> install.packages("car")
also installing the dependencies ‘minqa’, ‘nloptr’, ‘Rcpp’, ‘RcppEigen’, ‘lme4’, ‘SparseM’, ‘MatrixModels’, ‘pbkrtest’, ‘quantreg’

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/minqa_1.2.4.zip'
Content type 'application/zip' length 666335 bytes (650 KB)
downloaded 650 KB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/nloptr_1.0.4.zip'
Content type 'application/zip' length 1173065 bytes (1.1 MB)
downloaded 1.1 MB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/Rcpp_0.12.15.zip'
Content type 'application/zip' length 4369688 bytes (4.2 MB)
downloaded 4.2 MB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/RcppEigen_0.3.3.4.0.zip'
Content type 'application/zip' length 2663548 bytes (2.5 MB)
downloaded 2.5 MB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/lme4_1.1-15.zip'
Content type 'application/zip' length 4741524 bytes (4.5 MB)
downloaded 4.5 MB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/SparseM_1.77.zip'
Content type 'application/zip' length 968526 bytes (945 KB)
downloaded 945 KB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/MatrixModels_0.4-1.zip'
Content type 'application/zip' length 202881 bytes (198 KB)
downloaded 198 KB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/pbkrtest_0.4-7.zip'
Content type 'application/zip' length 196272 bytes (191 KB)
downloaded 191 KB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/quantreg_5.35.zip'
Content type 'application/zip' length 2129222 bytes (2.0 MB)
downloaded 2.0 MB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/car_2.1-6.zip'
Content type 'application/zip' length 1598696 bytes (1.5 MB)
downloaded 1.5 MB

package ‘minqa’ successfully unpacked and MD5 sums checked
package ‘nloptr’ successfully unpacked and MD5 sums checked
package ‘Rcpp’ successfully unpacked and MD5 sums checked
package ‘RcppEigen’ successfully unpacked and MD5 sums checked
package ‘lme4’ successfully unpacked and MD5 sums checked
package ‘SparseM’ successfully unpacked and MD5 sums checked
package ‘MatrixModels’ successfully unpacked and MD5 sums checked
package ‘pbkrtest’ successfully unpacked and MD5 sums checked
package ‘quantreg’ successfully unpacked and MD5 sums checked
package ‘car’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\lenovo\AppData\Local\Temp\RtmpiETEDX\downloaded_packages
> library(car)
> leveneTest(LungCap ~ Smoke)
Levene's Test for Homogeneity of Variance (center = median)
       Df F value    Pr(>F)    
group   1  12.955 0.0003408 ***
      723                      
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> #variances are not equal

　　4.3 Mann Whitney U aka Wilcoxon Rank-Sum Test in r

non-parametric method appropriate for examining the difference in Medians for 2 independent populations

> ?wilcox.test
> boxplot(LungCap ~ Smoke)
> #Ho: Median lung capacity of smokers = that of non smokers
> #two-sided test
> wilcox.test(LungCap ~ Smoke, mu=0, alt="two.sided", conf.int=T, conf.level=0.95, paired=F, exact = T, correct=T)

	Wilcoxon rank sum test with continuity correction

data:  LungCap by Smoke
W = 20128, p-value = 0.005538
alternative hypothesis: true location shift is not equal to 0
95 percent confidence interval:
 -1.3999731 -0.2499419
sample estimates:
difference in location 
            -0.8000564 

Warning messages:
1: In wilcox.test.default(x = c(6.475, 9.55, 11.125, 4.8, 6.225, 4.95,  :
  无法精確計算带连结的p值
2: In wilcox.test.default(x = c(6.475, 9.55, 11.125, 4.8, 6.225, 4.95,  :
  无法精確計算带连结的置信区间
> 
> wilcox.test(LungCap ~ Smoke, mu=0, alt="two.sided", conf.int=T, conf.level=0.95, paired=F, exact =F, correct=T)

	Wilcoxon rank sum test with continuity correction

data:  LungCap by Smoke
W = 20128, p-value = 0.005538
alternative hypothesis: true location shift is not equal to 0
95 percent confidence interval:
 -1.3999731 -0.2499419
sample estimates:
difference in location 
            -0.8000564 

> wilcox.test(LungCap ~ Smoke, mu=0, alt="two.sided", conf.level=0.95, paired=F, exact =F, correct=T)

	Wilcoxon rank sum test with continuity correction

data:  LungCap by Smoke
W = 20128, p-value = 0.005538
alternative hypothesis: true location shift is not equal to 0

　　4.4 paired t test in r

> BloodPressure = read.table(file = file.choose(), header=T, sep="\t")
> attach(BloodPressure)
> dim(BloodPressure)
[1] 25  3
> names(BloodPressure)
[1] "Subject" "Before"  "After"  
> BloodPressure[1:3,]
  Subject Before After
1       1    135   127
2       2    142   145
3       3    137   131
> 
> boxplot(Before, After)
> plot(Before, After)
> abline(a=0, b=1)
> 
> #Ho: mean difference in bloodpressure is 0
> t.test(Before, After, mu=0, alt="two.sided", paired = T, con.level=0.99)

	Paired t-test

data:  Before and After
t = 3.8882, df = 24, p-value = 0.0006986
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
  3.753515 12.246485
sample estimates:
mean of the differences 
                      8 

> t.test(Before, After, mu=0, alt="two.sided", paired = T, conf.level=0.99)

	Paired t-test

data:  Before and After
t = 3.8882, df = 24, p-value = 0.0006986
alternative hypothesis: true difference in means is not equal to 0
99 percent confidence interval:
  2.245279 13.754721
sample estimates:
mean of the differences 
                      8 

> t.test(After, Before, mu=0, alt="two.sided", paired = T, conf.level=0.99)

	Paired t-test

data:  After and Before
t = -3.8882, df = 24, p-value = 0.0006986
alternative hypothesis: true difference in means is not equal to 0
99 percent confidence interval:
 -13.754721  -2.245279
sample estimates:
mean of the differences

　　4.5 Wilcoxon Signed rank test in r

non-parametric method appropriate for examining the Median difference in observations for 2 populations that are paired or dependent on on another

> ?wilcox.test
> boxplot(Before, After)
> #Ho: Median change in sbp is 0
> #two-sided test
> wilcox.test(Before, After, mu=0, alt="two.sided", paired=T, conf.int = T, conf.level = 0.99)

	Wilcoxon signed rank test with continuity correction

data:  Before and After
V = 267, p-value = 0.0008655
alternative hypothesis: true location shift is not equal to 0
99 percent confidence interval:
  2.000012 14.000038
sample estimates:
(pseudo)median 
      7.500019 

Warning messages:
1: In wilcox.test.default(Before, After, mu = 0, alt = "two.sided",  :
  无法精確計算带连结的p值
2: In wilcox.test.default(Before, After, mu = 0, alt = "two.sided",  :
  有连结时无法計算精確的置信区间
3: In wilcox.test.default(Before, After, mu = 0, alt = "two.sided",  :
  有0时无法計算精確的p值
4: In wilcox.test.default(Before, After, mu = 0, alt = "two.sided",  :
  有零时无法計算精確的置信区间
> wilcox.test(Before, After, mu=0, alt="two.sided", paired=T, conf.int = T, conf.level = 0.99, exact=F)

	Wilcoxon signed rank test with continuity correction

data:  Before and After
V = 267, p-value = 0.0008655
alternative hypothesis: true location shift is not equal to 0
99 percent confidence interval:
  2.000012 14.000038
sample estimates:
(pseudo)median 
      7.500019 

> wilcox.test(Before, After, mu=0, alt="two.sided", paired=T, conf.int = T, conf.level = 0.99, exact=F, correct=F)

	Wilcoxon signed rank test

data:  Before and After
V = 267, p-value = 0.0008221
alternative hypothesis: true location shift is not equal to 0
99 percent confidence interval:
  2.000083 14.000027
sample estimates:
(pseudo)median 
      7.500019

　　4.6 one-way analysis of variance (ANOVA) and kruskal wallis one-way analysis of variance using r

anova is a parametric method appropriate for comparing the means for 2 or more independent populations

> DietData <- read.table(file.choose(), header = T, sep="\t")
> View(DietData)
> View(DietData)
> attach(DietData)
The following object is masked from package:car:

    WeightLoss

> class(WeightLoss)
[1] "numeric"
> names(DietData)
[1] "WeightLoss" "Diet"      
> class(Diet)
[1] "factor"
> levels(Diet)
[1] "A" "B" "C" "D"
> 
> ?aov
> boxplot(WeightLoss ~ Diet)
> 
> #Ho: mean weight loss is the same for all diets
> ANOVA1 = aov(WeightLoss ~ Diet)
> ANOVA1
Call:
   aov(formula = WeightLoss ~ Diet)

Terms:
                     Diet Residuals
Sum of Squares   97.32983 296.98667
Deg. of Freedom         3        56

Residual standard error: 2.302897
Estimated effects may be unbalanced
> summary(ANOVA1)
            Df Sum Sq Mean Sq F value  Pr(>F)   
Diet         3  97.33   32.44   6.118 0.00113 **
Residuals   56 296.99    5.30                   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> attributes(ANOVA1)
$names
 [1] "coefficients"  "residuals"     "effects"       "rank"          "fitted.values" "assign"       
 [7] "qr"            "df.residual"   "contrasts"     "xlevels"       "call"          "terms"        
[13] "model"        

$class
[1] "aov" "lm" 

> ANOVA1$coefficients
(Intercept)       DietB       DietC       DietD 
  9.1800000  -0.2733333   2.9333333   1.3600000 
> 
> #pr(>F) not all means are equal
> summary(ANOVA1)
            Df Sum Sq Mean Sq F value  Pr(>F)   
Diet         3  97.33   32.44   6.118 0.00113 **
Residuals   56 296.99    5.30                   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> #which means of diets may differ from others
> TukeyHSD(ANOVA1)
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = WeightLoss ~ Diet)

$Diet
          diff        lwr       upr     p adj
B-A -0.2733333 -2.4999391 1.9532725 0.9880087
C-A  2.9333333  0.7067275 5.1599391 0.0051336
D-A  1.3600000 -0.8666058 3.5866058 0.3773706
C-B  3.2066667  0.9800609 5.4332725 0.0019015
D-B  1.6333333 -0.5932725 3.8599391 0.2224287
D-C -1.5733333 -3.7999391 0.6532725 0.2521236

> plot(TukeyHSD(ANOVA1))
> plot(TukeyHSD(ANOVA1), las=1)
> 
> kruskal.test(WeightLoss ~ Diet)

	Kruskal-Wallis rank sum test

data:  WeightLoss by Diet
Kruskal-Wallis chi-squared = 15.902, df = 3, p-value = 0.001188

4.7 Chi-square test of independence and Fisher's exact test using r

the Chi-square test of independence is a parametric method appropriate for testing independence between two categorical variables

Chi-square test of independence:

Null hypothesis: Assumes that there is no association between the two variables.

Alternative hypothesis: Assumes that there is an association between the two variables.

> ?chisq.test
> table(Gender, Smoke)
        Smoke
Gender    no yes
  female 314  44
  male   334  33
> TAB = table(Gender, Smoke)
> TAB
        Smoke
Gender    no yes
  female 314  44
  male   334  33
> barplot(TAB, beside = T, legend = T)
> chisq.test(TAB, correct=T)

	Pearson's Chi-squared test with Yates' continuity correction

data:  TAB
X-squared = 1.7443, df = 1, p-value = 0.1866

> CHI = chisq.test(TAB, correct=T)
> CHI

	Pearson's Chi-squared test with Yates' continuity correction

data:  TAB
X-squared = 1.7443, df = 1, p-value = 0.1866

> attributes(CHI)
$names
[1] "statistic" "parameter" "p.value"   "method"    "data.name" "observed"  "expected"  "residuals"
[9] "stdres"   

$class
[1] "htest"

> CHI$expected
        Smoke
Gender         no      yes
  female 319.9779 38.02207
  male   328.0221 38.97793
> fisher.test(TAB, conf.int = T, conf.level = 0.99)

	Fisher's Exact Test for Count Data

data:  TAB
p-value = 0.1845
alternative hypothesis: true odds ratio is not equal to 1
99 percent confidence interval:
 0.3625381 1.3521266
sample estimates:
odds ratio 
 0.7054345

　4.9 correlation and corvariance in r

cor.test # Ho: the corelation is 0

> ?cor.test
> plot(Age, LungCap, main = "Scatterplot", las=1)
> cor(Age, LungCap)
[1] 0.8196749
> cor(Age, LungCap, method="pearson")
[1] 0.8196749
> cor(Age, LungCap, method="spearman")
[1] 0.8172464
> cor(Age, LungCap, method="kendall")
[1] 0.639576
> cor.test(Age, LungCap, method="pearson")

	Pearson's product-moment correlation

data:  Age and LungCap
t = 38.476, df = 723, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.7942660 0.8422217
sample estimates:
      cor 
0.8196749 

> cor.test(Age, LungCap, method="spearman")

	Spearman's rank correlation rho

data:  Age and LungCap
S = 11607000, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.8172464 

Warning message:
In cor.test.default(Age, LungCap, method = "spearman") :
  无法给连结计算精確p值
> cor.test(Age, LungCap, method="spearman", exact=F)

	Spearman's rank correlation rho

data:  Age and LungCap
S = 11607000, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.8172464 

> cor.test(Age, LungCap, method="pearson", alt="greater", conf.level = 0.99)

	Pearson's product-moment correlation

data:  Age and LungCap
t = 38.476, df = 723, p-value < 2.2e-16
alternative hypothesis: true correlation is greater than 0
99 percent confidence interval:
 0.7891778 1.0000000
sample estimates:
      cor 
0.8196749 

> 
> cov(Age, LungCap)
[1] 8.738289
> pairs(LungCapData)
> #scatterplot not appropriate for categorical variables
> pairs(LungCapData[, 1:3])
> cor(LungCapData[1:3,])
Error in cor(LungCapData[1:3, ]) : 'x'必需为数值
> cor(LungCapData[, 1:3])
          LungCap       Age    Height
LungCap 1.0000000 0.8196749 0.9121873
Age     0.8196749 1.0000000 0.8357368
Height  0.9121873 0.8357368 1.0000000
> 
> cor(LungCapData[, 1:3], method = "spearman")
          LungCap       Age    Height
LungCap 1.0000000 0.8172464 0.9088871
Age     0.8172464 1.0000000 0.8322843
Height  0.9088871 0.8322843 1.0000000
> cov(LungCapData[, 1:3], method = "spearman")
         LungCap      Age   Height
LungCap 43861.66 35752.67 39865.11
Age     35752.67 43634.08 36410.38
Height  39865.11 36410.38 43861.26