R_MarinStats_Notes1

1.4 vector matrices 

> x <- 11  
> x  
[1] 11  
> x1 <- c(1,3,5,7,9)  
> x1  
[1] 1 3 5 7 9  
> gender <- c("male", "female")  
> gender  
[1] "male"   "female"  
> 2:7  
[1] 2 3 4 5 6 7  
> seq(from=1, to=7, by=1)  
[1] 1 2 3 4 5 6 7  
> seq(from=1, to=7, by=1/3)  
 [1] 1.000000 1.333333 1.666667 2.000000 2.333333 2.666667 3.000000 3.333333 3.666667 4.000000 4.333333 4.666667  
[13] 5.000000 5.333333 5.666667 6.000000 6.333333 6.666667 7.000000  
> seq(from=1, to=7, by=0.25)  
 [1] 1.00 1.25 1.50 1.75 2.00 2.25 2.50 2.75 3.00 3.25 3.50 3.75 4.00 4.25 4.50 4.75 5.00 5.25 5.50 5.75 6.00 6.25 6.50  
[24] 6.75 7.00  
> rep(1, times=10)  
 [1] 1 1 1 1 1 1 1 1 1 1  
> rep("marin", times=5)  
[1] "marin" "marin" "marin" "marin" "marin"  
> rep(1:3, times=5)  
 [1] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3  
> rep(seq(from=2, to=5, by=0.75), times=5)  
 [1] 2.00 2.75 3.50 4.25 5.00 2.00 2.75 3.50 4.25 5.00 2.00 2.75 3.50 4.25 5.00 2.00 2.75 3.50 4.25 5.00 2.00 2.75 3.50  
[24] 4.25 5.00  
> rep(c("m", "f"), times=5)  
 [1] "m" "f" "m" "f" "m" "f" "m" "f" "m" "f"  
> x <- 1:5  
> y <- c(1,3,5,7,9)  
> y  
[1] 1 3 5 7 9  
> x  
[1] 1 2 3 4 5  
> x + 10  
[1] 11 12 13 14 15  
> x - 10  
[1] -9 -8 -7 -6 -5  
> x * 10  
[1] 10 20 30 40 50  
> x/2  
[1] 0.5 1.0 1.5 2.0 2.5  
> x + y  
[1]  2  5  8 11 14  
> x-y  
[1]  0 -1 -2 -3 -4  
> x*y  
[1]  1  6 15 28 45  
> x/y  
[1] 1.0000000 0.6666667 0.6000000 0.5714286 0.5555556  
>   
> x  
[1] 1 2 3 4 5  
> y  
[1] 1 3 5 7 9  
> y[3]  
[1] 5  
> y[-3]  
[1] 1 3 7 9  
> y[1:3]  
[1] 1 3 5  
> y[c(1,5)]  
[1] 1 9  
> y[-c(1,5)]  
[1] 3 5 7  
> y[y<6]  
[1] 1 3 5  
> y[x<6]  
[1] 1 3 5 7 9  
> matrix(1:9, nrow=3, byrow=TRUE)  
     [,1] [,2] [,3]  
[1,]    1    2    3  
[2,]    4    5    6  
[3,]    7    8    9  
> mat <- matrix(1:9, nrow=3, byrow=FALSE)  
> mat  
     [,1] [,2] [,3]  
[1,]    1    4    7  
[2,]    2    5    8  
[3,]    3    6    9  
> mat <- matrix(1:9, nrow=3, byrow=TRUE)  
> mat  
     [,1] [,2] [,3]  
[1,]    1    2    3  
[2,]    4    5    6  
[3,]    7    8    9  
> mat[1, 2]  
[1] 2  
> mat[c(1,3), 2]  
[1] 2 8  
> mat[2]  
[1] 4  
> mat[2,]  
[1] 4 5 6  
> mat[,1]  
[1] 1 4 7  
> mat*10  
     [,1] [,2] [,3]  
[1,]   10   20   30  
[2,]   40   50   60  
[3,]   70   80   90  

 1.5 import data csv txt

> help("read.csv")
> ?read.csv
> data1 <- read.csv(file.choose(), header=T)
> data1
   LungCap Age Height Smoke Gender Caesarean
1     6.01  11   62.3    no   male        no
2     6.02  15   63.0   yes female        no
3     6.03  19   63.7    no female       yes
4     6.04  23   64.4   yes female        no
5     6.05  27   65.1    no   male        no
6     6.06  31   65.8   yes   male        no
7     6.07  35   66.5    no female       yes
8     6.08  39   67.2    no   male        no
9     6.09  43   67.9   yes   male        no
10    6.10  47   68.6   yes   male       no 
> data2 <- read.table(file.choose(), header=T, sep=",")
> data2
   LungCap Age Height Smoke Gender Caesarean
1     6.01  11   62.3    no   male        no
2     6.02  15   63.0   yes female        no
3     6.03  19   63.7    no female       yes
4     6.04  23   64.4   yes female        no
5     6.05  27   65.1    no   male        no
6     6.06  31   65.8   yes   male        no
7     6.07  35   66.5    no female       yes
8     6.08  39   67.2    no   male        no
9     6.09  43   67.9   yes   male        no
10    6.10  47   68.6   yes   male       no 
> data3 <-read.delim(file.choose(), header=T)
> data3
   LungCap Age Height Smoke Gender Caesarean
1     6.01  11   62.3    no   male        no
2     6.02  15   63.0   yes female        no
3     6.03  19   63.7    no female       yes
4     6.04  23   64.4   yes female        no
5     6.05  27   65.1    no   male        no
6     6.06  31   65.8   yes   male        no
7     6.07  35   66.5    no female       yes
8     6.08  39   67.2    no   male        no
9     6.09  43   67.9   yes   male        no
10    6.10  47   68.6   yes   male       no 
> data4 <- read.table(file.choose(), header=T, sep="\t")
> data4
   LungCap Age Height Smoke Gender Caesarean
1     6.01  11   62.3    no   male        no
2     6.02  15   63.0   yes female        no
3     6.03  19   63.7    no female       yes
4     6.04  23   64.4   yes female        no
5     6.05  27   65.1    no   male        no
6     6.06  31   65.8   yes   male        no
7     6.07  35   66.5    no female       yes
8     6.08  39   67.2    no   male        no
9     6.09  43   67.9   yes   male        no
10    6.10  47   68.6   yes   male       no 

 1.6

> Data1 <- read.table(file="C:\\Users\\lenovo\\Desktop\\ExcelData.txt", header=T, sep="\t")
> Data2 <- read.table(file.choose(), header=T, sep="\t")
> LungCapData <- read.delim("C:/Users/lenovo/Desktop/ExcelData.txt")
>   View(LungCapData)
> rm(Data1)
> rm(Data2)
> dim(LungCapData)
[1] 10  6
> head(LungCapData)
  LungCap Age Height Smoke Gender Caesarean
1    6.01  11   62.3    no   male        no
2    6.02  15   63.0   yes female        no
3    6.03  19   63.7    no female       yes
4    6.04  23   64.4   yes female        no
5    6.05  27   65.1    no   male        no
6    6.06  31   65.8   yes   male        no
> tail(LungCapData)
   LungCap Age Height Smoke Gender Caesarean
5     6.05  27   65.1    no   male        no
6     6.06  31   65.8   yes   male        no
7     6.07  35   66.5    no female       yes
8     6.08  39   67.2    no   male        no
9     6.09  43   67.9   yes   male        no
10    6.10  47   68.6   yes   male       no 
> LungCapData[5:9, ]
  LungCap Age Height Smoke Gender Caesarean
5    6.05  27   65.1    no   male        no
6    6.06  31   65.8   yes   male        no
7    6.07  35   66.5    no female       yes
8    6.08  39   67.2    no   male        no
9    6.09  43   67.9   yes   male        no
> names(LungCapData)
[1] "LungCap"   "Age"       "Height"    "Smoke"     "Gender"   
[6] "Caesarean"

 1.7 variables and data in r

> mean(Age)
Error in mean(Age) : object 'Age' not found
> LungCapData$Age
[1] 11 15 19 23 27 31 35 39 43 47
> mean(LungCapData$Age)
[1] 29
> attach(LungCapData) #detach(LungCapData)
> Age
 [1] 11 15 19 23 27 31 35 39 43 47
> Smoke
 [1] no  yes no  yes no  yes no  no  yes yes
Levels: no yes
> summary(Smoke)
 no yes 
  5   5 
> mean(Age)
[1] 29
> class(LungCap)
[1] "numeric"
> class(Age)
[1] "integer"
> class(Height)
[1] "numeric"
> class(Smoke)
[1] "factor"
> class(Gender)
[1] "factor"
> class(Caesarean)
[1] "factor"
> levels(Smoke)
[1] "no"  "yes"
> levels(Gender)
[1] "female" "male"  
> levels(LungCap)
NULL
> levels(Age)
NULL
> summary(LungCapData)
    LungCap           Age         Height      Smoke      Gender  Caesarean
 Min.   :6.010   Min.   :11   Min.   :62.30   no :5   female:4   no :8    
 1st Qu.:6.032   1st Qu.:20   1st Qu.:63.88   yes:5   male  :6   yes:2    
 Median :6.055   Median :29   Median :65.45                               
 Mean   :6.055   Mean   :29   Mean   :65.45                               
 3rd Qu.:6.077   3rd Qu.:38   3rd Qu.:67.03                               
 Max.   :6.100   Max.   :47   Max.   :68.60                               
> x <- c(0,1,1,1,0,0,0,0,0,0)
> x
 [1] 0 1 1 1 0 0 0 0 0 0
> class(x)
[1] "numeric"
> summary(x)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    0.00    0.00    0.30    0.75    1.00 
> x <- as.factor(x)
> x
 [1] 0 1 1 1 0 0 0 0 0 0
Levels: 0 1
> summary(x)
0 1 
7 3 

 1.8 Subset data in r 

> mean(Age[Gender=="female"])
[1] 23
> levels(Gender)
[1] "female" "male"  
> FemData <- LungCapData[Gender=="female"]
Error in `[.data.frame`(LungCapData, Gender == "female") : 
  undefined columns selected
> FemData <- LungCapData[Gender=="female",]
> FemData
  LungCap Age Height Smoke Gender Caesarean
2    6.02  15   63.0   yes female        no
3    6.03  19   63.7    no female       yes
4    6.04  23   64.4   yes female        no
7    6.07  35   66.5    no female       yes
> MaleData <- LungCapData[Gender=="male",]
> MaleData
   LungCap Age Height Smoke Gender Caesarean
1     6.01  11   62.3    no   male        no
5     6.05  27   65.1    no   male        no
6     6.06  31   65.8   yes   male        no
8     6.08  39   67.2    no   male        no
9     6.09  43   67.9   yes   male        no
10    6.10  47   68.6   yes   male        no
> dim(FemData)
[1] 4 6
> dim(MaleData)
[1] 6 6
> summary(Gender)
female   male 
     4      6 
> FemData[1:4, ]
  LungCap Age Height Smoke Gender Caesarean
2    6.02  15   63.0   yes female        no
3    6.03  19   63.7    no female       yes
4    6.04  23   64.4   yes female        no
7    6.07  35   66.5    no female       yes
> MaleOver15 <- LungCapData[Gender=="male" & Age>15,]
> MaleOver15
   LungCap Age Height Smoke Gender Caesarean
5     6.05  27   65.1    no   male        no
6     6.06  31   65.8   yes   male        no
8     6.08  39   67.2    no   male        no
9     6.09  43   67.9   yes   male        no
10    6.10  47   68.6   yes   male        no

 1.9 logical statements

 

> Age[1:5]
[1] 11 15 19 23 27
> temp <- Age>15
> temp[1:5]
[1] FALSE FALSE  TRUE  TRUE  TRUE
> temp2 <- as.numeric(Age>15)
> temp2[1:5]
[1] 0 0 1 1 1
> LungCapData[1:5,]
  LungCap Age Height Smoke Gender Caesarean
1    6.01  11   62.3    no   male        no
2    6.02  15   63.0   yes female        no
3    6.03  19   63.7    no female       yes
4    6.04  23   64.4   yes female        no
5    6.05  27   65.1    no   male        no
> FemSmoke <- Gender=="female" & Smoke =="yes"
> FemSmoke
 [1] FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
> MoreData <- cbind(LungCapData, FemSmoke)
> MoreData[1:5,]
  LungCap Age Height Smoke Gender Caesarean FemSmoke
1    6.01  11   62.3    no   male        no    FALSE
2    6.02  15   63.0   yes female        no     TRUE
3    6.03  19   63.7    no female       yes    FALSE
4    6.04  23   64.4   yes female        no     TRUE
5    6.05  27   65.1    no   male        no    FALSE
> rm(list=ls())

 1.10 set up a working directory in r

save

> getwd()
[1] "F:/RStudio"
> projectWD <- "F:\\RProjects"
> setwd(projectWD)
> getwd()
[1] "F:/RProjects"
> LungCapData = read.table(file.choose(), header=T, sep="\t")
> attach(LungCapData)
The following objects are masked from LungCapData (pos = 3):

    Age, Caesarean, Gender, Height, LungCap, Smoke

> MeanAge <- mean(Age)
> x <- c(1,2,3,4,5)
> y <- 14
> z = summary(LungCapData)
> View(LungCapData)
> z
    LungCap           Age         Height      Smoke      Gender  Caesarean
 Min.   :6.010   Min.   :11   Min.   :62.30   no :5   female:4   no :8    
 1st Qu.:6.032   1st Qu.:20   1st Qu.:63.88   yes:5   male  :6   yes:2    
 Median :6.055   Median :29   Median :65.45                               
 Mean   :6.055   Mean   :29   Mean   :65.45                               
 3rd Qu.:6.077   3rd Qu.:38   3rd Qu.:67.03                               
 Max.   :6.100   Max.   :47   Max.   :68.60                               
> save.image("FirstProject.Rdata")
> rm(list=ls())

 load:

> ls()
character(0)
> setwd("F:\\RProjects")
> getwd()
[1] "F:/RProjects"
> load("FirstProject.Rdata")
> load(file.choose())
Error in file.choose() : file choice cancelled
> #session: load workspace

 1.11 writing scripts

 

# Example script for section 1.11: load the data, restore the saved
# workspace, run a few summaries/plots, then save the workspace again.

# Read the tab-delimited data file. Backslashes in a Windows path must be
# doubled inside an R string (a single "\U" is an invalid escape), and the
# field-separator argument is `sep`.
LungCapData <- read.table(file="C:\\Users\\lenovo\\Desktop\\ExcelData.txt", header=TRUE, sep="\t")

# Put the data frame on the search path so its columns (Age, Smoke, ...)
# can be referenced by name below.
attach(LungCapData)

# Switch to the project directory where the workspace image is stored.
projectWD <- "F:\\RProjects"

setwd(projectWD)

getwd()

# Restore the objects saved earlier with save.image().
load("FirstProject.Rdata")

ls()

MeanAge <- mean(Age)

#make a histogram of Age
hist(Age)

summary(LungCapData)

#conduct a t-test for comparing mean LungCap of smokers and non-smokers
t.test(LungCap ~ Smoke)

# Save every object in the workspace back to the project file.
save.image("FirstProject.Rdata")

# Clear the workspace. This removes objects from the global environment only;
# the attached copy of the data stays on the search path, so the column
# lookups below still resolve.
rm(list=ls())

class(Gender)

mean(Age)

LungCap[1:5]

  1.12 install packages

> help("install.packages")
> install.packages("epiR")
also installing the dependency ‘BiasedUrn’

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/BiasedUrn_1.07.zip'
Content type 'application/zip' length 362906 bytes (354 KB)
downloaded 354 KB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/epiR_0.9-93.zip'
Content type 'application/zip' length 330825 bytes (323 KB)
downloaded 323 KB

package ‘BiasedUrn’ successfully unpacked and MD5 sums checked
package ‘epiR’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\lenovo\AppData\Local\Temp\Rtmp6ryLEu\downloaded_packages
> install.packages()
Error in install.packages : argument "pkgs" is missing, with no default
> library(epiR)
载入需要的程辑包:survival
Package epiR 0.9-93 is loaded
Type help(epi.about) for summary information


> help(package = epiR)
> remove.packages("epiR")
Removing package from ‘F:/R-3.4.3/library’
(as ‘lib’ is unspecified)

Restarting R session...

  1.13 customizing r studio

2.1 make bar charts and pie charts in r

> LungCapData <- read.table(file.choose(), header=T, sep="\t")
> attach(LungCapData)
> dim(LungCapData)
[1] 10  6
> names(LungCapData)
[1] "LungCap"   "Age"       "Height"    "Smoke"     "Gender"    "Caesarean"
> ?barplot
> table(Gender)
Gender
female   male 
     4      6 
> count <- table(Gender)
> count
Gender
female   male 
     4      6 
> table(Gender)/10
Gender
female   male 
   0.4    0.6 
> percent <- table(Gender)/10
> barplot(count)
> barplot(percent)
> barplot(percent, main="TITLE", xlab="Gender", ylab="%")
> barplot(percent, main="TITLE", xlab="Gender", ylab="%", las=1)
> barplot(percent, main="TITLE", ylab="Gender", xlab="%", las=1, names.arg=c("Female", "Male"), horiz=T)
> pie(count)
> pie(count, main='TITLE')
> box()

  2.2 make boxplots and boxplots with groups

> boxplot(LungCap)
> quantile(LungCap, probs=c(0,0.25,0.5,0.75,1))
    0%    25%    50%    75%   100% 
6.0100 6.0325 6.0550 6.0775 6.1000 
> boxplot(LungCap, main="Boxplot", ylab="Lung capacity")
> boxplot(LungCap, main="Boxplot", ylab="Lung capacity", ylim=c(6, 6.1))
> boxplot(LungCap, main="Boxplot", ylab="Lung capacity", ylim=c(6, 6.1), las=1)
> boxplot(LungCap ~ Gender)
> boxplot(LungCap ~ Gender, main="Boxplot by Gender")
> boxplot(LungCap[Gender=="female"], LungCap[Gender=="male"])

 2.2 make stratified boxplots

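Note: cut() defaults to right = TRUE, so each interval is closed on the right, e.g. (0,13] includes 13. Use right = FALSE for intervals that are closed on the left and open on the right, e.g. [0,13).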

> Age[1:5]
[1]  6 18 16 14  5
> AgeGroups <- cut(Age, breaks=c(0,13,15,17,25), labels = c("<=13","14/15","16/17","18+"))
> AgeGroups[1:5]
[1] <=13  18+   16/17 14/15 <=13 
Levels: <=13 14/15 16/17 18+
> boxplot(LungCap, ylab="LungCapacity", main="Boxplot of LungCap", las=1)
> boxplot(LungCap ~ Smoke, ylab="LungCapacity", main="LungCap vs Smoke", las=1)
> boxplot(LungCap[Age>=18] ~ Smoke, ylab="LungCapacity", main="LungCap vs Smoke for 18+", las=1)
Error in model.frame.default(formula = LungCap[Age >= 18] ~ Smoke) : 
  变数的长度不一样('Smoke')
> boxplot(LungCap[Age>=18] ~ Smoke[Age>=18], ylab="LungCapacity", main="LungCap vs Smoke for 18+", las=1)
> 
> Age[1:5]
[1]  6 18 16 14  5
> AgeGroups[1:5]
[1] <=13  18+   16/17 14/15 <=13 
Levels: <=13 14/15 16/17 18+
> levels(AgeGroups)
[1] "<=13"  "14/15" "16/17" "18+"  
> boxplot(LungCap ~ Smoke*AgeGroups, ylab="LungCapacity", main="LungCap vs Smoke by agegroups", las=1)
> boxplot(LungCap ~ Smoke*AgeGroups, ylab="LungCapacity", main="LungCap vs Smoke by agegroups", las=2)
> 
> boxplot(LungCap ~ Smoke*AgeGroups, ylab="LungCapacity", main="LungCap vs Smoke by agegroups", las=2, col=c(4,2)) #4 for blue(non-smokers), 2 for red(smokers)

  2.3 make histogram

 

> head(LungCapData)
  LungCap Age Height Smoke Gender Caesarean
1   6.475   6   62.1    no   male        no
2  10.125  18   74.7   yes female        no
3   9.550  16   69.7    no female       yes
4  11.125  14   71.0    no   male        no
5   4.800   5   56.9    no   male        no
6   6.225  11   58.7    no female        no
> ?hist
> hist(LungCap)
> hist(LungCap, freq=F)
> hist(LungCap, prob=T)
> hist(LungCap, prob=T, ylim=c(0,0.2))
> hist(LungCap, prob=T, ylim=c(0,0.2), breaks=7)
> hist(LungCap, prob=T, ylim=c(0,0.2), breaks=14)
> hist(LungCap, prob=T, ylim=c(0,0.2), breaks=seq(from=0, to=16, by=2), main="Boxplot of Lung Capacity", xlab="Lung Capacity", las=1)
> lines(density(LungCap))
> lines(density(LungCap), col=2, lwd=3)

  2.4 make stem and leaf plots in r

> femaleLungCap <- LungCap[Gender=="female"]
> stem(femaleLungCap)

  The decimal point is at the |

   0 | 5
   1 | 0135689
   2 | 0033456777789999
   3 | 0122457788999999
   4 | 012333344555556666677777899
   5 | 0000122222334466666777778999
   6 | 000111111122222222233345555556666667777777788888999999
   7 | 000123334444444445555666667778888888999999
   8 | 000000001111122222333333444444555556666666666777777888888888899
   9 | 0000000011122223333344455556666777788888999999
  10 | 000011111222334445555666777778899
  11 | 00111223556678888
  12 | 1222479
  13 | 1

> stem(femaleLungCap, scale=2)

  The decimal point is at the |

   0 | 5
   1 | 013
   1 | 5689
   2 | 00334
   2 | 56777789999
   3 | 01224
   3 | 57788999999
   4 | 012333344
   4 | 555556666677777899
   5 | 00001222223344
   5 | 66666777778999
   6 | 00011111112222222223334
   6 | 5555556666667777777788888999999
   7 | 00012333444444444
   7 | 5555666667778888888999999
   8 | 000000001111122222333333444444
   8 | 555556666666666777777888888888899
   9 | 00000000111222233333444
   9 | 55556666777788888999999
  10 | 00001111122233444
  10 | 5555666777778899
  11 | 00111223
  11 | 556678888
  12 | 12224
  12 | 79
  13 | 1

  2.5 stacked barcharts, clustered barcharts and mosaic plots

> ?barplot
> Table1 <- table(Smoke, Gender)
> Table1
     Gender
Smoke female male
  no     314  334
  yes     44   33
> barplot(Table1)
> barplot(Table1, beside = T)
> barplot(Table1, beside = T, legend.text = T)
> barplot(Table1, beside = T, legend.text = c("Non-smoker", "Smoker"), main="Gender and Smoking", xlab="Gender", las=1)
> barplot(Table1, beside = T, legend.text = c("Non-smoker", "Smoker"), main="Gender and Smoking", xlab="Gender", las=1, col=c(2,4))
> 
> mosaicplot(Table1)

 

#given one is female, probability of smoking & non-smoking
> summary(Smoke[Gender=="female"])/358
      no      yes 
0.877095 0.122905

  2.6 scatterplot

> ?plot
> cor(Height, Age)
[1] 0.8357368
> plot(Age, Height)
> plot(Age, Height, main="Scatterplot", xlab = "AGE", ylab = "HEIGHT", las=1, xlim = c(0,25))
> plot(Age, Height, main="Scatterplot", xlab = "AGE", ylab = "HEIGHT", las=1, xlim = c(0,25), cex = 0.5)
> plot(Age, Height, main="Scatterplot", xlab = "AGE", ylab = "HEIGHT", las=1, xlim = c(0,25), pch = 8)
> plot(Age, Height, main="Scatterplot", xlab = "AGE", ylab = "HEIGHT", las=1, xlim = c(0,25), pch = 8, col=2)
> 
> abline(lm(Height ~ Age))
> abline(lm(Height ~ Age), col=4)
> lines(smooth.spline(Age, Height))
> lines(smooth.spline(Age, Height), lty=2, lwd=10)

  

 2.7 mean, standard deviation, frequencies in r

> help("mean")
> table(Smoke)
Smoke
 no yes 
648  77 
> table(Smoke)/725
Smoke
       no       yes 
0.8937931 0.1062069 
> length(Smoke)
[1] 725
> table(Smoke)/length(Smoke)
Smoke
       no       yes 
0.8937931 0.1062069 
> table(Smoke, Gender)
     Gender
Smoke female male
  no     314  334
  yes     44   33
> mean(LungCap)
[1] 7.863148
> mean(LungCap, trim=0.10) #remove top 10 and bottom 10 percent
[1] 7.938081
> median(LungCap)
[1] 8
> sd(LungCap)
[1] 2.662008
> sqrt(var(LungCap))
[1] 2.662008
> 
> sd(LungCap)^2
[1] 7.086288
> min(LungCap)
[1] 0.507
> max(LungCap)
[1] 14.675
> range(LungCap)
[1]  0.507 14.675
> quantile
function (x, ...) 
UseMethod("quantile")
<bytecode: 0x000000000927f468>
<environment: namespace:stats>
> quantile(LungCap, probs=0.90)
   90% 
11.205 
> quantile(LungCap, probs=c(0.2, 0.5, 0.9, 1))
   20%    50%    90%   100% 
 5.645  8.000 11.205 14.675 
> sum(LungCap)
[1] 5700.782
> sum(LungCap)/length(LungCap)
[1] 7.863148
>
> cor(LungCap, Age)
[1] 0.8196749
> cor(LungCap, Age, method="spearman")
[1] 0.8172464
> 
> cov(LungCap, Age)
[1] 8.738289
> var(LungCap, Age)
[1] 8.738289
> summary(LungCap)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.507   6.150   8.000   7.863   9.800  14.675 
> summary(Smoke)
 no yes 
648  77 
> summary(LungCapData)
    LungCap            Age            Height      Smoke        Gender    Caesarean
 Min.   : 0.507   Min.   : 3.00   Min.   :45.30   no :648   female:358   no :561  
 1st Qu.: 6.150   1st Qu.: 9.00   1st Qu.:59.90   yes: 77   male  :367   yes:164  
 Median : 8.000   Median :13.00   Median :65.40                                   
 Mean   : 7.863   Mean   :12.33   Mean   :64.84                                   
 3rd Qu.: 9.800   3rd Qu.:15.00   3rd Qu.:70.30                                   
 Max.   :14.675   Max.   :19.00   Max.   :81.80  

  3.1 Binomial distribution in r

> ?dbinom
> #3 successes in 20 trials (probability of success = 1/6)
> dbinom(x=3, size=20, prob=1/6)
[1] 0.2378866
> dbinom(x=0:3, size=20, prob=1/6)
[1] 0.02608405 0.10433621 0.19823881 0.23788657
> #P(X <= 3)
> sum(dbinom(x=0:3, size=20, prob=1/6))
[1] 0.5665456
> pbinom(q=3, size=20, prob=1/6)
[1] 0.5665456
> pbinom(q=3, size=20, prob=1/6, lower.tail = T)
[1] 0.5665456

  3.2 poisson distribution in r

> help(dpois)
> dpois(x=4, lambda = 7)
[1] 0.09122619
> dpois(x=0:4, lambda = 7)
[1] 0.000911882 0.006383174 0.022341108 0.052129252 0.091226192
> ppois(q=4, lambda = 7)
[1] 0.1729916
> ppois(q=12, lambda = 7, lower.tail = F)
[1] 0.02699977
> #P(X > 12)

  3.3 normal distribution in r

dnorm. Given a set of values it returns the height of the probability distribution at each point. If you only give the points it assumes you want to use a mean of zero and standard deviation of one. 

> ?pnorm
> pnorm(q=70, mean=75, sd=5)
[1] 0.1586553
> pnorm(q=85, mean=75, sd=5, lower.tail = F)
[1] 0.02275013
> #P(Z >= 1)
> pnorm(q=1, mean=0, sd=1, lower.tail = F)
[1] 0.1586553
> #find Q1
> qnorm(p=0.25, mean=75, sd=5, lower.tail = T)
[1] 71.62755
> qnorm(p=0.5, mean=75, sd=5, lower.tail = T)
[1] 75
> x = seq(from=55, to=95, by=0.25)
> x
  [1] 55.00 55.25 55.50 55.75 56.00 56.25 56.50 56.75 57.00 57.25 57.50 57.75 58.00 58.25 58.50 58.75
 [17] 59.00 59.25 59.50 59.75 60.00 60.25 60.50 60.75 61.00 61.25 61.50 61.75 62.00 62.25 62.50 62.75
 [33] 63.00 63.25 63.50 63.75 64.00 64.25 64.50 64.75 65.00 65.25 65.50 65.75 66.00 66.25 66.50 66.75
 [49] 67.00 67.25 67.50 67.75 68.00 68.25 68.50 68.75 69.00 69.25 69.50 69.75 70.00 70.25 70.50 70.75
 [65] 71.00 71.25 71.50 71.75 72.00 72.25 72.50 72.75 73.00 73.25 73.50 73.75 74.00 74.25 74.50 74.75
 [81] 75.00 75.25 75.50 75.75 76.00 76.25 76.50 76.75 77.00 77.25 77.50 77.75 78.00 78.25 78.50 78.75
 [97] 79.00 79.25 79.50 79.75 80.00 80.25 80.50 80.75 81.00 81.25 81.50 81.75 82.00 82.25 82.50 82.75
[113] 83.00 83.25 83.50 83.75 84.00 84.25 84.50 84.75 85.00 85.25 85.50 85.75 86.00 86.25 86.50 86.75
[129] 87.00 87.25 87.50 87.75 88.00 88.25 88.50 88.75 89.00 89.25 89.50 89.75 90.00 90.25 90.50 90.75
[145] 91.00 91.25 91.50 91.75 92.00 92.25 92.50 92.75 93.00 93.25 93.50 93.75 94.00 94.25 94.50 94.75
[161] 95.00
> dens <- dnorm(x, mean=75, sd=5)
> plot(x, dens)
> plot(x, dens, type="l")
> abline(v=75)
> rand <- rnorm(n=40, mean=75, sd=5)
> rand
 [1] 76.56584 78.34722 68.15748 68.87978 78.50993 72.29160 76.16492 78.02231 78.74289 75.19591
[11] 78.29987 72.30819 77.70877 71.75129 73.59638 76.28455 70.05909 72.02891 73.23243 82.19271
[21] 75.32922 70.91549 74.27356 70.43975 78.89277 70.59778 82.35224 61.60983 81.34892 81.55996
[31] 70.97721 70.83635 80.73113 67.71210 74.35846 81.16290 70.40886 78.51800 66.49179 75.40734
> hist(rand)

  3.4 t distribution

> #t follows a t-distribution, with mean=0, standard deviation=1, 25 degrees of freedom
> help(pt)
> #P(t > 2.3) t-stat=2.3 df=25 one-sided pvalue
> pt(q=2.3, df=25, lower.tail = F)
[1] 0.01503675
> #two-sided pvalue
> pt(q=2.3, df=25, lower.tail = F) + pt(q=2.3, df=25, lower.tail = T)
[1] 1
> pt(q=2.3, df=25, lower.tail = F)*2
[1] 0.03007351
> pt(q=2.3, df=25, lower.tail = F) + pt(q=-2.3, df=25, lower.tail = T)
[1] 0.03007351
> 
> #find t for 95% confidence
> #value of t with 2.5% in each tail
> qt(p=0.025, df=25, lower.tail = T)
[1] -2.059539
> help(pf)
> help(pexp)

  

转载于:https://www.cnblogs.com/howlowl/p/8495663.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值