1.4 vectors and matrices
> x <- 11 > x [1] 11 > x1 <- c(1,3,5,7,9) > x1 [1] 1 3 5 7 9 > gender <- c("male", "female") > gender [1] "male" "female" > 2:7 [1] 2 3 4 5 6 7 > seq(from=1, to=7, by=1) [1] 1 2 3 4 5 6 7 > seq(from=1, to=7, by=1/3) [1] 1.000000 1.333333 1.666667 2.000000 2.333333 2.666667 3.000000 3.333333 3.666667 4.000000 4.333333 4.666667 [13] 5.000000 5.333333 5.666667 6.000000 6.333333 6.666667 7.000000 > seq(from=1, to=7, by=0.25) [1] 1.00 1.25 1.50 1.75 2.00 2.25 2.50 2.75 3.00 3.25 3.50 3.75 4.00 4.25 4.50 4.75 5.00 5.25 5.50 5.75 6.00 6.25 6.50 [24] 6.75 7.00 > rep(1, times=10) [1] 1 1 1 1 1 1 1 1 1 1 > rep("marin", times=5) [1] "marin" "marin" "marin" "marin" "marin" > rep(1:3, times=5) [1] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 > rep(seq(from=2, to=5, by=0.75), times=5) [1] 2.00 2.75 3.50 4.25 5.00 2.00 2.75 3.50 4.25 5.00 2.00 2.75 3.50 4.25 5.00 2.00 2.75 3.50 4.25 5.00 2.00 2.75 3.50 [24] 4.25 5.00 > rep(c("m", "f"), times=5) [1] "m" "f" "m" "f" "m" "f" "m" "f" "m" "f" > x <- 1:5 > y <- c(1,3,5,7,9) > y [1] 1 3 5 7 9 > x [1] 1 2 3 4 5 > x + 10 [1] 11 12 13 14 15 > x - 10 [1] -9 -8 -7 -6 -5 > x * 10 [1] 10 20 30 40 50 > x/2 [1] 0.5 1.0 1.5 2.0 2.5 > x + y [1] 2 5 8 11 14 > x-y [1] 0 -1 -2 -3 -4 > x*y [1] 1 6 15 28 45 > x/y [1] 1.0000000 0.6666667 0.6000000 0.5714286 0.5555556 > > x [1] 1 2 3 4 5 > y [1] 1 3 5 7 9 > y[3] [1] 5 > y[-3] [1] 1 3 7 9 > y[1:3] [1] 1 3 5 > y[c(1,5)] [1] 1 9 > y[-c(1,5)] [1] 3 5 7 > y[y<6] [1] 1 3 5 > y[x<6] [1] 1 3 5 7 9 > matrix(1:9, nrow=3, byrow=TRUE) [,1] [,2] [,3] [1,] 1 2 3 [2,] 4 5 6 [3,] 7 8 9 > mat <- matrix(1:9, nrow=3, byrow=FALSE) > mat [,1] [,2] [,3] [1,] 1 4 7 [2,] 2 5 8 [3,] 3 6 9 > mat <- matrix(1:9, nrow=3, byrow=TRUE) > mat [,1] [,2] [,3] [1,] 1 2 3 [2,] 4 5 6 [3,] 7 8 9 > mat[1, 2] [1] 2 > mat[c(1,3), 2] [1] 2 8 > mat[2] [1] 4 > mat[2,] [1] 4 5 6 > mat[,1] [1] 1 4 7 > mat*10 [,1] [,2] [,3] [1,] 10 20 30 [2,] 40 50 60 [3,] 70 80 90
1.5 import data csv txt
> help("read.csv") > ?read.csv > data1 <- read.csv(file.choose(), header=T) > data1 LungCap Age Height Smoke Gender Caesarean 1 6.01 11 62.3 no male no 2 6.02 15 63.0 yes female no 3 6.03 19 63.7 no female yes 4 6.04 23 64.4 yes female no 5 6.05 27 65.1 no male no 6 6.06 31 65.8 yes male no 7 6.07 35 66.5 no female yes 8 6.08 39 67.2 no male no 9 6.09 43 67.9 yes male no 10 6.10 47 68.6 yes male no > data2 <- read.table(file.choose(), header=T, sep=",") > data2 LungCap Age Height Smoke Gender Caesarean 1 6.01 11 62.3 no male no 2 6.02 15 63.0 yes female no 3 6.03 19 63.7 no female yes 4 6.04 23 64.4 yes female no 5 6.05 27 65.1 no male no 6 6.06 31 65.8 yes male no 7 6.07 35 66.5 no female yes 8 6.08 39 67.2 no male no 9 6.09 43 67.9 yes male no 10 6.10 47 68.6 yes male no > data3 <-read.delim(file.choose(), header=T) > data3 LungCap Age Height Smoke Gender Caesarean 1 6.01 11 62.3 no male no 2 6.02 15 63.0 yes female no 3 6.03 19 63.7 no female yes 4 6.04 23 64.4 yes female no 5 6.05 27 65.1 no male no 6 6.06 31 65.8 yes male no 7 6.07 35 66.5 no female yes 8 6.08 39 67.2 no male no 9 6.09 43 67.9 yes male no 10 6.10 47 68.6 yes male no > data4 <- read.table(file.choose(), header=T, sep="\t") > data4 LungCap Age Height Smoke Gender Caesarean 1 6.01 11 62.3 no male no 2 6.02 15 63.0 yes female no 3 6.03 19 63.7 no female yes 4 6.04 23 64.4 yes female no 5 6.05 27 65.1 no male no 6 6.06 31 65.8 yes male no 7 6.07 35 66.5 no female yes 8 6.08 39 67.2 no male no 9 6.09 43 67.9 yes male no 10
1.6
> Data1 <- read.table(file="C:\\Users\\lenovo\\Desktop\\ExcelData.txt", header=T, sep="\t") > Data2 <- read.table(file.choose(), header=T, sep="\t") > LungCapData <- read.delim("C:/Users/lenovo/Desktop/ExcelData.txt") > View(LungCapData) > rm(Data1) > rm(Data2) > dim(LungCapData) [1] 10 6 > head(LungCapData) LungCap Age Height Smoke Gender Caesarean 1 6.01 11 62.3 no male no 2 6.02 15 63.0 yes female no 3 6.03 19 63.7 no female yes 4 6.04 23 64.4 yes female no 5 6.05 27 65.1 no male no 6 6.06 31 65.8 yes male no > tail(LungCapData) LungCap Age Height Smoke Gender Caesarean 5 6.05 27 65.1 no male no 6 6.06 31 65.8 yes male no 7 6.07 35 66.5 no female yes 8 6.08 39 67.2 no male no 9 6.09 43 67.9 yes male no 10 6.10 47 68.6 yes male no > LungCapData[5:9, ] LungCap Age Height Smoke Gender Caesarean 5 6.05 27 65.1 no male no 6 6.06 31 65.8 yes male no 7 6.07 35 66.5 no female yes 8 6.08 39 67.2 no male no 9 6.09 43 67.9 yes male no > names(LungCapData) [1] "LungCap" "Age" "Height" "Smoke" "Gender" [6] "Caesarean"
1.7 variables and data in r
> mean(Age) Error in mean(Age) : object 'Age' not found > LungCapData$Age [1] 11 15 19 23 27 31 35 39 43 47 > mean(LungCapData$Age) [1] 29 > attach(LungCapData) #detach(LungCapData) > Age [1] 11 15 19 23 27 31 35 39 43 47 > Smoke [1] no yes no yes no yes no no yes yes Levels: no yes > summary(Smoke) no yes 5 5 > mean(Age) [1] 29 > class(LungCap) [1] "numeric" > class(Age) [1] "integer" > class(Height) [1] "numeric" > class(Smoke) [1] "factor" > class(Gender) [1] "factor" > class(Caesarean) [1] "factor" > levels(Smoke) [1] "no" "yes" > levels(Gender) [1] "female" "male" > levels(LungCap) NULL > levels(Age) NULL > summary(LungCapData) LungCap Age Height Smoke Gender Caesarean Min. :6.010 Min. :11 Min. :62.30 no :5 female:4 no :8 1st Qu.:6.032 1st Qu.:20 1st Qu.:63.88 yes:5 male :6 yes:2 Median :6.055 Median :29 Median :65.45 Mean :6.055 Mean :29 Mean :65.45 3rd Qu.:6.077 3rd Qu.:38 3rd Qu.:67.03 Max. :6.100 Max. :47 Max. :68.60 > x <- c(0,1,1,1,0,0,0,0,0,0) > x [1] 0 1 1 1 0 0 0 0 0 0 > class(x) [1] "numeric" > summary(x) Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00 0.00 0.00 0.30 0.75 1.00 > x <- as.factor(x) > x [1] 0 1 1 1 0 0 0 0 0 0 Levels: 0 1 > summary(x) 0 1 7 3
1.8 Subset data in r
> mean(Age[Gender=="female"]) [1] 23 > levels(Gender) [1] "female" "male" > FemData <- LungCapData[Gender=="female"] Error in `[.data.frame`(LungCapData, Gender == "female") : undefined columns selected > FemData <- LungCapData[Gender=="female",] > FemData LungCap Age Height Smoke Gender Caesarean 2 6.02 15 63.0 yes female no 3 6.03 19 63.7 no female yes 4 6.04 23 64.4 yes female no 7 6.07 35 66.5 no female yes > MaleData <- LungCapData[Gender=="male",] > MaleData LungCap Age Height Smoke Gender Caesarean 1 6.01 11 62.3 no male no 5 6.05 27 65.1 no male no 6 6.06 31 65.8 yes male no 8 6.08 39 67.2 no male no 9 6.09 43 67.9 yes male no 10 6.10 47 68.6 yes male no > dim(FemData) [1] 4 6 > dim(MaleData) [1] 6 6 > summary(Gender) female male 4 6 > FemData[1:4, ] LungCap Age Height Smoke Gender Caesarean 2 6.02 15 63.0 yes female no 3 6.03 19 63.7 no female yes 4 6.04 23 64.4 yes female no 7 6.07 35 66.5 no female yes > MaleOver15 <- LungCapData[Gender=="male" & Age>15,] > MaleOver15 LungCap Age Height Smoke Gender Caesarean 5 6.05 27 65.1 no male no 6 6.06 31 65.8 yes male no 8 6.08 39 67.2 no male no 9 6.09 43 67.9 yes male no 10 6.10 47 68.6 yes male no
1.9 logical statements
> Age[1:5] [1] 11 15 19 23 27 > temp <- Age>15 > temp[1:5] [1] FALSE FALSE TRUE TRUE TRUE > temp2 <- as.numeric(Age>15) > temp2[1:5] [1] 0 0 1 1 1 > LungCapData[1:5,] LungCap Age Height Smoke Gender Caesarean 1 6.01 11 62.3 no male no 2 6.02 15 63.0 yes female no 3 6.03 19 63.7 no female yes 4 6.04 23 64.4 yes female no 5 6.05 27 65.1 no male no > FemSmoke <- Gender=="female" & Smoke =="yes" > FemSmoke [1] FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE > MoreData <- cbind(LungCapData, FemSmoke) > MoreData[1:5,] LungCap Age Height Smoke Gender Caesarean FemSmoke 1 6.01 11 62.3 no male no FALSE 2 6.02 15 63.0 yes female no TRUE 3 6.03 19 63.7 no female yes FALSE 4 6.04 23 64.4 yes female no TRUE 5 6.05 27 65.1 no male no FALSE > rm(list=ls())
1.10 set up a working directory in r
save
> getwd() [1] "F:/RStudio" > projectWD <- "F:\\RProjects" > setwd(projectWD) > getwd() [1] "F:/RProjects" > LungCapData = read.table(file.choose(), header=T, sep="\t") > attach(LungCapData) The following objects are masked from LungCapData (pos = 3): Age, Caesarean, Gender, Height, LungCap, Smoke > MeanAge <- mean(Age) > x <- c(1,2,3,4,5) > y <- 14 > z = summary(LungCapData) > View(LungCapData) > z LungCap Age Height Smoke Gender Caesarean Min. :6.010 Min. :11 Min. :62.30 no :5 female:4 no :8 1st Qu.:6.032 1st Qu.:20 1st Qu.:63.88 yes:5 male :6 yes:2 Median :6.055 Median :29 Median :65.45 Mean :6.055 Mean :29 Mean :65.45 3rd Qu.:6.077 3rd Qu.:38 3rd Qu.:67.03 Max. :6.100 Max. :47 Max. :68.60 > save.image("FirstProject.Rdata") > rm(list=ls())
load:
> ls() character(0) > setwd("F:\\RProjects") > getwd() [1] "F:/RProjects" > load("FirstProject.Rdata") > load(file.choose()) Error in file.choose() : file choice cancelled > #session: load workspace
1.11 writing scripts
# Example script for section 1.11 (writing scripts).
# Reads the tab-delimited lung-capacity data, restores the saved project
# workspace, produces a few summaries, then saves and clears the workspace.

# Fixes relative to the earlier draft of this script:
#   * read.talbe -> read.table (typo)
#   * seq = "\t" -> sep = "\t" (read.table has no `seq` argument)
#   * backslashes in the Windows path are escaped; a bare "\U" is a parse error
LungCapData <- read.table(
  file = "C:\\Users\\lenovo\\Desktop\\ExcelData.txt",
  header = TRUE,
  sep = "\t"
)

# Put the data frame's columns (Age, Smoke, LungCap, ...) on the search path
# so they can be referenced without the `LungCapData$` prefix below.
attach(LungCapData)

# Switch to the project directory and confirm the change.
projectWD <- "F:\\RProjects"
setwd(projectWD)
getwd()

# Restore the previously saved workspace and list what it contains.
# NOTE(review): objects in the .Rdata file with the same names as objects
# created above (e.g. LungCapData) will overwrite them -- confirm intended.
load("FirstProject.Rdata")
ls()

MeanAge <- mean(Age)

# make a histogram of Age
hist(Age)

summary(LungCapData)

# conduct a t-test for comparing mean LungCap of smokers and non-smokers
t.test(LungCap ~ Smoke)

# Persist the workspace, then clear the global environment.
save.image("FirstProject.Rdata")
rm(list = ls())

# These still work after rm(): attach() placed a copy of the columns on the
# search path, and rm(list = ls()) only clears the global environment.
class(Gender)
mean(Age)
LungCap[1:5]
1.12 install packages
> help("install.packages") > install.packages("epiR") also installing the dependency ‘BiasedUrn’ trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/BiasedUrn_1.07.zip' Content type 'application/zip' length 362906 bytes (354 KB) downloaded 354 KB trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.4/epiR_0.9-93.zip' Content type 'application/zip' length 330825 bytes (323 KB) downloaded 323 KB package ‘BiasedUrn’ successfully unpacked and MD5 sums checked package ‘epiR’ successfully unpacked and MD5 sums checked The downloaded binary packages are in C:\Users\lenovo\AppData\Local\Temp\Rtmp6ryLEu\downloaded_packages > install.packages() Error in install.packages : argument "pkgs" is missing, with no default > library(epiR) 载入需要的程辑包:survival Package epiR 0.9-93 is loaded Type help(epi.about) for summary information > help(package = epiR) > remove.packages("epiR") Removing package from ‘F:/R-3.4.3/library’ (as ‘lib’ is unspecified) Restarting R session...
1.13 customizing r studio
2.1 make bar charts and pie charts in r
> LungCapData <- read.table(file.choose(), header=T, sep="\t") > attach(LungCapData) > dim(LungCapData) [1] 10 6 > names(LungCapData) [1] "LungCap" "Age" "Height" "Smoke" "Gender" "Caesarean" > ?barplot > table(Gender) Gender female male 4 6 > count <- table(Gender) > count Gender female male 4 6 > table(Gender)/10 Gender female male 0.4 0.6 > percent <- table(Gender)/10 > barplot(count) > barplot(percent) > barplot(percent, main="TITLE", xlab="Gender", ylab="%") > barplot(percent, main="TITLE", xlab="Gender", ylab="%", las=1) > barplot(percent, main="TITLE", ylab="Gender", xlab="%", las=1, names.arg=c("Female", "Male"), horiz=T) > pie(count) > pie(count, main='TITLE') > box()
2.2 make boxplots and boxplots with groups
> boxplot(LungCap) > quantile(LungCap, probs=c(0,0.25,0.5,0.75,1)) 0% 25% 50% 75% 100% 6.0100 6.0325 6.0550 6.0775 6.1000 > boxplot(LungCap, main="Boxplot", ylab="Lung capacity") > boxplot(LungCap, main="Boxplot", ylab="Lung capacity", ylim=c(6, 6.1)) > boxplot(LungCap, main="Boxplot", ylab="Lung capacity", ylim=c(6, 6.1), las=1) > boxplot(LungCap ~ Gender) > boxplot(LungCap ~ Gender, main="Boxplot by Gender") > boxplot(LungCap[Gender=="female"], LungCap[Gender=="male"])
2.2 make stratified boxplots
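Note on cut(): the `right` argument defaults to TRUE, so intervals are closed on the right, e.g. (0,13]; with right = FALSE the intervals are closed on the left and open on the right, e.g. [0,13).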
> Age[1:5] [1] 6 18 16 14 5 > AgeGroups <- cut(Age, breaks=c(0,13,15,17,25), labels = c("<=13","14/15","16/17","18+")) > AgeGroups[1:5] [1] <=13 18+ 16/17 14/15 <=13 Levels: <=13 14/15 16/17 18+ > boxplot(LungCap, ylab="LungCapacity", main="Boxplot of LungCap", las=1) > boxplot(LungCap ~ Smoke, ylab="LungCapacity", main="LungCap vs Smoke", las=1) > boxplot(LungCap[Age>=18] ~ Smoke, ylab="LungCapacity", main="LungCap vs Smoke for 18+", las=1) Error in model.frame.default(formula = LungCap[Age >= 18] ~ Smoke) : 变数的长度不一样('Smoke') > boxplot(LungCap[Age>=18] ~ Smoke[Age>=18], ylab="LungCapacity", main="LungCap vs Smoke for 18+", las=1) > > Age[1:5] [1] 6 18 16 14 5 > AgeGroups[1:5] [1] <=13 18+ 16/17 14/15 <=13 Levels: <=13 14/15 16/17 18+ > levels(AgeGroups) [1] "<=13" "14/15" "16/17" "18+" > boxplot(LungCap ~ Smoke*AgeGroups, ylab="LungCapacity", main="LungCap vs Smoke by agegroups", las=1) > boxplot(LungCap ~ Smoke*AgeGroups, ylab="LungCapacity", main="LungCap vs Smoke by agegroups", las=2) > > boxplot(LungCap ~ Smoke*AgeGroups, ylab="LungCapacity", main="LungCap vs Smoke by agegroups", las=2, col=c(4,2)) #4 for blue(non-smokers), 2 for red(smokers)
2.3 make histogram
> head(LungCapData) LungCap Age Height Smoke Gender Caesarean 1 6.475 6 62.1 no male no 2 10.125 18 74.7 yes female no 3 9.550 16 69.7 no female yes 4 11.125 14 71.0 no male no 5 4.800 5 56.9 no male no 6 6.225 11 58.7 no female no > ?hist > hist(LungCap) > hist(LungCap, freq=F) > hist(LungCap, prob=T) > hist(LungCap, prob=T, ylim=c(0,0.2)) > hist(LungCap, prob=T, ylim=c(0,0.2), breaks=7) > hist(LungCap, prob=T, ylim=c(0,0.2), breaks=14) > hist(LungCap, prob=T, ylim=c(0,0.2), breaks=seq(from=0, to=16, by=2), main="Boxplot of Lung Capacity", xlab="Lung Capacity", las=1) > lines(density(LungCap)) > lines(density(LungCap), col=2, lwd=3)
2.4 make stem and leaf plots in r
> femaleLungCap <- LungCap[Gender=="female"] > stem(femaleLungCap) The decimal point is at the | 0 | 5 1 | 0135689 2 | 0033456777789999 3 | 0122457788999999 4 | 012333344555556666677777899 5 | 0000122222334466666777778999 6 | 000111111122222222233345555556666667777777788888999999 7 | 000123334444444445555666667778888888999999 8 | 000000001111122222333333444444555556666666666777777888888888899 9 | 0000000011122223333344455556666777788888999999 10 | 000011111222334445555666777778899 11 | 00111223556678888 12 | 1222479 13 | 1 > stem(femaleLungCap, scale=2) The decimal point is at the | 0 | 5 1 | 013 1 | 5689 2 | 00334 2 | 56777789999 3 | 01224 3 | 57788999999 4 | 012333344 4 | 555556666677777899 5 | 00001222223344 5 | 66666777778999 6 | 00011111112222222223334 6 | 5555556666667777777788888999999 7 | 00012333444444444 7 | 5555666667778888888999999 8 | 000000001111122222333333444444 8 | 555556666666666777777888888888899 9 | 00000000111222233333444 9 | 55556666777788888999999 10 | 00001111122233444 10 | 5555666777778899 11 | 00111223 11 | 556678888 12 | 12224 12 | 79 13 | 1
2.5 stacked barcharts, clustered barcharts and mosaic plots
> ?barplot > Table1 <- table(Smoke, Gender) > Table1 Gender Smoke female male no 314 334 yes 44 33 > barplot(Table1) > barplot(Table1, beside = T) > barplot(Table1, beside = T, legend.text = T) > barplot(Table1, beside = T, legend.text = c("Non-smoker", "Smoker"), main="Gender and Smoking", xlab="Gender", las=1) > barplot(Table1, beside = T, legend.text = c("Non-smoker", "Smoker"), main="Gender and Smoking", xlab="Gender", las=1, col=c(2,4)) > > mosaicplot(Table1)
#given one is female, probability of smoking & non-smoking > summary(Smoke[Gender=="female"])/358 no yes 0.877095 0.122905
2.6 scatterplot
> ?plot > cor(Height, Age) [1] 0.8357368 > plot(Age, Height) > plot(Age, Height, main="Scatterplot", xlab = "AGE", ylab = "HEIGHT", las=1, xlim = c(0,25)) > plot(Age, Height, main="Scatterplot", xlab = "AGE", ylab = "HEIGHT", las=1, xlim = c(0,25), cex = 0.5) > plot(Age, Height, main="Scatterplot", xlab = "AGE", ylab = "HEIGHT", las=1, xlim = c(0,25), pch = 8) > plot(Age, Height, main="Scatterplot", xlab = "AGE", ylab = "HEIGHT", las=1, xlim = c(0,25), pch = 8, col=2) > > abline(lm(Height ~ Age)) > abline(lm(Height ~ Age), col=4) > lines(smooth.spline(Age, Height)) > lines(smooth.spline(Age, Height), lty=2, lwd=10)
2.7 mean, standard deviation, frequencies in r
> help("mean") > table(Smoke) Smoke no yes 648 77 > table(Smoke)/725 Smoke no yes 0.8937931 0.1062069 > length(Smoke) [1] 725 > table(Smoke)/length(Smoke) Smoke no yes 0.8937931 0.1062069 > table(Smoke, Gender) Gender Smoke female male no 314 334 yes 44 33 > mean(LungCap) [1] 7.863148 > mean(LungCap, trim=0.10) #remove top 10 and bottom 10 percent [1] 7.938081 > median(LungCap) [1] 8 > sd(LungCap) [1] 2.662008 > sqrt(var(LungCap)) [1] 2.662008 > > sd(LungCap)^2 [1] 7.086288 > min(LungCap) [1] 0.507 > max(LungCap) [1] 14.675 > range(LungCap) [1] 0.507 14.675 > quantile function (x, ...) UseMethod("quantile") <bytecode: 0x000000000927f468> <environment: namespace:stats> > quantile(LungCap, probs=0.90) 90% 11.205 > quantile(LungCap, probs=c(0.2, 0.5, 0.9, 1)) 20% 50% 90% 100% 5.645 8.000 11.205 14.675 > sum(LungCap) [1] 5700.782 > sum(LungCap)/length(LungCap) [1] 7.863148 > > cor(LungCap, Age) [1] 0.8196749 > cor(LungCap, Age, method="spearman") [1] 0.8172464 > > cov(LungCap, Age) [1] 8.738289 > var(LungCap, Age) [1] 8.738289 > summary(LungCap) Min. 1st Qu. Median Mean 3rd Qu. Max. 0.507 6.150 8.000 7.863 9.800 14.675 > summary(Smoke) no yes 648 77 > summary(LungCapData) LungCap Age Height Smoke Gender Caesarean Min. : 0.507 Min. : 3.00 Min. :45.30 no :648 female:358 no :561 1st Qu.: 6.150 1st Qu.: 9.00 1st Qu.:59.90 yes: 77 male :367 yes:164 Median : 8.000 Median :13.00 Median :65.40 Mean : 7.863 Mean :12.33 Mean :64.84 3rd Qu.: 9.800 3rd Qu.:15.00 3rd Qu.:70.30 Max. :14.675 Max. :19.00 Max. :81.80
3.1 Binomial distribution in r
> ?dbinom > #3 successes in 20 trials (probability of success = 1/6) > dbinom(x=3, size=20, prob=1/6) [1] 0.2378866 > dbinom(x=0:3, size=20, prob=1/6) [1] 0.02608405 0.10433621 0.19823881 0.23788657 > #P(X <= 3) > sum(dbinom(x=0:3, size=20, prob=1/6)) [1] 0.5665456 > pbinom(q=3, size=20, prob=1/6) [1] 0.5665456 > pbinom(q=3, size=20, prob=1/6, lower.tail = T) [1] 0.5665456
3.2 poisson distribution in r
> help(dpois) > dpois(x=4, lambda = 7) [1] 0.09122619 > dpois(x=0:4, lambda = 7) [1] 0.000911882 0.006383174 0.022341108 0.052129252 0.091226192 > ppois(q=4, lambda = 7) [1] 0.1729916 > ppois(q=12, lambda = 7, lower.tail = F) [1] 0.02699977 > #P(X > 12)
3.3 normal distribution in r
dnorm: given a set of values, it returns the height of the normal probability density at each point. If you supply only the points, it uses the defaults mean = 0 and sd = 1 (the standard normal distribution).
> ?pnorm > pnorm(q=70, mean=75, sd=5) [1] 0.1586553 > pnorm(q=85, mean=75, sd=5, lower.tail = F) [1] 0.02275013 > #P(Z >= 1) > pnorm(q=1, mean=0, sd=1, lower.tail = F) [1] 0.1586553 > #find Q1 > qnorm(p=0.25, mean=75, sd=5, lower.tail = T) [1] 71.62755 > qnorm(p=0.5, mean=75, sd=5, lower.tail = T) [1] 75 > x = seq(from=55, to=95, by=0.25) > x [1] 55.00 55.25 55.50 55.75 56.00 56.25 56.50 56.75 57.00 57.25 57.50 57.75 58.00 58.25 58.50 58.75 [17] 59.00 59.25 59.50 59.75 60.00 60.25 60.50 60.75 61.00 61.25 61.50 61.75 62.00 62.25 62.50 62.75 [33] 63.00 63.25 63.50 63.75 64.00 64.25 64.50 64.75 65.00 65.25 65.50 65.75 66.00 66.25 66.50 66.75 [49] 67.00 67.25 67.50 67.75 68.00 68.25 68.50 68.75 69.00 69.25 69.50 69.75 70.00 70.25 70.50 70.75 [65] 71.00 71.25 71.50 71.75 72.00 72.25 72.50 72.75 73.00 73.25 73.50 73.75 74.00 74.25 74.50 74.75 [81] 75.00 75.25 75.50 75.75 76.00 76.25 76.50 76.75 77.00 77.25 77.50 77.75 78.00 78.25 78.50 78.75 [97] 79.00 79.25 79.50 79.75 80.00 80.25 80.50 80.75 81.00 81.25 81.50 81.75 82.00 82.25 82.50 82.75 [113] 83.00 83.25 83.50 83.75 84.00 84.25 84.50 84.75 85.00 85.25 85.50 85.75 86.00 86.25 86.50 86.75 [129] 87.00 87.25 87.50 87.75 88.00 88.25 88.50 88.75 89.00 89.25 89.50 89.75 90.00 90.25 90.50 90.75 [145] 91.00 91.25 91.50 91.75 92.00 92.25 92.50 92.75 93.00 93.25 93.50 93.75 94.00 94.25 94.50 94.75 [161] 95.00 > dens <- dnorm(x, mean=75, sd=5) > plot(x, dens) > plot(x, dens, type="l") > abline(v=75) > rand <- rnorm(n=40, mean=75, sd=5) > rand [1] 76.56584 78.34722 68.15748 68.87978 78.50993 72.29160 76.16492 78.02231 78.74289 75.19591 [11] 78.29987 72.30819 77.70877 71.75129 73.59638 76.28455 70.05909 72.02891 73.23243 82.19271 [21] 75.32922 70.91549 74.27356 70.43975 78.89277 70.59778 82.35224 61.60983 81.34892 81.55996 [31] 70.97721 70.83635 80.73113 67.71210 74.35846 81.16290 70.40886 78.51800 66.49179 75.40734 > hist(rand)
3.4 t distribution
> #t follows a standard t-distribution (centered at 0, scale 1) with 25 degrees of freedom > help(pt) > #P(t > 2.3) t-stat=2.3 df=25 one-sided pvalue > pt(q=2.3, df=25, lower.tail = F) [1] 0.01503675 > #two-sided pvalue > pt(q=2.3, df=25, lower.tail = F) + pt(q=2.3, df=25, lower.tail = T) [1] 1 > pt(q=2.3, df=25, lower.tail = F)*2 [1] 0.03007351 > pt(q=2.3, df=25, lower.tail = F) + pt(q=-2.3, df=25, lower.tail = T) [1] 0.03007351 > > #find t for 95% confidence > #value of t with 2.5% in each tail > qt(p=0.025, df=25, lower.tail = T) [1] -2.059539 > help(pf) > help(pexp)