Test whether you have understood all of the code below:
# Set, then report, the working directory.
# NOTE(review): setwd() requires a directory path, e.g.
# setwd("path/to/project"); it is left blank in these notes, so the call
# fails until a path is supplied.
setwd()
getwd()

# Build a character vector of diabetes types. The name and the assignment
# must be on the same line: a line that starts with `=` is a syntax error.
diabetes <- c(
  "Type1", "Type2", "Type1", "Type2", "Type2",
  "Type2", "Type1", "Type1", "Type2", "Type1"
)

# The package name must be a quoted string and is spelled "tidyverse".
install.packages("tidyverse")
library(dplyr)

# Remove the stats package from the search path.
# NOTE(review): after this call, stats functions such as sd() and
# shapiro.test() are only reachable as stats::sd() / stats::shapiro.test()
# until library(stats) attaches the package again.
detach("package:stats", unload = TRUE)
# Load the built-in USArrests data set and inspect it.
data("USArrests")
head(USArrests)      # first rows
summary(USArrests)   # per-column summary statistics
# The stats:: prefix keeps this call working although package:stats was
# detached from the search path earlier in the script.
stats::sd(USArrests$Murder)
View(USArrests)      # open the data viewer

# UCBAdmissions is already a table of counts, so table() here tabulates how
# often each count value occurs; str() shows the structure of the object.
table(UCBAdmissions)
str(UCBAdmissions)

# List the levels of the rincome factor.
# NOTE(review): gss_cat ships with the forcats package, which this script
# does not attach - presumably it is loaded elsewhere; confirm.
levels(gss_cat$rincome)

# Shapiro-Wilk normality test on the Murder column (stats:: for the same
# reason as sd() above).
stats::shapiro.test(USArrests$Murder)
# NOTE(review): load() restores objects written by save() (.RData/.rda
# files). A Stata .dta file is not such a file, so this call is expected to
# fail; the read_dta() call below is the one that imports the data.
load("E:/RGEB/2015 Millennium Cohort Study/mcs.dta")
library(haven)
mcs <- read_dta("2015 Millennium Cohort Study/mcs.dta")

# Convert mths to a factor, then replace each code with its text label.
mcs$math <- as.factor(mcs$mths)
# recode() takes the vector to recode as its first argument; the original
# call passed only the replacement pairs and therefore failed.
mcs$math <- recode(
  mcs$math,
  "1" = "Strongly Disagree", "2" = "Disagree",
  "3" = "Agree", "4" = "Strongly Agree"
)
# NOTE(review): the input here is mcs$math, whose levels are already the text
# labels at this point, so none of the codes "1".."4" match - presumably a
# separate science column was meant as the input; confirm.
mcs$science <- recode(
  mcs$math,
  "1" = "Strongly Disagree", "2" = "Disagree",
  "3" = "Agree", "4" = "Strongly Agree"
)
# Derive a birth year from age (2014 is presumably the survey year - confirm).
gfk_cleaned_eul$birthyear <- 2014 - gfk_cleaned_eul$age

# Bin birth year into five intervals.
# NOTE(review): the label text does not match the breaks exactly (the second
# bin ends at 1965 but is labelled "(1945,1964]", and so on) - confirm which
# of the two is intended.
gfk_cleaned_eul$birthyear_cat <- cut(
  gfk_cleaned_eul$birthyear,
  breaks = c(-Inf, 1945, 1965, 1985, 1997, Inf),
  labels = c(
    "(-Inf,1945]", "(1945,1964]", "(1965,1984]",
    "(1985,1996]", "(1997,Inf]"
  )
)

# Replace the interval labels with generation names.
gfk_cleaned_eul$birthyear_cat <- recode(
  gfk_cleaned_eul$birthyear_cat,
  "(-Inf,1945]" = "born in 1945 or before",
  "(1945,1964]" = "Boomers",
  "(1965,1984]" = "GenX",
  "(1985,1996]" = "Millenium",
  "(1997,Inf]" = "GenZ"
)
# Read the Excel export of the survey.
# NOTE(review): read_excel() comes from the readxl package, which this
# script does not attach - presumably loaded elsewhere; confirm.
gfk_excel_version <- read_excel("gfk_excel_version.xls")

# Treat "Refused" as missing.
gfk_excel_version$hhincome <- na_if(gfk_excel_version$hhincome, "Refused")

# Convert hhincome to a factor whose levels run from the highest bracket to
# the lowest. Values that match none of the listed levels become NA.
# NOTE(review): "25 000 - 29 999" uses spaces where every other bracket uses
# commas - confirm it matches the spelling in the data.
gfk_excel_version$hhincome <- factor(
  gfk_excel_version$hhincome,
  levels = c(
    "Over 200,000", "150,000 - 199,999", "100,000 - 149,999",
    "95,000 - 99,999", "90,000 - 94,999", "85,000 - 89,999",
    "80,000 - 84,999", "75,000 - 79,999", "70,000 - 74,999",
    "65,000 - 69,999", "60,000 - 64,999", "55,000 - 59,999",
    "50,000 - 54,999", "45,000 - 49,999", "40,000 - 44,999",
    "35,000 - 39,999", "30,000 - 34,999", "25 000 - 29 999",
    "20,000 - 24,999", "15,000 - 19,999", "10,000 - 14,999",
    "5,000 - 10,000", "Under 5,000"
  )
)

# Number of levels of the resulting factor.
nlevels(gfk_excel_version$hhincome)
# Smallest birth year in the survey data, stored and then printed.
min_birthyear <- min(gfk_cleaned_eul$birthyear)
print(min_birthyear)

# Open the SSE student data in the viewer.
View(SSE_students_data)

# Stack the SME and SSE student data row-wise, then average BMI over the
# combined rows.
cuhksz_students_h <- rbind(SME_students_data, SSE_students_data)
mean(cuhksz_students_h$BMI)

# Drop the seventh column by negative index.
Survey_GE_class_choice_2 <- Survey_GE_class_choice_2[, -7]
# Join the two employment survey tables on their shared ID column.
CUHK_employement_1 <- merge(
  x = CUHKSZ_employment_survey_1,
  y = CUHKSZ_employment_survey_1b,
  by = "ID"
)

# assign() binds a value to the name held in a string. Here the string is
# the object's own name, so the object is re-bound to itself.
new_name <- "CUHKSZ_employment_survey_2"
assign(x = new_name, value = CUHKSZ_employment_survey_2)

# Drop the last column.
# NOTE(review): CUHK_employement_2 is not created in the lines shown here -
# presumably defined elsewhere; confirm.
CUHK_employement_2 <- CUHK_employement_2[, -ncol(CUHK_employement_2)]

# Find the position of "Month_salary_22.x" in column_names and overwrite it.
# NOTE(review): column_names and new_column_name are not defined in the lines
# shown here - presumably defined elsewhere; confirm.
column_index_to_change <- which(column_names == "Month_salary_22.x")
column_names[column_index_to_change] <- new_column_name

# Open the help page for recode().
help("recode")
# Bin nrb_coffee_week into four labelled groups.
# include.lowest = TRUE closes the first interval on the left ([0,2]), so a
# value of exactly 0 falls into "0_2" instead of becoming NA.
# NOTE(review): values above 11 still become NA although the last label is
# "more_than_7" - confirm that 11 is the maximum in the data.
coffeenew$newcofnumcat <- cut(
  coffeenew$nrb_coffee_week,
  breaks = c(0, 2, 5, 7, 11),
  labels = c("0_2", "2_5", "5_7", "more_than_7"),
  include.lowest = TRUE
)
Answer:
# Working directory: set it, then report it
# NOTE(review): setwd() is shown without its required directory argument, e.g. setwd("path/to/folder")
setwd()
getwd()
# Store the ten diabetes types in the character vector 'diabetes'
diabetes <- c(
  "Type1", "Type2", "Type1", "Type2", "Type2",
  "Type2", "Type1", "Type1", "Type2", "Type1"
)
# function_name: "c"; function: "combine its arguments into one vector"; format: "new_vector <- c(...)"
# Install and attach the packages that are needed
install.packages("tidyverse")
# function_name: "install.packages"; function: "download and install the named packages"; format: "install.packages(...)"
library(dplyr)
# function_name: "library"; function: "attach an installed package"; format: "library(...)"
detach("package:stats", unload = TRUE)
# function_name: "detach"; function: "remove an attached package from the search path"; format: "detach(..., unload = TRUE)"
# Loading the 'USArrests' dataset and performing basic operations
data("USArrests")
# function_name: "data"; function: "load specified dataset"; format: "data(...)"
head(USArrests)
# function_name: "head"; function: "display the first few rows of a dataset"; format: "head(...)"
summary(USArrests)
# function_name: "summary"; function: "display summary statistics of a dataset"; format: "summary(...)"
stats::sd(USArrests$Murder)
# function_name: "sd"; function: "calculate standard deviation"; format: "new_variable <- sd(...)"
# Note: the stats:: prefix is required here because package:stats was detached earlier in this script
View(USArrests)
# function_name: "View"; function: "open a viewer for a dataset"; format: "View(...)"
# Exploring data in 'UCBAdmissions'
table(UCBAdmissions)
# function_name: "table"; function: "create a table of counts"; format: "table(...)"
# Checking levels in a categorical variable in 'gss_cat'
# NOTE(review): gss_cat ships with the forcats package, which this script does not attach - presumably loaded elsewhere; confirm
levels(gss_cat$rincome)
# function_name: "levels"; function: "get the levels of a factor variable"; format: "levels(...)"
# Conducting a Shapiro-Wilk test on 'Murder' column in 'USArrests'
stats::shapiro.test(USArrests$Murder)
# function_name: "shapiro.test"; function: "conduct the Shapiro-Wilk test"; format: "shapiro.test(...)"
# Note: stats:: is required for the same reason as for sd() above
# Importing the data of the '2015 Millennium Cohort Study'
load(file = "E:/RGEB/2015 Millennium Cohort Study/mcs.dta")
# function_name: "load"; function: "restore R objects from a file written by save()"; format: "load(...)"
# NOTE(review): a Stata .dta file is not a save() file, so this call is expected to fail; read_dta() below performs the actual import
library(haven)
# function_name: "library"; function: "attach an installed package"; format: "library(...)"
mcs <- read_dta(file = "2015 Millennium Cohort Study/mcs.dta")
# function_name: "read_dta"; function: "read a Stata .dta file into a data frame"; format: "new_dataset <- read_dta(...)"
# Deriving 'birthyear' from age and grouping it into categories
gfk_cleaned_eul$birthyear <- 2014 - gfk_cleaned_eul$age
# function_name: "-" (arithmetic operator, not a named function); function: "subtract one value from another, element by element"; format: "new_variable <- ... - ..."
gfk_cleaned_eul$birthyear_cat <- cut(
  gfk_cleaned_eul$birthyear,
  breaks = c(-Inf, 1945, 1965, 1985, 1997, Inf),
  labels = c(
    "(-Inf,1945]", "(1945,1964]", "(1965,1984]",
    "(1985,1996]", "(1997,Inf]"
  )
)
# function_name: "cut"; function: "turn a numeric variable into a factor by binning it at the given breaks"; format: "new_variable <- cut(...)"
gfk_cleaned_eul$birthyear_cat <- recode(
  gfk_cleaned_eul$birthyear_cat,
  "(-Inf,1945]" = "born in 1945 or before",
  "(1945,1964]" = "Boomers",
  "(1965,1984]" = "GenX",
  "(1985,1996]" = "Millenium",
  "(1997,Inf]" = "GenZ"
)
# function_name: "recode"; function: "replace the listed levels of a factor with new names"; format: "new_variable <- recode(...)"
# Reading an Excel file and processing 'hhincome' variable
gfk_excel_version <- read_excel("gfk_excel_version.xls")
# function_name: "read_excel"; function: "read data from an Excel file"; format: "new_dataset <- read_excel(...)"
gfk_excel_version$hhincome <- na_if(gfk_excel_version$hhincome, "Refused")
# function_name: "na_if"; function: "replace specific values with NA"; format: "new_variable <- na_if(...)"
# The level list is written out in full: a literal `...` inside c() is not valid R outside a function body
gfk_excel_version$hhincome <- factor(
  gfk_excel_version$hhincome,
  levels = c(
    "Over 200,000", "150,000 - 199,999", "100,000 - 149,999",
    "95,000 - 99,999", "90,000 - 94,999", "85,000 - 89,999",
    "80,000 - 84,999", "75,000 - 79,999", "70,000 - 74,999",
    "65,000 - 69,999", "60,000 - 64,999", "55,000 - 59,999",
    "50,000 - 54,999", "45,000 - 49,999", "40,000 - 44,999",
    "35,000 - 39,999", "30,000 - 34,999", "25 000 - 29 999",
    "20,000 - 24,999", "15,000 - 19,999", "10,000 - 14,999",
    "5,000 - 10,000", "Under 5,000"
  )
)
# function_name: "factor"; function: "convert a variable to a factor with specified levels"; format: "new_variable <- factor(...)"
# Joining and tidying the employment survey tables
CUHK_employement_1 <- merge(
  x = CUHKSZ_employment_survey_1,
  y = CUHKSZ_employment_survey_1b,
  by = "ID"
)
# function_name: "merge"; function: "join two datasets on a shared column"; format: "new_dataset <- merge(...)"
new_name <- "CUHKSZ_employment_survey_2"
# function_name: "<-" (assignment operator, not a named function); function: "store a character string in a variable"; format: "new_variable <- 'text'"
assign(x = new_name, value = CUHKSZ_employment_survey_2)
# function_name: "assign"; function: "bind a value to the variable whose name is given as a string"; format: "assign(..., ...)"; Note: the string here is the object's own name, so the object is re-bound to itself
CUHK_employement_2 <- CUHK_employement_2[, -ncol(CUHK_employement_2)]
# function_name: "[" (indexing with a negative column number, not the subset() function); function: "drop the last column"; format: "new_dataset <- old_dataset[, -ncol(old_dataset)]"
# Renaming one entry of 'column_names'
column_index_to_change <- which(column_names == "Month_salary_22.x")
# function_name: "which"; function: "get the index of elements that satisfy a condition"; format: "new_index <- which(...)"
column_names[column_index_to_change] <- new_column_name
# function_name: "[<-" (indexed assignment); function: "replace the values at the given positions"; format: "new_vector <- old_vector; new_vector[index] <- new_value"
help(recode) # function_name: "help"; function: "display help documentation"; format: "help(...)"
# Manipulating data in 'coffeenew'
coffeenew$newcofnumcat <- cut(
  coffeenew$nrb_coffee_week,
  breaks = c(0, 2, 5, 7, 11),
  labels = c("0_2", "2_5", "5_7", "more_than_7"),
  include.lowest = TRUE
)
# function_name: "cut"; function: "create categorical variable by cutting a numeric variable"; format: "new_variable <- cut(...)"
# Note: include.lowest = TRUE puts a value of exactly 0 into "0_2"; without it the first interval is (0,2] and 0 becomes NA
# NOTE(review): values above 11 still become NA although the last label is "more_than_7" - confirm that 11 is the maximum in the data
Quiz:
-
Question: What R function is used to set the working directory?
- a)
setdir()
- b)
setwd()
- c)
setworking()
- d)
workdir()
-
Question: Which function installs specified R packages?
- a)
load.packages()
- b)
install.library()
- c)
install.packages()
- d)
library.install()
-
Question: What function is used to load a specified library in R?
- a)
load()
- b)
library()
- c)
load.library()
- d)
import.library()
-
Question: In R, which function is used to calculate the standard deviation of a numeric variable?
- a)
calculate_sd()
- b)
std_dev()
- c)
sd()
- d)
variance()
-
Question: What function opens a viewer for a dataset in R?
- a)
explore()
- b)
browse()
- c)
view()
- d)
View()
-
Question: In R, which function creates a table of counts for categorical data?
- a)
tabulate()
- b)
table()
- c)
count()
- d)
crosstab()
-
Question: Which function is used to get the levels of a factor variable in R?
- a)
getlevels()
- b)
factorlevels()
- c)
levels()
- d)
factor_levels()
-
Question: What R function is used to conduct the Shapiro-Wilk test?
- a)
shapiro()
- b)
wilks.test()
- c)
shapiro.test()
- d)
test.shapiro()
-
Question: Which function reads data from a Stata file in R?
- a)
read_spss()
- b)
load_stata()
- c)
read_sas()
- d)
read_dta()
-
Question: In R, what function is used to create a categorical variable by cutting a numeric variable into bins?
- a)
bin()
- b)
create_cat()
- c)
cut()
- d)
category()
11. Question: Which function is used to merge datasets by a common variable in R?
- a)
combine()
- b)
merge()
- c)
join()
- d)
concat()
12. Question: What function is used to replace specific values with NA in R?
- a)
replace_na()
- b)
na_replace()
- c)
na_if()
- d)
replace_with_na()
13. Question: In R, which function is used to convert a variable to a factor with specified levels?
- a)
convert_factor()
- b)
to_factor()
- c)
factorize()
- d)
factor()
14. Question: What R function is used to remove specified columns from a dataset?
- a)
remove_cols()
- b)
subset()
- c)
drop_cols()
- d)
exclude()
15. Question: Which function in R is used to get the index of elements that satisfy a condition?
- a)
find()
- b)
locate()
- c)
index()
- d)
which()
16. Question: In R, what function is used to create a categorical variable by cutting a numeric variable into bins with labels?
- a)
categorize()
- b)
label_cut()
- c)
create_category()
- d)
cut()
17. Question: Which function in R displays help documentation for a specified function?
- a)
help()
- b)
info()
- c)
documentation()
- d)
assist()
18. Question: What function is used to replace specific values with new values in R?
- a)
replace_values()
- b)
change()
- c)
recode()
- d)
modify()
19. Question: In R, what function is used to create a new categorical variable based on the values of a numeric variable?
- a)
category_from_numeric()
- b)
create_categorical()
- c)
label_numeric()
- d)
cut()
20. Question: Which base R function replaces the elements of a vector that are selected by an index or a logical condition?
- a)
replace()
- b)
modify()
- c)
recalculate()
- d)
recode()
Answers to the quiz
-
Answer: b)
setwd()
- Example:
setwd("/path/to/your/directory")
- Example:
-
Answer: c)
install.packages()
- Example:
install.packages("tidyverse")
- Example:
-
Answer: b)
library()
- Example:
library(dplyr)
- Example:
-
Answer: c)
sd()
- Example:
standard_deviation <- sd(data$variable)
- Example:
-
Answer: d)
View()
- Example:
View(data)
- Example:
-
Answer: b)
table()
- Example:
table(factor_data)
- Example:
-
Answer: c)
levels()
- Example:
factor_levels <- levels(factor_data)
- Example:
-
Answer: c)
shapiro.test()
- Example:
shapiro.test(data$numeric_variable)
- Example:
-
Answer: d)
read_dta()
- Example:
dataset <- read_dta("file.dta")
- Example:
-
Answer: c)
cut()
-
Example:
cut_variable <- cut(data$numeric_variable, breaks = c(0, 25, 50, 75, 100))
-
-
Answer: b)
merge()
- Example:
merged_data <- merge(data1, data2, by="common_variable")
- Example:
-
Answer: c)
na_if()
- Example:
data$variable <- na_if(data$variable, "specific_value")
- Example:
-
Answer: d)
factor()
- Example:
data$variable <- factor(data$variable, levels = c("level1", "level2", "level3"))
- Example:
-
Answer: b)
subset()
- Example:
new_data <- subset(data, select = -c(column_to_remove))
- Example:
-
Answer: d)
which()
- Example:
index <- which(data$condition == TRUE)
- Example:
-
Answer: d)
cut()
- Example:
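category_variable <- cut(data$numeric_variable, breaks = c(0, 25, 50, 75, 100), labels = c("Low", "Medium", "High", "Very High"))
(Note: five break points define four intervals, so `labels` must contain exactly four entries. With only three labels, cut() stops with an error rather than leaving the last interval (75, 100] unlabelled.)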
- Example:
-
Answer: a)
help()
- Example:
help(function_name)
- Example:
-
Answer: c)
recode()
- Example:
data$variable <- recode(data$variable, "old_value" = "new_value")
- Example:
-
Answer: d)
cut()
- Example:
category_variable <- cut(data$numeric_variable, breaks = c(0, 25, 50, 75, 100), labels = c("Low", "Medium", "High", "Very High"))
- Example:
-
Answer: a)
replace()
- Example:
data$variable <- replace(data$variable, data$condition, new_value)
- Example: