Creating a factor
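Create a factor from a character vector, using explicit levels to fix both the set of valid values (guarding against misspellings) and their order: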
# Raw month abbreviations, deliberately out of calendar order.
x1 <- c("Dec", "Apr", "Jan", "Mar")

# Calendar-ordered levels. Base R's `month.abb` constant holds exactly
# "Jan", "Feb", ..., "Dec".
month_levels <- month.abb

# With explicit levels the values sort in calendar order, not alphabetically.
y1 <- factor(x = x1, levels = month_levels)
sort(y1)
# [1] Jan Mar Apr Dec
# Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec

# Alternatively, take the level order from first appearance in the data.
f1 <- factor(x = x1, levels = unique(x1))
# Inspect the levels of a factor column.
levels(gss_cat$race)

# Bar chart of `race`. `drop = FALSE` keeps levels that have no observations
# on the x axis instead of dropping them. `FALSE` is spelled out because the
# shortcut `F` is an ordinary variable that can be reassigned.
ggplot(gss_cat, aes(race)) +
  geom_bar() +
  scale_x_discrete(drop = FALSE)
Reorder
## fct_reorder()
# fct_reorder() reorders the levels of `relig` according to `tvhours`, so the
# y axis follows the plotted values rather than the original level order.
ggplot(relig_summary, aes(x = tvhours, y = fct_reorder(relig, tvhours))) +
  geom_point()
## fct_relevel()
# Set the reference level: "Not applicable" is moved to the front and the
# remaining levels keep their relative order. The change is visible both in
# levels() and in the axis order of a plot.
# NOTE(review): `rincome` is not defined in the visible code; presumably it
# refers to the `rincome` column of gss_cat -- confirm.
rincome %>% fct_relevel("Not applicable")
## fct_reorder2()
# Proportion of each marital status within every age.
# count(age, marital) replaces group_by() + count(), and the final ungroup()
# stops `by_age` from carrying a leftover grouping into later code.
by_age <- gss_cat %>%
  filter(!is.na(age)) %>%
  count(age, marital) %>%
  group_by(age) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup()

# fct_reorder2(marital, age, prop) orders the `marital` levels by the `prop`
# value found at the largest `age`, in descending order. The legend therefore
# lists the statuses in the same top-to-bottom order as the line ends at the
# right-hand edge of the plot.
ggplot(by_age, aes(age, prop, colour = fct_reorder2(marital, age, prop))) +
  geom_line(na.rm = TRUE) +
  labs(colour = "Marital Status")
## fct_infreq() & fct_rev()
# Descending bars: fct_infreq() orders the levels from most to least frequent.
gss_cat %>%
  mutate(marital = fct_infreq(marital)) %>%
  ggplot(aes(x = marital)) +
  geom_bar()

# Ascending bars: fct_rev() reverses the frequency-based level order.
gss_cat %>%
  mutate(marital = fct_rev(fct_infreq(marital))) %>%
  ggplot(aes(x = marital)) +
  geom_bar()
Change the value of levels
Rename levels: each existing level (right-hand side) is replaced by its new name (left-hand side); then count the number of observations in each group:
# Rename `partyid` levels as "new name" = "old name" and tally each group.
# Mapping several old levels to the same new name ("Other") merges them.
# The recoded factor is built directly inside count(), which groups by it.
gss_cat %>%
  count(
    partyid = fct_recode(
      partyid,
      "Republican, strong" = "Strong republican",
      "Republican, weak" = "Not str republican",
      "Independent, near rep" = "Ind,near rep",
      "Independent, near dem" = "Ind,near dem",
      "Democrat, weak" = "Not str democrat",
      "Democrat, strong" = "Strong democrat",
      "Other" = "No answer",
      "Other" = "Don't know",
      "Other" = "Other party"
    )
  )
# Collapse many `partyid` levels into a few: each argument names a new level
# and lists the old levels merged into it. The collapsed groups are then
# tallied by count(), which groups by the factor built in place.
gss_cat %>%
  count(
    partyid = fct_collapse(
      partyid,
      other = c("No answer", "Don't know", "Other party"),
      rep = c("Strong republican", "Not str republican"),
      ind = c("Ind,near rep", "Independent", "Ind,near dem"),
      dem = c("Not str democrat", "Strong democrat")
    )
  )
# Keep the 10 most common `relig` levels and lump the rest into "Other",
# then list the groups from most to least frequent.
gss_cat %>%
  mutate(relig = fct_lump(relig, n = 10)) %>%
  count(relig) %>%
  arrange(desc(n)) %>%
  # n = Inf prints every row instead of a truncated preview. It is probably
  # redundant here because only a handful of groups remain after lumping.
  print(n = Inf)