Creating a Tibble (a modern data frame)
tibble(
x = 1:5,
y = 1,
z = x ^ 2 + y
)
# A tibble: 5 x 3
# x y z
# <int> <dbl> <dbl>
# 1 1 1 2
# 2 2 1 5
# 3 3 1 10
# 4 4 1 17
# 5 5 1 26
#the names of the columns can be given as "..."
tribble(
~x, ~y, ~z,
"a", 2, 3.6,
"b", 1, 8.5
)
#tribble() builds the tibble row by row, following the layout of the elements as written in the code
tibble--some tips
1.
df <- data.frame(abc = 1, xyz = "a")
df$x
#> [1] "a"
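On a base data.frame, the `$` operator does partial matching: it matches any column whose name starts with the name following it, as long as the match is unique. Since `xyz` is the only column whose name starts with `x`, the expression `df$x` is expanded to `df$xyz`. (Tibbles do not do partial matching.)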
2.
`enframe()` converts a named vector to a data frame with a `name` column and a `value` column
enframe(c(a = 1, b = 2, c = 3))
#> # A tibble: 3 x 2
#> name value
#> <chr> <dbl>
#> 1 a 1
#> 2 b 2
#> 3 c 3
Subsetting
# Extract by name
df$x
df[["x"]]
# Extract by position
df[[1]]
#use a pipe
df %>% .$x
df %>% .[["x"]]
readr
`read_csv()` reads comma delimited files
`read_delim()` reads in files with any delimiter
#the first row is regarded as names of variables
read_csv(
"a,b,c
1,2,3
4,5,6")
read_csv(
"a,b,c
1,2,3
4,5,6",skip=2)
#add `skip = n` to skip the first `n` lines; or add `comment = "#"` to drop all lines that start with (e.g.) `#`
#add `col_names = FALSE` to treat the first row as data and label the columns sequentially from `X1` to `Xn`; or add `col_names = c("x", "y", "z")` to label the columns with the names you supply
#add na = "." to treat `.` in the data as NA
read_delim("a;b\n1;3",delim=";")
#a special example about quotes
x <- "x,y\n1,'a,b'"
read_delim(x, delim=",", quote = "'")
#> # A tibble: 1 x 2
#> x y
#> <dbl> <chr>
#> 1 1 a,b
parse_*
1.parse_logical(c("TRUE", "FALSE", "NA"))
2.parse_integer(c("1", "2", "3"))
3.parse_number()
parse_number("$100")
parse_number("20%")
parse_number("It costs $123.45")
#[1] 100
#[1] 20
#[1] 123.45
parse_number("123.456.789", locale = locale(grouping_mark = "."))
#[1] 123456789
parse_number("123.456.789", locale = locale(decimal_mark = "."))
#[1] 123.456
grouping_mark is the character that separates groups of digits (e.g. thousands) within a number;
decimal_mark denotes the decimal point
4.parse_factor()
fruit <- c("apple", "banana")
parse_factor(c("apple", "banana", "bananana"), levels = fruit)
parse_factor(c("apple", "banana", "banana"), levels = fruit)
#[1] apple banana <NA>
#[1] apple banana banana
5.parse_datetime()
parse_datetime("2010-10-01T2010")
parse_datetime("20101010")
#[1] "2010-10-01 20:10:00 UTC"
#[1] "2010-10-10 UTC"
parse_datetime("01/02/15", "%m/%d/%y")
#[1] "2015-01-02 UTC"
6.parse_time() #the same goes for parse_date()
parse_time("01:10 pm")
parse_time("20:10:01")
#13:10:00
#20:10:01
Settings about date and time:
Year:
%Y(4 digits)
%y(2 digits)
Month:
%m(2 digits)
%b(abbreviated name,like Jan)
%B(full name,like January)
Day:
%d
Hour:
%H(0-23)
%I(0-12,which must pair with %p)
%p(a.m. or p.m.) #like ("%I:%M:%OS %p")
Minute:
%M
Second:
%OS(real seconds,may include a fractional part; use %S for integer seconds)
#Exercise
d1 <- "January 1, 2010"
d2 <- "2015-Mar-07"
d3 <- "06-Jun-2017"
d4 <- c("August 19 (2015)", "July 1 (2015)")
d5 <- "12/30/14" # Dec 30, 2014
t1 <- "1705"
t2 <- "11:15:10.12 PM"
parse_date(d1, "%B %d, %Y")
#> [1] "2010-01-01"
parse_date(d2, "%Y-%b-%d")
#> [1] "2015-03-07"
parse_date(d3, "%d-%b-%Y")
#> [1] "2017-06-06"
parse_date(d4, "%B %d (%Y)")
#> [1] "2015-08-19" "2015-07-01"
parse_date(d5, "%m/%d/%y")
#> [1] "2014-12-30"
parse_time(t1, "%H%M")
#> 17:05:00
parse_time(t2, "%H:%M:%OS %p")
#> 23:15:10.12