1. 全部symbol都保存
library(tidyverse)
## ── Attaching packages ───────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.8
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.3.0 ✔ forcats 0.3.0
## ── Conflicts ──────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Toy annotation table: each probe maps to one or more gene symbols, with
# multiple symbols joined by "///". The outer parentheses print the result.
(a <- tibble(
  probe = c("101a", "102a", "103a"),
  symbol = c("aad///ddd", "bbb", "aad///ddv///xxx")
))
## # A tibble: 3 x 2
## probe symbol
## <chr> <chr>
## 1 101a aad///ddd
## 2 102a bbb
## 3 103a aad///ddv///xxx
# Split the second field (symbol) of every row on "///". Each row yields a
# one-row character matrix with one column per symbol; because the rows
# produce different numbers of symbols, apply() returns a list.
(test1 <- apply(
  a,
  MARGIN = 1,
  FUN = function(row) {
    str_split(row[2], pattern = "///", simplify = TRUE)
  }
))
## [[1]]
## [,1] [,2]
## [1,] "aad" "ddd"
##
## [[2]]
## [,1]
## [1,] "bbb"
##
## [[3]]
## [,1] [,2] [,3]
## [1,] "aad" "ddv" "xxx"
# For every row, split the symbol field on "///" and glue the row's probe
# onto each resulting symbol with "..." so the pairing survives flattening.
(test2 <- apply(
  a,
  MARGIN = 1,
  FUN = function(row) {
    symbols <- str_split(row[2], pattern = "///", simplify = TRUE)
    paste(row[1], symbols, sep = "...")
  }
))
## [[1]]
## [1] "101a...aad" "101a...ddd"
##
## [[2]]
## [1] "102a...bbb"
##
## [[3]]
## [1] "103a...aad" "103a...ddv" "103a...xxx"
# Flatten the per-row list into one character vector of "probe...symbol"
# strings (printed only; the result is not stored here).
unlist(test2)
## [1] "101a...aad" "101a...ddd" "102a...bbb" "103a...aad" "103a...ddv"
## [6] "103a...xxx"
# Wrap the flattened vector in a one-column tibble. The column is auto-named
# after the expression text, hence the `unlist(test2)` header in the output.
(x <- tibble(unlist(test2)))
## # A tibble: 6 x 1
## `unlist(test2)`
## <chr>
## 1 101a...aad
## 2 101a...ddd
## 3 102a...bbb
## 4 103a...aad
## 5 103a...ddv
## 6 103a...xxx
# Give the single column a short name that separate() can refer to.
names(x) <- "lala"
# Split each "probe...symbol" string back into two columns. No `sep` is given,
# so separate() falls back to its default: any run of non-alphanumeric
# characters, which the "..." delimiter satisfies.
(x2 <- separate(x, col = lala, into = c("id", "symbol")))
## # A tibble: 6 x 2
## id symbol
## <chr> <chr>
## 1 101a aad
## 2 101a ddd
## 3 102a bbb
## 4 103a aad
## 5 103a ddv
## 6 103a xxx
# The pipeline above works unchanged when the IDs are purely alphanumeric
# (e.g. transcriptome data). Probe IDs contain "_", which separate()'s default
# separator (any run of non-alphanumeric characters) would also split on, so
# the "..." delimiter is passed explicitly here.
# NOTE(review): `ids` is not defined in this section; this assumes column 1
# holds the probe ID and column 2 the "///"-joined symbols -- confirm.
test2 <- apply(
  ids,
  1,
  function(row) {
    paste(
      row[1],
      str_split(row[2], "///", simplify = TRUE),
      sep = "..."
    )
  }
)
# apply() returns a matrix instead of a list when every row yields the same
# number of symbols, and unlist() leaves a matrix untouched. as.vector()
# flattens either shape (column-major, so row order is kept) so that tibble()
# always receives one plain character column.
x <- tibble(lala = as.vector(unlist(test2)))
# Every dot is escaped so that only a literal "..." acts as the separator.
# An under-escaped pattern such as "\\..." means "a dot followed by any two
# characters" and would also split inside dotted symbols like
# "RP11-206L10.11", silently truncating them.
ids <- separate(x, lala, c("ID", "symbol"), sep = "\\.\\.\\.")
2. 只保存第一个symbol