版本-正则匹配
> # The \U and \L case-conversion escapes in the replacement require perl = TRUE.
> sub("\\b(\\w)(.*?)\\b", "\\U\\1\\L\\2", c("CXCR4", "GZMK", "LOXL2"), perl = TRUE)
[1] "Cxcr4" "Gzmk" "Loxl2"
版本1
#改进版本
# A handful of human gene symbols used as example input.
human_gene <- c(
  "PTPRC", "EPCAM", "MME", "CD3G",
  "CD3E", "CD68", "CD79A", "RP11-34P13.8"
)
#' Convert gene symbols to mouse-style capitalisation.
#'
#' Lower-cases each symbol and then upper-cases its first character,
#' e.g. "CD79A" -> "Cd79a". Digits and punctuation are left as they are,
#' so "RP11-34P13.8" becomes "Rp11-34p13.8". This is a purely textual case
#' conversion; no ortholog lookup is performed.
#'
#' @param var Character vector of gene symbols.
#' @return Unnamed character vector of the same length as `var`; missing
#'   symbols stay `NA`.
upper_low <- function(var) {
  # Remember missing entries up front: paste0() would otherwise render an
  # NA symbol as the literal text "NANA".
  is_missing <- is.na(var)
  var <- tolower(var)
  first_letter <- toupper(substr(var, 1, 1))
  word <- paste0(first_letter, substring(var, 2))
  word[is_missing] <- NA_character_
  word
}
# Convert every symbol. vapply() guarantees a character vector (sapply()
# would hand back an empty list for empty input) and, like sapply(), names
# each result after its input symbol.
mouse_gene <- vapply(human_gene, upper_low, character(1))
mouse_gene # conversion result
PTPRC EPCAM MME CD3G CD3E
"Ptprc" "Epcam" "Mme" "Cd3g" "Cd3e"
CD68 CD79A RP11-34P13.8
"Cd68" "Cd79a" "Rp11-34p13.8"
版本2
#自己瞎造了个丑陋的轮子,欢迎批评指正
#事后证明果然很丑陋
library("hash") #加载能用哈希数据结构的包
# A handful of human gene symbols used as example input.
human_gene <- c(
  "PTPRC", "EPCAM", "MME", "CD3G",
  "CD3E", "CD68", "CD79A"
)
#' Convert gene symbols to mouse-style capitalisation.
#'
#' Upper-cases the first character of each symbol and maps the ASCII letters
#' A-Z in the remainder to a-z. Every other character (digits, "-", ".")
#' passes through unchanged, which copes with names such as "RP11-34P13.8".
#'
#' Uses base R only: the fixed A-Z -> a-z mapping is done with chartr()
#' instead of a per-call lookup table. Empty strings are returned as "" and
#' all elements of a vector are converted, not just the first.
#'
#' @param var Character vector of gene symbols.
#' @return Unnamed character vector of the same length as `var`; missing
#'   symbols stay `NA`.
upper_low <- function(var) {
  # Remember missing entries up front: paste0() would otherwise render an
  # NA symbol as the literal text "NANA".
  is_missing <- is.na(var)
  first_letter <- toupper(substr(var, 1, 1))
  # Translate only A-Z; anything else in the tail is kept verbatim.
  tail_part <- chartr(
    paste(LETTERS, collapse = ""),
    paste(letters, collapse = ""),
    substring(var, 2)
  )
  word <- paste0(first_letter, tail_part)
  word[is_missing] <- NA_character_
  word
}
# Convert every symbol. vapply() guarantees a character vector (sapply()
# would hand back an empty list for empty input) and, like sapply(), names
# each result after its input symbol.
mouse_gene <- vapply(human_gene, upper_low, character(1))
mouse_gene # conversion result
输出结果
PTPRC EPCAM MME CD3G CD3E CD68 CD79A
"Ptprc" "Epcam" "Mme" "Cd3g" "Cd3e" "Cd68" "Cd79a"
参考
https://mp.weixin.qq.com/s/1iuKrNwtpU4Z-Jz7SdaA3w