R语言学习笔记(二十二):字符串处理中的函数对比(代码实现)

字符串处理中基本函数的使用

R自带函数与stringr包函数对比(以下以 str_ 开头的函数来自 stringr 包,运行前需先执行 library(stringr))

> states <- row.names(USArrests)
> # 提取字符串子集(substr)与生成缩写(abbreviate)
> substr(x = states, start = 1, stop = 4)
 [1] "Alab" "Alas" "Ariz" "Arka" "Cali" "Colo" "Conn" "Dela" "Flor" "Geor" "Hawa" "Idah" "Illi" "Indi" "Iowa" "Kans" "Kent"
[18] "Loui" "Main" "Mary" "Mass" "Mich" "Minn" "Miss" "Miss" "Mont" "Nebr" "Neva" "New " "New " "New " "New " "Nort" "Nort"
[35] "Ohio" "Okla" "Oreg" "Penn" "Rhod" "Sout" "Sout" "Tenn" "Texa" "Utah" "Verm" "Virg" "Wash" "West" "Wisc" "Wyom"
> abbreviate(states, minlength = 5)
       Alabama         Alaska        Arizona       Arkansas     California       Colorado    Connecticut       Delaware 
       "Alabm"        "Alask"        "Arizn"        "Arkns"        "Clfrn"        "Colrd"        "Cnnct"        "Delwr" 
       Florida        Georgia         Hawaii          Idaho       Illinois        Indiana           Iowa         Kansas 
       "Flord"        "Georg"        "Hawai"        "Idaho"        "Illns"        "Indin"         "Iowa"        "Kanss" 
      Kentucky      Louisiana          Maine       Maryland  Massachusetts       Michigan      Minnesota    Mississippi 
       "Kntck"        "Lousn"        "Maine"        "Mryln"        "Mssch"        "Mchgn"        "Mnnst"        "Mssss" 
      Missouri        Montana       Nebraska         Nevada  New Hampshire     New Jersey     New Mexico       New York 
       "Missr"        "Montn"        "Nbrsk"        "Nevad"        "NwHmp"        "NwJrs"        "NwMxc"        "NwYrk" 
North Carolina   North Dakota           Ohio       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
       "NrthC"        "NrthD"         "Ohio"        "Oklhm"        "Oregn"        "Pnnsy"        "RhdIs"        "SthCr" 
  South Dakota      Tennessee          Texas           Utah        Vermont       Virginia     Washington  West Virginia 
       "SthDk"        "Tnnss"        "Texas"         "Utah"        "Vrmnt"        "Virgn"        "Wshng"        "WstVr" 
     Wisconsin        Wyoming 
       "Wscns"        "Wymng" 
> # 计算字符串长度
> nchar(states)
 [1]  7  6  7  8 10  8 11  8  7  7  6  5  8  7  4  6  8  9  5  8 13  8  9 11  8  7  8  6 13 10 10  8 14 12  4  8  6 12 12 14 12
[42]  9  5  4  7  8 10 13  9  7
> str_count(states)
 [1]  7  6  7  8 10  8 11  8  7  7  6  5  8  7  4  6  8  9  5  8 13  8  9 11  8  7  8  6 13 10 10  8 14 12  4  8  6 12 12 14 12
[42]  9  5  4  7  8 10 13  9  7
> str_length(states)
 [1]  7  6  7  8 10  8 11  8  7  7  6  5  8  7  4  6  8  9  5  8 13  8  9 11  8  7  8  6 13 10 10  8 14 12  4  8  6 12 12 14 12
[42]  9  5  4  7  8 10 13  9  7
> # 大写和小写
> tolower(states)     # 变为小写
 [1] "alabama"        "alaska"         "arizona"        "arkansas"       "california"     "colorado"       "connecticut"   
 [8] "delaware"       "florida"        "georgia"        "hawaii"         "idaho"          "illinois"       "indiana"       
[15] "iowa"           "kansas"         "kentucky"       "louisiana"      "maine"          "maryland"       "massachusetts" 
[22] "michigan"       "minnesota"      "mississippi"    "missouri"       "montana"        "nebraska"       "nevada"        
[29] "new hampshire"  "new jersey"     "new mexico"     "new york"       "north carolina" "north dakota"   "ohio"          
[36] "oklahoma"       "oregon"         "pennsylvania"   "rhode island"   "south carolina" "south dakota"   "tennessee"     
[43] "texas"          "utah"           "vermont"        "virginia"       "washington"     "west virginia"  "wisconsin"     
[50] "wyoming"       
> toupper(states)     # 变为大写
 [1] "ALABAMA"        "ALASKA"         "ARIZONA"        "ARKANSAS"       "CALIFORNIA"     "COLORADO"       "CONNECTICUT"   
 [8] "DELAWARE"       "FLORIDA"        "GEORGIA"        "HAWAII"         "IDAHO"          "ILLINOIS"       "INDIANA"       
[15] "IOWA"           "KANSAS"         "KENTUCKY"       "LOUISIANA"      "MAINE"          "MARYLAND"       "MASSACHUSETTS" 
[22] "MICHIGAN"       "MINNESOTA"      "MISSISSIPPI"    "MISSOURI"       "MONTANA"        "NEBRASKA"       "NEVADA"        
[29] "NEW HAMPSHIRE"  "NEW JERSEY"     "NEW MEXICO"     "NEW YORK"       "NORTH CAROLINA" "NORTH DAKOTA"   "OHIO"          
[36] "OKLAHOMA"       "OREGON"         "PENNSYLVANIA"   "RHODE ISLAND"   "SOUTH CAROLINA" "SOUTH DAKOTA"   "TENNESSEE"     
[43] "TEXAS"          "UTAH"           "VERMONT"        "VIRGINIA"       "WASHINGTON"     "WEST VIRGINIA"  "WISCONSIN"     
[50] "WYOMING"       
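> # 符号替换
> # 注意:chartr("Tt", "Uu", ...) 同时把 T 换成 U、t 换成 u;
> # 而下面的 str_replace_all 只替换大写 T,所以两者结果不同。
> # 若要得到与 chartr 相同的结果,可写成 str_replace_all(x, c("T" = "U", "t" = "u"))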
> chartr("Tt", "Uu", "AgCTcctTagct")
[1] "AgCUccuUagcu"
> str_replace_all("AgCTcctTagct", pattern = "T", replacement = "U")
[1] "AgCUcctUagct"
> # 字符串连接
> paste("control", 1:3, sep = "_")
[1] "control_1" "control_2" "control_3"
> str_c("control", 1:3, sep = "_")
[1] "control_1" "control_2" "control_3"
> x <- c("I love R", "I'm fascinated by Statisitcs", "I")
> # 包含匹配
> grep(pattern = "love", x = x)
[1] 1
> grep(pattern = "love", x = x, value = TRUE)
[1] "I love R"
> grepl(pattern = "love", x = x)
[1]  TRUE FALSE FALSE
> str_detect(string = x, pattern = "love")
[1]  TRUE FALSE FALSE
> # match 返回第一个完全匹配的位置;%in% 判断是否存在完全匹配的元素
> match(x = "I",table = x)
[1] 3
> "I" %in% x
[1] TRUE
> # 字符串拆分
> text <- "I love R.\nI'm fascinated by Statisitcs."
> cat(text)
I love R.
I'm fascinated by Statisitcs.
> strsplit(text, split = " ")
[[1]]
[1] "I"           "love"        "R.\nI'm"     "fascinated"  "by"          "Statisitcs."
> strsplit(text, split = "\\s")
[[1]]
[1] "I"           "love"        "R."          "I'm"         "fascinated"  "by"          "Statisitcs."
> str_split(text, pattern = "\\s")
[[1]]
[1] "I"           "love"        "R."          "I'm"         "fascinated"  "by"          "Statisitcs."
> # 匹配替换
> test_vector3 <- c("Without the vowels,We can still read the word.")
> sub(pattern = "[aeiou]",replacement = "-",x = test_vector3)
[1] "W-thout the vowels,We can still read the word."
> gsub(pattern = "[aeiou]",replacement = "-",x = test_vector3)
[1] "W-th--t th- v-w-ls,W- c-n st-ll r--d th- w-rd."
> str_replace_all(string = test_vector3, pattern = "[aeiou]", 
+                 replacement = "-")
[1] "W-th--t th- v-w-ls,W- c-n st-ll r--d th- w-rd."
> # 字符串定制输出
> string <- "Each character string in the input is first split into\n paragraphs 
+ (or lines containing whitespace)"
> strwrap(x = string, width = 30)
[1] "Each character string in the" "input is first split into"    "paragraphs (or lines"         "containing whitespace)"      
> str_wrap(string = string, width = 30)
[1] "Each character string in\nthe input is first split\ninto paragraphs (or lines\ncontaining whitespace)"
> cat(str_wrap(string = string, width = 30))
Each character string in
the input is first split
into paragraphs (or lines
containing whitespace)

转载于:https://www.cnblogs.com/xihehe/p/8306920.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值