#' Extract runs of characters in U+2E80-U+9FFF and join them into one string.
#'
#' Every maximal run of characters in the range U+2E80-U+9FFF (the range that
#' covers CJK ideographs) is pulled out of `line`; everything else (ASCII
#' letters, digits, punctuation, ...) is discarded.
#'
#' @param line Character input to scan. Intended for a single string; the
#'   matches of a longer vector are flattened into the same output string.
#' @return A length-one character vector:
#'   * two or more runs: the runs joined with ","
#'   * exactly one run: that run followed by the filler word "add"
#'   * no run at all: the empty string ""
my_fenci <- function(line) {
  # An earlier, now-disabled preprocessing step replaced "http" with a
  # placeholder and stripped ASCII letters, digits, '#', ':' and '/'. The
  # extraction below makes that unnecessary because it only keeps characters
  # inside the target range.
  runs <- stringr::str_extract_all(
    line,
    pattern = "[\u2E80-\u9FFF]+",
    simplify = TRUE
  )

  if (ncol(runs) != 1) {
    joined <- paste(runs, collapse = ",")
  } else {
    # Some texts contain only one word, which makes the later distance
    # computation fail, so the filler word "add" is appended.
    # NOTE(review): paste() joins the word and "add" with its default
    # separator (a space), not with ",", so the result is "<word> add".
    # Confirm whether "<word>,add" was intended before changing it.
    joined <- paste(runs, "add", collapse = ",")
  }

  # The original ended with `keys2 <= line3`, a comparison against an
  # undefined object that raised an error instead of returning the result.
  joined
}
#
library(stringr)
# Example: split a pipe-delimited log line into a numeric error code and its
# text description.
line <- "| 48394 Clearance Delta vs. RAP Exceeded Limit |"

# Keep every digit found in the line, in order, and read them as one number.
code_digits <- gsub("[^0-9]", "", line)
Err_Code <- as.numeric(code_digits)

# The description runs from the first ASCII letter to the last one, which
# drops the leading code and the surrounding "|" delimiters and spaces.
num <- gregexpr("[a-zA-Z]", line)[[1]]
Err_Desc <- substring(line, num[1], num[length(num)])

Err_Code
Err_Desc
# Console output of the two expressions above (kept as comments; pasted as
# raw text it made the file unparseable):
# > Err_Code
# [1] 48394
# > Err_Desc
# [1] "Clearance Delta vs. RAP Exceeded Limit"