# Prerequisite: download the rJava and Rwordseg packages and unpack them into R's library directory.
# Clean a CSV of Chinese text (drop blank cells, strip URLs and a few filler
# characters), save the URL-free text, and split it into words.
library(rJava)
library(Rwordseg)

# Read the raw input and preview its first rows.
test1 <- read.csv("E:\\test\\test.csv")
head(test1)

# Flatten the data frame into a plain vector of its cells and drop the cells
# that hold a single space. This is what matrix-style indexing of a data frame
# does implicitly; it is spelled out here so the change of type is visible.
# NA cells are dropped too, so they never reach gsub() or the segmenter.
cells <- as.matrix(test1)
test1 <- cells[!is.na(cells) & cells != " "]

# Remove URLs. The scheme accepts both http and https; the character class is
# kept ASCII-only on purpose so adjacent Chinese text is never swallowed.
test1 <- gsub(pattern = "https?:[a-zA-Z\\/\\.0-9]+", replacement = "", x = test1)

# Remove the filler characters. Inside a bracket expression "|" is a literal,
# not alternation, so the characters are listed without separators; otherwise
# every pipe character in the text would be deleted as well.
# NOTE(review): `res` is not used by the lines below (the file output and the
# segmentation both work on `test1`) - confirm which one is intended.
res <- gsub(pattern = "[我你的是了]", replacement = "", x = test1)

# Save the URL-free text.
write.csv(test1, file = "E:\\test1\\test1.txt", row.names = FALSE)

# Segment each entry into words and collect them in one flat vector.
words <- unlist(lapply(X = test1, FUN = segmentCN))
<