#' Jaccard coefficient between two authors' keyword sets
#'
#' An author's keyword set is the distinct `KEYWORD_ID` values, among the
#' rows of `keyword_counts` whose `AUTHOR_ID` equals the author, that have a
#' `KEYWORD_COUNT` of at least 1. The coefficient is the size of the
#' intersection of the two sets divided by the size of their union.
#'
#' @param AUTHOR_ID_A,AUTHOR_ID_B Author identifiers, compared with `==`
#'   against the `AUTHOR_ID` column.
#' @param keyword_counts Data frame with columns `AUTHOR_ID`, `KEYWORD_ID`
#'   and `KEYWORD_COUNT`. Defaults to the global `author_keyword_count`, so
#'   existing two-argument calls keep working.
#' @return A single number in [0, 1]. Returns 0 when neither author has any
#'   keyword (the ratio would otherwise be 0 / 0).
jaccardCoef <- function(AUTHOR_ID_A, AUTHOR_ID_B,
                        keyword_counts = author_keyword_count) {
  keyword_set <- function(author_id) {
    # which() discards rows where the comparison is NA; plain logical
    # subsetting would inject all-NA rows for them.
    keep <- which(
      keyword_counts$AUTHOR_ID == author_id &
        keyword_counts$KEYWORD_COUNT >= 1
    )
    unique(keyword_counts$KEYWORD_ID[keep])
  }

  keywords_a <- keyword_set(AUTHOR_ID_A)
  keywords_b <- keyword_set(AUTHOR_ID_B)

  n_union <- length(union(keywords_a, keywords_b))
  if (n_union == 0L) {
    return(0)
  }
  length(intersect(keywords_a, keywords_b)) / n_union
}
# Example: Jaccard coefficient for authors 2 and 3.
jaccardCoef(AUTHOR_ID_A = 2, AUTHOR_ID_B = 3)
#' Scale a numeric vector to unit Euclidean length
#'
#' @param x Numeric vector, or anything `as.matrix()` can convert to a
#'   numeric matrix.
#' @return `x` converted with `as.matrix()` (a plain vector becomes a
#'   one-column matrix) and divided by the square root of the sum of its
#'   squared entries. An input whose norm is zero (all zeros, or empty) is
#'   returned unscaled instead of turning into NaN.
std_m <- function(x) {
  x <- as.matrix(x)
  # Every entry contributes to the norm; for a plain vector this is the
  # usual Euclidean length.
  norm_x <- sqrt(sum(x^2))
  if (isTRUE(norm_x == 0)) {
    return(x)
  }
  # Last expression is a value, not an assignment, so the result is visible.
  x / norm_x
}
# Example: normalise a small vector and show the result.
v <- c(rep(1, 3), 4, 5)
a <- std_m(x = v)
a
# Cosine similarity ----
# Install lsa only when it is missing, so re-running the script does not
# reinstall the package (and require network access) every time.
if (!requireNamespace("lsa", quietly = TRUE)) {
  install.packages("lsa")
}
library(SnowballC)
library(lsa)
#' Cosine similarity between two authors' keyword-count vectors
#'
#' Both authors are represented over the union of their `KEYWORD_ID` values;
#' each coordinate holds the author's `KEYWORD_COUNT` for that keyword, or 0
#' when the author has no row for it (or the count is missing). If an author
#' has several rows for one keyword, the first row's count is used.
#'
#' @param AUTHOR_ID_A,AUTHOR_ID_B Author identifiers, compared with `==`
#'   against the `AUTHOR_ID` column.
#' @param keyword_counts Data frame with columns `AUTHOR_ID`, `KEYWORD_ID`
#'   and `KEYWORD_COUNT`. Defaults to the global `author_keyword_count`, so
#'   existing two-argument calls keep working.
#' @return A 1 x 1 numeric matrix holding the cosine of the angle between the
#'   two count vectors. Holds 0 when either vector has zero length or zero
#'   norm (the ratio would otherwise be 0 / 0).
cosineSim <- function(AUTHOR_ID_A, AUTHOR_ID_B,
                      keyword_counts = author_keyword_count) {
  # which() discards rows where the comparison is NA.
  rows_a <- which(keyword_counts$AUTHOR_ID == AUTHOR_ID_A)
  rows_b <- which(keyword_counts$AUTHOR_ID == AUTHOR_ID_B)
  keywords_a <- keyword_counts$KEYWORD_ID[rows_a]
  keywords_b <- keyword_counts$KEYWORD_ID[rows_b]
  vocabulary <- union(keywords_a, keywords_b)

  # Counts of one author aligned to `vocabulary`. match() yields NA for a
  # keyword the author lacks, which indexes to NA and is then zeroed.
  count_vector <- function(keywords, rows) {
    counts <- keyword_counts$KEYWORD_COUNT[rows][match(vocabulary, keywords)]
    counts[is.na(counts)] <- 0
    as.numeric(counts)
  }
  vec_a <- count_vector(keywords_a, rows_a)
  vec_b <- count_vector(keywords_b, rows_b)

  # Cosine is scale-invariant, so the vectors need no prior normalisation.
  norm_product <- sqrt(sum(vec_a^2) * sum(vec_b^2))
  if (isTRUE(norm_product == 0)) {
    return(matrix(0, nrow = 1L, ncol = 1L))
  }
  # crossprod() of two vectors is a 1 x 1 matrix.
  crossprod(vec_a, vec_b) / norm_product
}
# Example: cosine similarity for authors 2 and 3.
a <- cosineSim(AUTHOR_ID_A = 2, AUTHOR_ID_B = 3)
a
# NOTE(review): this redefines jaccardCoef, which is also defined earlier in
# this file; being the later definition, it is the one in effect afterwards.

#' Jaccard coefficient between two authors' keyword sets
#'
#' An author's keyword set is the distinct `KEYWORD_ID` values, among the
#' rows of `keyword_counts` whose `AUTHOR_ID` equals the author, that have a
#' `KEYWORD_COUNT` of at least 1. The coefficient is the size of the
#' intersection of the two sets divided by the size of their union.
#'
#' @param AUTHOR_ID_A,AUTHOR_ID_B Author identifiers, compared with `==`
#'   against the `AUTHOR_ID` column.
#' @param keyword_counts Data frame with columns `AUTHOR_ID`, `KEYWORD_ID`
#'   and `KEYWORD_COUNT`. Defaults to the global `author_keyword_count`, so
#'   existing two-argument calls keep working.
#' @return A single number in [0, 1]. Returns 0 when neither author has any
#'   keyword (the ratio would otherwise be 0 / 0).
jaccardCoef <- function(AUTHOR_ID_A, AUTHOR_ID_B,
                        keyword_counts = author_keyword_count) {
  keyword_set <- function(author_id) {
    # which() discards rows where the comparison is NA; plain logical
    # subsetting would inject all-NA rows for them.
    keep <- which(
      keyword_counts$AUTHOR_ID == author_id &
        keyword_counts$KEYWORD_COUNT >= 1
    )
    unique(keyword_counts$KEYWORD_ID[keep])
  }

  keywords_a <- keyword_set(AUTHOR_ID_A)
  keywords_b <- keyword_set(AUTHOR_ID_B)

  n_union <- length(union(keywords_a, keywords_b))
  if (n_union == 0L) {
    return(0)
  }
  length(intersect(keywords_a, keywords_b)) / n_union
}
# Example: Jaccard coefficient for authors 2 and 3.
jaccardCoef(AUTHOR_ID_A = 2, AUTHOR_ID_B = 3)
# NOTE(review): this redefines std_m, which is also defined earlier in this
# file; being the later definition, it is the one in effect afterwards.

#' Scale a numeric vector to unit Euclidean length
#'
#' @param x Numeric vector, or anything `as.matrix()` can convert to a
#'   numeric matrix.
#' @return `x` converted with `as.matrix()` (a plain vector becomes a
#'   one-column matrix) and divided by the square root of the sum of its
#'   squared entries. An input whose norm is zero (all zeros, or empty) is
#'   returned unscaled instead of turning into NaN.
std_m <- function(x) {
  x <- as.matrix(x)
  # Every entry contributes to the norm; for a plain vector this is the
  # usual Euclidean length.
  norm_x <- sqrt(sum(x^2))
  if (isTRUE(norm_x == 0)) {
    return(x)
  }
  # Last expression is a value, not an assignment, so the result is visible.
  x / norm_x
}
# Example: normalise a small vector and show the result.
v <- c(rep(1, 3), 4, 5)
a <- std_m(x = v)
a
# Cosine similarity ----
# Install lsa only when it is missing, so re-running the script does not
# reinstall the package (and require network access) every time.
if (!requireNamespace("lsa", quietly = TRUE)) {
  install.packages("lsa")
}
library(SnowballC)
library(lsa)
# NOTE(review): this redefines cosineSim, which is also defined earlier in
# this file; being the later definition, it is the one in effect afterwards.

#' Cosine similarity between two authors' keyword-count vectors
#'
#' Both authors are represented over the union of their `KEYWORD_ID` values;
#' each coordinate holds the author's `KEYWORD_COUNT` for that keyword, or 0
#' when the author has no row for it (or the count is missing). If an author
#' has several rows for one keyword, the first row's count is used.
#'
#' @param AUTHOR_ID_A,AUTHOR_ID_B Author identifiers, compared with `==`
#'   against the `AUTHOR_ID` column.
#' @param keyword_counts Data frame with columns `AUTHOR_ID`, `KEYWORD_ID`
#'   and `KEYWORD_COUNT`. Defaults to the global `author_keyword_count`, so
#'   existing two-argument calls keep working.
#' @return A 1 x 1 numeric matrix holding the cosine of the angle between the
#'   two count vectors. Holds 0 when either vector has zero length or zero
#'   norm (the ratio would otherwise be 0 / 0).
cosineSim <- function(AUTHOR_ID_A, AUTHOR_ID_B,
                      keyword_counts = author_keyword_count) {
  # which() discards rows where the comparison is NA.
  rows_a <- which(keyword_counts$AUTHOR_ID == AUTHOR_ID_A)
  rows_b <- which(keyword_counts$AUTHOR_ID == AUTHOR_ID_B)
  keywords_a <- keyword_counts$KEYWORD_ID[rows_a]
  keywords_b <- keyword_counts$KEYWORD_ID[rows_b]
  vocabulary <- union(keywords_a, keywords_b)

  # Counts of one author aligned to `vocabulary`. match() yields NA for a
  # keyword the author lacks, which indexes to NA and is then zeroed.
  count_vector <- function(keywords, rows) {
    counts <- keyword_counts$KEYWORD_COUNT[rows][match(vocabulary, keywords)]
    counts[is.na(counts)] <- 0
    as.numeric(counts)
  }
  vec_a <- count_vector(keywords_a, rows_a)
  vec_b <- count_vector(keywords_b, rows_b)

  # Cosine is scale-invariant, so the vectors need no prior normalisation.
  norm_product <- sqrt(sum(vec_a^2) * sum(vec_b^2))
  if (isTRUE(norm_product == 0)) {
    return(matrix(0, nrow = 1L, ncol = 1L))
  }
  # crossprod() of two vectors is a 1 x 1 matrix.
  crossprod(vec_a, vec_b) / norm_product
}
# Example: cosine similarity for authors 2 and 3.
a <- cosineSim(AUTHOR_ID_A = 2, AUTHOR_ID_B = 3)
a