R语言 recommenderlab 包

## Warning: package 'recommenderlab' was built under R version 3.2.4
## Loading required package: Matrix
## Loading required package: registry
## Loading required package: arules
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##     %in%, abbreviate, write
## Loading required package: proxy
## Attaching package: 'proxy'
## The following object is masked from 'package:Matrix':
##     as.matrix
## The following objects are masked from 'package:stats':
##     as.dist, dist
## The following object is masked from 'package:base':
##     as.matrix
# Attach the recommender toolkit and the plotting library, then load the
# MovieLense ratings data set and report its dimensions.
# These statements were previously collapsed onto one line, which left the
# data() and dim() calls inside a comment so they never ran.
library(recommenderlab)
library(ggplot2)
data(MovieLense)
dim(MovieLense)
## [1]  943 1664
## 943 x 1664 rating matrix of class 'realRatingMatrix' with 99392 ratings.
# Image plot of the ratings for a random sample of size 500 drawn from
# MovieLense.
image(
  sample(MovieLense, 500),
  main = "Raw ratings"
)

# Histogram of the raw (un-normalized) ratings, one bin per rating value.
# The title used to read "normalized ratings", which does not describe the
# data plotted here: getRatings() is applied to MovieLense directly.
qplot(
  getRatings(MovieLense),
  binwidth = 1,
  main = "Histogram of raw ratings",
  xlab = "Ratings"
)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    3.00    4.00    3.53    4.00    5.00
# Histogram of the ratings after Z-score normalization.
# No binwidth is given, so ggplot2 falls back to its default binning.
qplot(
  x = getRatings(normalize(MovieLense, method = "Z-score")),
  main = "hist of normalized ratings",
  xlab = "rating"
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -4.8520 -0.6466  0.1084  0.0000  0.7506  4.1280
# Histogram of how many movies each user has rated. rowCounts() yields one
# count per row (user), so the x axis is the number of movies rated and the
# y axis is the number of users in each bin; the two axis labels were
# previously swapped.
qplot(
  rowCounts(MovieLense),
  binwidth = 10,
  main = "Movies rated on Average",
  xlab = "# of movies rated",
  ylab = "# of users"
)

# Histogram of the mean rating per column (movie), in bins of width 0.1.
qplot(
  x = colMeans(MovieLense),
  binwidth = 0.1,
  main = "Mean ratings of Movies",
  xlab = "Rating",
  ylab = "# of movies"
)

## $IBCF_realRatingMatrix
## Recommender method: IBCF
## Description: Recommender based on item-based collaborative filtering (real data).
## Parameters:
##    k method normalize normalize_sim_matrix alpha na_as_zero minRating
## 1 30 Cosine    center                FALSE   0.5      FALSE        NA
## $PCA_realRatingMatrix
## Recommender method: PCA
## Description: Recommender based on PCA approximation (real data).
## Parameters:
##   categories method normalize normalize_sim_matrix alpha na_as_zero
## 1         20 Cosine    center                FALSE   0.5      FALSE
##   minRating
## 1        NA
## $POPULAR_realRatingMatrix
## Recommender method: POPULAR
## Description: Recommender based on item popularity (real data).
## Parameters: None
## $RANDOM_realRatingMatrix
## Recommender method: RANDOM
## Description: Produce random recommendations (real ratings).
## Parameters: None
## $SVD_realRatingMatrix
## Recommender method: SVD
## Description: Recommender based on EM-based SVD approximation from package bcv (real data).
## Parameters:
##   approxRank maxiter normalize minRating
## 1         NA     100    center        NA
## $UBCF_realRatingMatrix
## Recommender method: UBCF
## Description: Recommender based on user-based collaborative filtering (real data).
## Parameters:
##   method nn sample normalize minRating
## 1 cosine 25  FALSE    center        NA
# Build the evaluation scheme: a single 90/10 train/test split, with 10
# ratings given per test user and ratings >= 4 counted as "good".
# The assignment and the print were previously fused on one line, which is
# a syntax error in R.
scheme <- evaluationScheme(
  MovieLense,
  method = "split",
  train = 0.9,
  k = 1,
  given = 10,
  goodRating = 4
)
scheme
## Evaluation scheme with 10 items given
## Method: 'split' with 1 run(s).
## Training set proportion: 0.900
## Good ratings: >=4.000000
## Data set: 943 x 1664 rating matrix of class 'realRatingMatrix' with 99392 ratings.
# Recommender configurations to compare; the list names become the labels
# of the evaluation results.
algorithms <- list(
  "random items" = list(
    name = "RANDOM",
    param = list(normalize = "Z-score")
  ),
  "popular items" = list(
    name = "POPULAR",
    param = list(normalize = "Z-score")
  ),
  "user-based CF" = list(
    name = "UBCF",
    param = list(normalize = "Z-score", method = "Cosine", nn = 50, minRating = 3)
  ),
  "item-based CF" = list(
    name = "IBCF",
    param = list(normalize = "Z-score", method = "Cosine")
  )
)

# run algorithms, predict next n movies
# This call used to sit on the same line as the comment above, so it was
# commented out and `results` was never created.
results <- evaluate(scheme, algorithms, n = c(1, 3, 5, 10, 15, 20))
## RANDOM run fold/sample [model time/prediction time]
##   1  [0.02sec/1.25sec] 
## POPULAR run fold/sample [model time/prediction time]
##   1  [0.16sec/0.21sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.15sec/59.95sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [506.98sec/0.89sec]
# Default plot of the evaluation results, annotating all four algorithms.
plot(
  results,
  annotate = seq_len(4),
  legend = "topleft"
)

# Precision / recall view, annotating the third algorithm only.
plot(
  results,
  "prec/rec",
  annotate = 3
)

##               Length Class             Mode
## random items  1      evaluationResults S4  
## popular items 1      evaluationResults S4  
## user-based CF 1      evaluationResults S4  
## item-based CF 1      evaluationResults S4
## List of evaluation results for 4 recommenders:
## Evaluation results for 1 folds/samples using method 'RANDOM'.
## Evaluation results for 1 folds/samples using method 'POPULAR'.
## Evaluation results for 1 folds/samples using method 'UBCF'.
## Evaluation results for 1 folds/samples using method 'IBCF'.
# Average the evaluation results per algorithm and stack them into a single
# data frame (ldply adds the list names as the `.id` column).
# The three statements were previously fused on one line, which is a syntax
# error in R.
library(plyr)
result1 <- ldply(avg(results))
head(result1)
##            .id         TP         FP       FN       TN  precision
## 1 random items 0.02105263  0.9789474 59.09474 1593.905 0.02105263
## 2 random items 0.09473684  2.9052632 59.02105 1591.979 0.03157895
## 3 random items 0.20000000  4.8000000 58.91579 1590.084 0.04000000
## 4 random items 0.36842105  9.6315789 58.74737 1585.253 0.03684211
## 5 random items 0.63157895 14.3684211 58.48421 1580.516 0.04210526
## 6 random items 0.82105263 19.1789474 58.29474 1575.705 0.04105263
##         recall          TPR          FPR
## 1 0.0001625365 0.0001625365 0.0006139374
## 2 0.0017415933 0.0017415933 0.0018211266
## 3 0.0028771009 0.0028771009 0.0030075810
## 4 0.0091514632 0.0091514632 0.0060388289
## 5 0.0131256428 0.0131256428 0.0090061881
## 6 0.0171367235 0.0171367235 0.0120237980
# Tag each row's label with the top-N list length it was evaluated at. The
# six lengths are recycled down the rows, one cycle per algorithm.
result1[, 1] <- paste(result1[, 1], c(1, 3, 5, 10, 15, 20))

# Columns 1, 6 and 7 are .id, precision and recall.
temp_result1 <- result1[, c(1, 6, 7)]

# F-measure: harmonic mean of precision `p` and recall `r`.
# The earlier form `return(2*p*r)/(p+r)` exited with 2*p*r before the
# division was applied, so the `f` column was not an F-measure.
# NOTE(review): any `f` values printed from a run of the earlier form
# (equal to 2 * precision * recall) are stale and need regenerating.
f <- function(p, r) {
  2 * p * r / (p + r)
}

result1_f <- cbind(result1, f = f(temp_result1[, 2], temp_result1[, 3]))
head(result1_f)
##               .id         TP         FP       FN       TN  precision
## 1  random items 1 0.02105263  0.9789474 59.09474 1593.905 0.02105263
## 2  random items 3 0.09473684  2.9052632 59.02105 1591.979 0.03157895
## 3  random items 5 0.20000000  4.8000000 58.91579 1590.084 0.04000000
## 4 random items 10 0.36842105  9.6315789 58.74737 1585.253 0.03684211
## 5 random items 15 0.63157895 14.3684211 58.48421 1580.516 0.04210526
## 6 random items 20 0.82105263 19.1789474 58.29474 1575.705 0.04105263
##         recall          TPR          FPR            f
## 1 0.0001625365 0.0001625365 0.0006139374 6.843643e-06
## 2 0.0017415933 0.0017415933 0.0018211266 1.099954e-04
## 3 0.0028771009 0.0028771009 0.0030075810 2.301681e-04
## 4 0.0091514632 0.0091514632 0.0060388289 6.743183e-04
## 5 0.0131256428 0.0131256428 0.0090061881 1.105317e-03
## 6 0.0171367235 0.0171367235 0.0120237980 1.407015e-03
##                 .id       TP        FP       FN       TN precision
## 18 user-based CF 20 7.231579 11.894737 51.88421 1582.989 0.3819792
## 17 user-based CF 15 5.968421  8.473684 53.14737 1586.411 0.4158444
## 16 user-based CF 10 4.463158  5.263158 54.65263 1589.621 0.4603343
## 12 popular items 20 6.726316 13.273684 52.38947 1581.611 0.3363158
## 11 popular items 15 5.273684  9.726316 53.84211 1585.158 0.3515789
## 15  user-based CF 5 2.600000  2.315789 56.51579 1592.568 0.5287234
##        recall        TPR         FPR          f
## 18 0.17582028 0.17582028 0.007392742 0.13431937
## 17 0.15055639 0.15055639 0.005262247 0.12521608
## 16 0.11854595 0.11854595 0.003264888 0.10914154
## 12 0.13805753 0.13805753 0.008238022 0.09286185
## 11 0.11359184 0.11359184 0.006035319 0.07987300
## 15 0.07230233 0.07230233 0.001433397 0.07645587
# Fit a user-based collaborative filtering recommender on the full data set
# and produce a top-20 list for every user in it.
# The three statements were previously fused on one line, which is a syntax
# error in R.
moive_re <- Recommender(MovieLense, method = "UBCF")
moives_pr <- predict(moive_re, MovieLense, n = 20)
class(moives_pr)
## [1] "topNList"
## attr(,"package")
## [1] "recommenderlab"
## [[1]]
##  [1] "Glory (1989)"                                                               
##  [2] "Schindler's List (1993)"                                                    
##  [3] "Casablanca (1942)"                                                          
##  [4] "Close Shave, A (1995)"                                                      
##  [5] "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)"
##  [6] "Leaving Las Vegas (1995)"                                                   
##  [7] "One Flew Over the Cuckoo's Nest (1975)"                                     
##  [8] "Rear Window (1954)"                                                         
##  [9] "Heathers (1989)"                                                            
## [10] "L.A. Confidential (1997)"                                                   
## [11] "City of Lost Children, The (1995)"                                          
## [12] "Butch Cassidy and the Sundance Kid (1969)"                                  
## [13] "Titanic (1997)"                                                             
## [14] "Lawrence of Arabia (1962)"                                                  
## [15] "Shine (1996)"                                                               
## [16] "Stand by Me (1986)"                                                         
## [17] "Gandhi (1982)"                                                              
## [18] "To Kill a Mockingbird (1962)"                                               
## [19] "In the Name of the Father (1993)"                                           
## [20] "Harold and Maude (1971)"                                                    
## [[2]]
##  [1] "Boot, Das (1981)"                         
##  [2] "Dead Man Walking (1995)"                  
##  [3] "Lone Star (1996)"                         
##  [4] "Return of the Jedi (1983)"                
##  [5] "Celluloid Closet, The (1995)"             
##  [6] "Casablanca (1942)"                        
##  [7] "Citizen Kane (1941)"                      
##  [8] "Godfather: Part II, The (1974)"           
##  [9] "2001: A Space Odyssey (1968)"             
## [10] "When We Were Kings (1996)"                
## [11] "Diva (1981)"                              
## [12] "Close Shave, A (1995)"                    
## [13] "Tango Lesson, The (1997)"                 
## [14] "Beautiful Thing (1996)"                   
## [15] "Empire Strikes Back, The (1980)"          
## [16] "Mrs. Dalloway (1997)"                     
## [17] "Butch Cassidy and the Sundance Kid (1969)"
## [18] "My Fair Lady (1964)"                      
## [19] "Bonnie and Clyde (1967)"                  
## [20] "Annie Hall (1977)"                        
## [[3]]
##  [1] "Mrs. Brown (Her Majesty, Mrs. Brown) (1997)" 
##  [2] "Star Wars (1977)"                            
##  [3] "Pulp Fiction (1994)"                         
##  [4] "English Patient, The (1996)"                 
##  [5] "Full Monty, The (1997)"                      
##  [6] "Lone Star (1996)"                            
##  [7] "Titanic (1997)"                              
##  [8] "Sweet Hereafter, The (1997)"                 
##  [9] "In the Company of Men (1997)"                
## [10] "Willy Wonka and the Chocolate Factory (1971)"
## [11] "In & Out (1997)"                             
## [12] "Vertigo (1958)"                              
## [13] "As Good As It Gets (1997)"                   
## [14] "Apt Pupil (1998)"                            
## [15] "Dazed and Confused (1993)"                   
## [16] "Ice Storm, The (1997)"                       
## [17] "This Is Spinal Tap (1984)"                   
## [18] "Trainspotting (1996)"                        
## [19] "Heat (1995)"                                 
## [20] "Fargo (1996)"                                
## [[4]]
##  [1] "Titanic (1997)"               "English Patient, The (1996)" 
##  [3] "L.A. Confidential (1997)"     "Game, The (1997)"            
##  [5] "Good Will Hunting (1997)"     "Kiss the Girls (1997)"       
##  [7] "Full Monty, The (1997)"       "Usual Suspects, The (1995)"  
##  [9] "Rosewood (1997)"              "Boogie Nights (1997)"        
## [11] "Raise the Red Lantern (1991)" "Pulp Fiction (1994)"         
## [13] "Toy Story (1995)"             "Love Jones (1997)"           
## [15] "Eve's Bayou (1997)"           "Edge, The (1997)"            
## [17] "Sting, The (1973)"            "Some Like It Hot (1959)"     
## [19] "Strictly Ballroom (1992)"     "Soul Food (1997)"            
## [[5]]
##  [1] "Terminator 2: Judgment Day (1991)"                                          
##  [2] "Terminator, The (1984)"                                                     
##  [3] "Usual Suspects, The (1995)"                                                 
##  [4] "Contact (1997)"                                                             
##  [5] "Braveheart (1995)"                                                          
##  [6] "Casablanca (1942)"                                                          
##  [7] "Twelve Monkeys (1995)"                                                      
##  [8] "Godfather, The (1972)"                                                      
##  [9] "Shawshank Redemption, The (1994)"                                           
## [10] "Raising Arizona (1987)"                                                     
## [11] "Amadeus (1984)"                                                             
## [12] "Nikita (La Femme Nikita) (1990)"                                            
## [13] "Reservoir Dogs (1992)"                                                      
## [14] "Citizen Kane (1941)"                                                        
## [15] "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)"
## [16] "Schindler's List (1993)"                                                    
## [17] "Titanic (1997)"                                                             
## [18] "Leaving Las Vegas (1995)"                                                   
## [19] "North by Northwest (1959)"                                                  
## [20] "Army of Darkness (1993)"    


