用 Java 写了一个惰性(Lazy)计算的基于用户(User-Based)的评分预测代码,也可以用 Spark 和 Spark GraphX 来实现。
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import scala.Tuple2;
import scala.Tuple3;
import scala.Tuple6;
/**
 * Small driver for a lazily evaluated, user-based rating predictor.
 *
 * <p>Ratings are held as {@code (user, item, rating)} triples. Nothing is
 * precomputed: the similarities needed for a prediction are derived on demand
 * for the single {@code (user, item)} pair passed to
 * {@code LazyBaseUser.getRate}.
 */
public class UserBaseLazyTest
{
    /**
     * Builds a tiny in-memory rating set and prints the rating predicted for
     * user 2 on item 13.
     *
     * @param args ignored
     */
    public static void main( String[] args )
    {
        List<Tuple3<Long, Long, Double>> data = Arrays.asList(
            new Tuple3<Long, Long, Double>( 1L, 11L, 3.0 ),
            new Tuple3<Long, Long, Double>( 1L, 12L, 4.0 ),
            new Tuple3<Long, Long, Double>( 1L, 13L, 4.0 ),
            new Tuple3<Long, Long, Double>( 2L, 11L, 4.0 ),
            new Tuple3<Long, Long, Double>( 2L, 12L, 5.0 ),
            new Tuple3<Long, Long, Double>( 3L, 13L, 4.0 ) );
        LazyBaseUser lazy = new LazyBaseUser( data );
        double rate = lazy.getRate( new Tuple2<Long, Long>( 2L, 13L ) );
        System.out.println( rate );
    }

    /**
     * Predicts ratings from a list of {@code (user, item, rating)} triples.
     *
     * <p>All id comparisons are done on unboxed {@code long} values; comparing
     * two boxed {@code Long} objects with {@code ==} only compares references
     * and silently fails for ids outside the small-value cache.
     */
    private static class LazyBaseUser
    {
        /** The rating triples, in the order supplied by the caller. */
        private final List<Tuple3<Long, Long, Double>> data;

        /**
         * @param data rating triples as {@code (user, item, rating)}; the list
         *            is referenced, not copied
         * @throws IllegalArgumentException if {@code data} is {@code null}
         */
        public LazyBaseUser( List<Tuple3<Long, Long, Double>> data )
        {
            if ( data == null )
            {
                throw new IllegalArgumentException( "data must not be null" );
            }
            this.data = data;
        }

        /**
         * Returns the rating of the given user for the given item.
         *
         * <p>If the pair is present in the data its stored rating is returned.
         * Otherwise the result is the similarity-weighted average of the
         * ratings other users gave to the item, where similarity is computed
         * over the items those users share with the target user.
         *
         * @param ui the {@code (user, item)} pair to evaluate
         * @return the stored or predicted rating, or {@code 0} when no user
         *         with a similarity to the target user has rated the item
         */
        public double getRate( Tuple2<Long, Long> ui )
        {
            Tuple3<Long, Long, Double> known = findRate( ui );
            if ( known != null )
            {
                return known._3( );
            }

            final long targetUser = ui._1( );
            final long targetItem = ui._2( );

            Tuple2<List<Long>, List<Long>> related = getRalityUserAndItem( ui );
            List<Long> relatedUsers = related._1( );
            List<Long> relatedItems = related._2( );

            // One pass over the data: count every user's ratings, and group by
            // item the ratings that matter (items the target user rated, given
            // by the target user or by a user who rated the target item).
            Map<Long, Integer> ratingCounts = new HashMap<>( );
            Map<Long, List<Tuple3<Long, Long, Double>>> ratingsByItem = new HashMap<>( );
            for ( Tuple3<Long, Long, Double> t : data )
            {
                Long user = t._1( );
                Long item = t._2( );
                ratingCounts.merge( user, 1, Integer::sum );
                if ( !relatedItems.contains( item ) )
                {
                    continue;
                }
                if ( user.longValue( ) != targetUser && !relatedUsers.contains( user ) )
                {
                    continue;
                }
                ratingsByItem.computeIfAbsent( item, k -> new ArrayList<>( ) ).add( t );
            }

            // Total order that puts the target user's rating first and orders
            // everyone else by user id. Both sides are checked for the target
            // user and ids are compared without subtraction, so the ordering
            // is consistent and cannot overflow.
            Comparator<Tuple3<Long, Long, Double>> targetFirst = ( a, b ) -> {
                boolean aIsTarget = a._1( ).longValue( ) == targetUser;
                boolean bIsTarget = b._1( ).longValue( ) == targetUser;
                if ( aIsTarget != bIsTarget )
                {
                    return aIsTarget ? -1 : 1;
                }
                return Long.compare( a._1( ), b._1( ) );
            };

            // Accumulate, per neighbouring user, the statistics of the items
            // co-rated with the target user.
            Map<Long, PairStats> statsByNeighbour = new HashMap<>( );
            for ( List<Tuple3<Long, Long, Double>> itemRatings : ratingsByItem.values( ) )
            {
                itemRatings.sort( targetFirst );
                // Every grouped item was rated by the target user, so after
                // sorting the head of the list is the target user's rating.
                Tuple3<Long, Long, Double> own = itemRatings.get( 0 );
                for ( int j = 1; j < itemRatings.size( ); j++ )
                {
                    Tuple3<Long, Long, Double> other = itemRatings.get( j );
                    PairStats stats = statsByNeighbour.get( other._1( ) );
                    if ( stats == null )
                    {
                        stats = new PairStats( ratingCounts.get( own._1( ) ),
                            ratingCounts.get( other._1( ) ) );
                        statsByNeighbour.put( other._1( ), stats );
                    }
                    stats.add( own._3( ), other._3( ) );
                }
            }

            // Similarity-weighted average over the users who rated the item.
            double weightedSum = 0.0;
            double weightTotal = 0.0;
            for ( Tuple3<Long, Long, Double> t : data )
            {
                if ( t._2( ).longValue( ) != targetItem )
                {
                    continue;
                }
                PairStats stats = statsByNeighbour.get( t._1( ) );
                if ( stats != null )
                {
                    double similarity = stats.similarity( );
                    weightTotal = weightTotal + similarity;
                    weightedSum = weightedSum + similarity * t._3( );
                }
            }
            if ( weightTotal == 0 )
            {
                return 0;
            }
            return weightedSum / weightTotal;
        }

        /**
         * Looks up the stored rating for a {@code (user, item)} pair.
         *
         * @param ui the {@code (user, item)} pair to look for
         * @return the first matching triple in the data, or {@code null} if the
         *         user has not rated the item
         */
        private Tuple3<Long, Long, Double> findRate( Tuple2<Long, Long> ui )
        {
            long user = ui._1( );
            long item = ui._2( );
            for ( Tuple3<Long, Long, Double> t : data )
            {
                if ( t._1( ).longValue( ) == user && t._2( ).longValue( ) == item )
                {
                    return t;
                }
            }
            return null;
        }

        /**
         * Collects the users and items related to a {@code (user, item)} pair.
         *
         * @param ui the {@code (user, item)} pair
         * @return a pair whose first element lists the distinct users that
         *         rated the item and whose second element lists the distinct
         *         items the user rated, both in order of first appearance
         */
        private Tuple2<List<Long>, List<Long>>
            getRalityUserAndItem( Tuple2<Long, Long> ui )
        {
            return new Tuple2<List<Long>, List<Long>>( getRalityUser( ui ), getRalityItem( ui ) );
        }

        /**
         * @param ui the {@code (user, item)} pair
         * @return the distinct users that rated {@code ui}'s item, in order of
         *         first appearance in the data
         */
        private List<Long> getRalityUser( Tuple2<Long, Long> ui )
        {
            long item = ui._2( );
            List<Long> users = new ArrayList<>( );
            for ( Tuple3<Long, Long, Double> t : data )
            {
                if ( t._2( ).longValue( ) == item && !users.contains( t._1( ) ) )
                {
                    users.add( t._1( ) );
                }
            }
            return users;
        }

        /**
         * @param ui the {@code (user, item)} pair
         * @return the distinct items rated by {@code ui}'s user, in order of
         *         first appearance in the data
         */
        private List<Long> getRalityItem( Tuple2<Long, Long> ui )
        {
            long user = ui._1( );
            List<Long> items = new ArrayList<>( );
            for ( Tuple3<Long, Long, Double> t : data )
            {
                if ( t._1( ).longValue( ) == user && !items.contains( t._2( ) ) )
                {
                    items.add( t._2( ) );
                }
            }
            return items;
        }

        /**
         * Divides a dot product by the product of two vector norms.
         *
         * @param dotProduct  dot product of the two rating vectors
         * @param ratingNorm  Euclidean norm of the first vector
         * @param rating2Norm Euclidean norm of the second vector
         * @return the cosine similarity, or {@code 0} when either norm is zero
         *         (instead of propagating NaN or Infinity into the average)
         */
        private static double cosineSimilarity( double dotProduct,
            double ratingNorm, double rating2Norm )
        {
            double denominator = ratingNorm * rating2Norm;
            if ( denominator == 0 )
            {
                return 0;
            }
            return dotProduct / denominator;
        }

        /**
         * Running statistics over the items co-rated by the target user and
         * one neighbouring user.
         */
        private static final class PairStats
        {
            /** Total number of ratings the target user has in the data. */
            private final int targetRatingCount;
            /** Total number of ratings the neighbouring user has in the data. */
            private final int neighbourRatingCount;
            private int coRatedItems;
            private double dotProduct;
            private double targetSquares;
            private double neighbourSquares;

            PairStats( int targetRatingCount, int neighbourRatingCount )
            {
                this.targetRatingCount = targetRatingCount;
                this.neighbourRatingCount = neighbourRatingCount;
            }

            /** Folds in the two users' ratings of one shared item. */
            void add( double targetRating, double neighbourRating )
            {
                coRatedItems++;
                dotProduct += targetRating * neighbourRating;
                targetSquares += Math.pow( targetRating, 2 );
                neighbourSquares += Math.pow( neighbourRating, 2 );
            }

            /**
             * @return the cosine similarity over the co-rated items, multiplied
             *         by the number of co-rated items and divided by the target
             *         user's rating count times
             *         {@code log10(neighbour rating count + 10)}
             */
            double similarity( )
            {
                double cosine = cosineSimilarity( dotProduct,
                    Math.sqrt( targetSquares ), Math.sqrt( neighbourSquares ) );
                return cosine * coRatedItems
                    / ( targetRatingCount * Math.log10( neighbourRatingCount + 10 ) );
            }
        }
    }
}