基于项亮编著的《推荐系统实践》一书中的用户协同过滤(UserCF)算法。
使用矩阵表示用户-商品关系,并用类似矩阵乘法的位运算(伪矩阵乘)计算用户之间的相似度。
代码:
package gt.small;
import java.util.BitSet;
public class UserSimilarity {
    /**
     * Runs the worked user-based collaborative filtering example (the original
     * author cites page 47 of Xiang Liang's recommender-systems book): four
     * users, five items, then prints the item recommended for user 0 using the
     * 3 closest users.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Row r lists the item indexes that user r has.
        int[][] itemsByUser = {
            {0, 1, 3},
            {0, 2},
            {1, 4},
            {2, 3, 4},
        };
        int totalItems = 5;

        UserCF recommender = new UserCF(itemsByUser.length, totalItems);
        for (int userIndex = 0; userIndex < itemsByUser.length; ++userIndex) {
            User current = recommender.getUser(userIndex);
            for (int itemIndex : itemsByUser[userIndex]) {
                current.set(itemIndex);
            }
        }

        // Similarities must be computed before asking for a recommendation.
        recommender.calculateSimilarity();
        int recommended = recommender.recommend(0, 3);
        System.out.println(recommended);
    }
}
/**
 * User-based collaborative filtering over a fixed number of users and items.
 *
 * <p>Typical use: fill in each user's items through {@link #getUser(int)}, call
 * {@link #calculateSimilarity()} once, then call {@link #recommend(int, int)}.
 */
class UserCF {
    // Pairwise user similarity. This class only ever writes entries with
    // row < column (see calculateSimilarity), so every read goes through
    // similarityOf(), which uses the same orientation.
    private final DTMatrix similarity;
    private final User[] users; // all users, addressed by index
    private final int itemCount; // items are indexed 0 .. itemCount - 1

    /**
     * @param userCount number of users to allocate (each starts with no items)
     * @param itemCount number of distinct items considered by {@link #recommend}
     */
    public UserCF(int userCount, int itemCount) {
        similarity = new DTMatrix(userCount);
        users = new User[userCount];
        for (int i = 0; i < userCount; ++i) {
            users[i] = new User();
        }
        this.itemCount = itemCount;
    }

    /**
     * @param index user index in {@code [0, userCount)}
     * @return the mutable user object stored at {@code index}
     */
    public User getUser(int index) {
        return users[index];
    }

    /**
     * Computes the cosine similarity {@code |A ∩ B| / sqrt(|A| * |B|)} for every
     * pair of distinct users and stores it in the similarity matrix.
     *
     * <p>A pair in which either user has no items gets similarity 0 instead of
     * the NaN that {@code 0 / sqrt(0)} would produce.
     */
    public void calculateSimilarity() {
        for (int i = 0; i < users.length; ++i) {
            User a = users[i];
            int na = a.getN();
            for (int j = i + 1; j < users.length; ++j) {
                User b = users[j];
                int nb = b.getN();
                double w = 0;
                if (na > 0 && nb > 0) {
                    // Multiply as double so large item counts cannot overflow int.
                    w = a.intersect(b) / Math.sqrt((double) na * nb);
                }
                similarity.set(i, j, w);
            }
        }
    }

    /**
     * Looks up the similarity of two distinct users, using the same
     * row-less-than-column orientation that {@link #calculateSimilarity()} writes.
     */
    private double similarityOf(int a, int b) {
        return a < b ? similarity.get(a, b) : similarity.get(b, a);
    }

    /**
     * Inserts {@code v} into the first {@code count} entries of {@code indexes},
     * which are ordered by descending similarity to {@code user}. Entries that
     * are strictly less similar than {@code v} move one slot to the right, so on
     * ties the earlier-inserted user stays in front.
     *
     * <p>Requires {@code count < indexes.length}; the entry at position
     * {@code count} is overwritten.
     */
    private void insert(int user, int[] indexes, int v, int count) {
        double sv = similarityOf(user, v);
        int pos = count;
        while (pos > 0 && similarityOf(user, indexes[pos - 1]) < sv) {
            indexes[pos] = indexes[pos - 1];
            --pos;
        }
        indexes[pos] = v;
    }

    /**
     * Returns the users most similar to {@code user}, most similar first.
     *
     * <p>The target user is never part of the result, and the result holds
     * {@code min(k, userCount - 1)} distinct users (empty when that is not
     * positive), so every slot refers to a real neighbour.
     */
    private int[] topK(int user, int k) {
        int capacity = Math.min(k, users.length - 1);
        if (capacity <= 0) {
            return new int[0];
        }
        int[] indexes = new int[capacity];
        int count = 0;
        for (int i = 0; i < users.length; ++i) {
            if (i == user) {
                continue; // a user is not its own neighbour
            }
            if (count < capacity) {
                // Still room: always take the candidate.
                insert(user, indexes, i, count);
                ++count;
            } else if (similarityOf(user, i) > similarityOf(user, indexes[capacity - 1])) {
                // Full: the candidate beats the current weakest, which drops off the end.
                insert(user, indexes, i, capacity - 1);
            }
        }
        return indexes;
    }

    /**
     * Recommends one item for {@code user} from the items of its {@code k}
     * nearest neighbours.
     *
     * <p>Each candidate item is scored by the summed similarity of the
     * neighbours that have it. Items the user already has are not candidates.
     * Among equal scores the lowest item index wins.
     *
     * @param user index of the user to recommend for
     * @param k    maximum number of neighbours to consult; values below 1 yield
     *             no recommendation
     * @return the index of the best-scoring item, or {@code -1} when no
     *         candidate has a positive score
     */
    public int recommend(int user, int k) {
        BitSet owned = users[user].getItems(); // read-only use
        int[] neighbours = topK(user, k);

        // Candidates: anything a neighbour has that the user does not.
        BitSet candidates = new BitSet();
        for (int neighbour : neighbours) {
            candidates.or(users[neighbour].getItems());
        }
        candidates.andNot(owned);

        int best = -1;
        double bestWeight = 0;
        for (int item = candidates.nextSetBit(0);
                item >= 0 && item < itemCount;
                item = candidates.nextSetBit(item + 1)) {
            double w = 0;
            for (int neighbour : neighbours) {
                if (users[neighbour].get(item)) {
                    w += similarityOf(user, neighbour);
                }
            }
            if (w > bestWeight) {
                best = item;
                bestWeight = w;
            }
        }
        return best;
    }
}
class User {
private BitSet items = new BitSet();// stores the user-item relationship
private int n = -1;// store to speed up multi-access bitset hasn't done
// this
public void set(int item) {
items.set(item);
n = -1;
}
public boolean get(int item) {
return items.get(item);
}
public BitSet getItems() {
return items;
}
public int getN() {
n = -1 == n ? items.cardinality() : n;
return n;
}
public int intersect(User u) {
if (u == this) {
return 0;
}
BitSet tmp = (BitSet) items.clone();
tmp.and(u.items);
return tmp.cardinality();
}
public BitSet getCopy() {
return (BitSet) this.items.clone();
}
}