import pandas as pd
from pandas import DataFrame
from sklearn.metrics.pairwise import pairwise_distances
import numpy as np
# Row labels (users) and column labels (items) of the interaction matrix.
users = ["User1", "User2", "User3", "User4", "User5"]
items = ["Item1", "Item2", "Item3", "Item4", "Item5"]

# Binary interaction matrix: one row per user, one column per item.
# A 1 marks an item the user has interacted with, a 0 marks one they have not.
dataset = [
    [1, 0, 1, 1, 0],  # User1
    [1, 0, 0, 1, 1],  # User2
    [1, 0, 1, 0, 0],  # User3
    [0, 1, 0, 1, 1],  # User4
    [1, 1, 1, 0, 1],  # User5
]

# Labelled view of the matrix, used by every later step of the script.
df = pd.DataFrame(data=dataset, index=users, columns=items)
print(df)
# User-to-user Jaccard similarity (1 - Jaccard distance) between the rows of df.
# Jaccard is a boolean metric: scikit-learn would otherwise convert the integer
# matrix to bool itself and emit a DataConversionWarning, so cast explicitly.
user_semilar = 1 - pairwise_distances(df.values.astype(bool), metric="jaccard")
# Re-attach the user labels on both axes so scores can be looked up by name.
user_semilar = pd.DataFrame(user_semilar, columns=users, index=users)
# For every user, keep the labels of the two most similar *other* users.
# The user's own entry is dropped first so a user is never their own neighbour.
# A stable sort kind is requested explicitly: with the default (unstable)
# quicksort, which of several equally similar neighbours lands in the top two
# is not guaranteed.
topN_users = {
    user: list(
        user_semilar.loc[user]
        .drop([user])
        .sort_values(ascending=False, kind="mergesort")
        .index[:2]
    )
    for user in user_semilar.index
}
# User-based recommendations: for each user, gather every item that any of
# their nearest neighbours has a non-zero entry for, then remove the items the
# user already has a non-zero entry for themselves.
results = {}
for user, neighbours in topN_users.items():
    # Zeros are turned into NaN and dropped, leaving only the non-zero items.
    own_row = df.loc[user].replace(0, np.nan).dropna()
    candidates = set()
    for neighbour in neighbours:
        neighbour_row = df.loc[neighbour].replace(0, np.nan).dropna()
        candidates |= set(neighbour_row.index)
    results[user] = candidates - set(own_row.index)
print(results)
# Item-to-item Jaccard similarity (1 - Jaccard distance): df is transposed so
# that each row handed to pairwise_distances describes one item.
# Jaccard is a boolean metric: scikit-learn would otherwise convert the integer
# matrix to bool itself and emit a DataConversionWarning, so cast explicitly.
items_similar = 1 - pairwise_distances(df.T.values.astype(bool), metric="jaccard")
# Re-attach the item labels on both axes so scores can be looked up by name.
items_similar = pd.DataFrame(items_similar, index=items, columns=items)
# For every item, keep the labels of the two most similar *other* items.
# The item's own entry is dropped first so an item is never its own neighbour.
# A stable sort kind is requested explicitly: with the default (unstable)
# quicksort, which of several equally similar items lands in the top two is
# not guaranteed.
topN_items = {
    item: list(
        items_similar.loc[item]
        .drop([item])
        .sort_values(ascending=False, kind="mergesort")
        .index[:2]
    )
    for item in items_similar.index
}
# Item-based recommendations: for each user, collect the nearest neighbours of
# every item the user has a non-zero entry for, then remove the items the user
# already has.
results = {}
for user in df.index:
    # Zeros are turned into NaN and dropped, leaving only the non-zero items.
    owned = set(df.loc[user].replace(0, np.nan).dropna().index)
    suggested = set().union(*(topN_items[item] for item in owned))
    results[user] = suggested - owned
print(results)