计算矩阵每列非零元素的个数(MATLAB):count = sum(M ~= 0);
评分矩阵中评分数最多的 500 个用户的索引(MATLAB):
% Count the nonzero entries of X_aux; for a matrix, sum works column by column.
% NOTE(review): presumably each column is one user and nonzero means "rated" -- confirm.
ss=sum(X_aux~=0);
% Sort the counts from largest to smallest; pos holds the original indices
% in sorted order, so its leading entries are the columns with the most nonzeros.
[c,pos]=sort(ss,'descend'); %% descending order
python pandas
import pandas as pd

# Load the ratings file (read_table splits on tabs by default).
# The paths are raw strings: in a normal Python 3 literal, "\U" and "\u" start
# Unicode escapes, so 'C:\Users\...\u.data' would not even compile.
unames = ['u_id', 't_id', 'r', 'time']
ratings = pd.read_table(
    r'C:\Users\zhaojianfei\Desktop\Oliy\dataset\u.data',
    header=None,
    names=unames,
    engine='python',
)

# Load the user table, which is pipe-separated.
unames = ['u_id', 'age', 'gender', 'occupation', 'zip']
users = pd.read_table(
    r'C:\Users\zhaojianfei\Desktop\Oliy\dataset\u.user',
    sep='|',
    header=None,
    names=unames,
    engine='python',
)

# Attach the user attributes to every rating; 'u_id' is the only shared column.
data = pd.merge(ratings, users, on='u_id')

# Mean rating per user, with one column per gender.
ur_mean = data.pivot_table('r', index='u_id', columns='gender', aggfunc='mean')

users.head()  # show the first 5 rows

# Choose the index used to view the table. 'age' is named explicitly so the
# default mean is not applied to the text columns (occupation, zip), which
# newer pandas releases reject instead of silently dropping.
pd.pivot_table(users, index=['gender', 'u_id'], values=['age'])

# Control both the index and the aggregated values. aggfunc selects the
# aggregation (the default is the mean); the string name "sum" is used so that
# no numpy import is needed.
pd.pivot_table(users, index=['gender'], values=['age'], aggfunc='sum')

# The NaN cells are distracting; fill_value=0 replaces them with 0.
ur_mean = data.pivot_table('r', index='u_id', columns='gender', aggfunc='mean', fill_value=0)
如果还想查看总计数据,可以设置 margins=True。