背景:博主做了个vilivili弹幕网的项目,其中推荐视频的专栏部分用到了itemCF,在此记录下笔记和思路。
由于 v 站是一个用户量远大于作品量的网站,因此采用基于物品(视频)的协同过滤算法 Item CF,并综合以下因素来衡量用户对某个视频的喜好程度
(有必要建立一张用户行为表)
用户对视频行为:
观看次数(不足3次计 0.1;达到3次后按每3次 0.1 计) 点赞(0.8) 投币(1.0)
收藏(0.5 若在多个收藏夹都有收藏,后续每有一个收藏夹收藏+0.1)
分享数(每次分享 0.3)
获取用户对视频喜好程度的表,预测 User1 对 Video5 的喜好程度 LoveValue;判断是否值得推荐的初始阈值为其他用户对该视频喜好程度的均值
(计算video之间的相似度,用计算余弦相似度来表示video之间的相似度)
Video1 Video2 Video3 Video4 Video5 Video6 Video7
User1 1.2 0.2 1.1 1.3 ? ? ?
User2 0 0.8 0.2 0 0.8 x x
User3 1.5 0 0 0.7 1.4 x x
User4 3.6 2 0.5 1.2 2.5 x x
User5 0 0 1 3.2 3.1 x x
User6 0 1 1.2 2.5 2.1 x x
不含User1的喜好程度
#Video1的坐标为(0,1.5,3.6,0,0)
#Video2的坐标为(0.8,0,2,0,1)
#Video3的坐标为(0.2,0,0.5,1,1.2)
#Video4的坐标为(0,0.7,1.2,3.2,2.5)
.....
#将其写成数组的形式double[] Video1 double[] Video2 double[] Video3....
// NOTE(review): commons-math3 3.6.1 does not appear to ship a CosineSimilarity class;
// the cosine of two vectors is available through RealVector#cosine instead.
// vector1 / vector2 are the double[] rating vectors of two videos (same length, same user order).
org.apache.commons.math3.linear.RealVector v1 = new org.apache.commons.math3.linear.ArrayRealVector(vector1);
org.apache.commons.math3.linear.RealVector v2 = new org.apache.commons.math3.linear.ArrayRealVector(vector2);
// cosine() throws MathArithmeticException when either vector has zero norm,
// so a video nobody rated is simply treated as "not similar".
double similarity = (v1.getNorm() == 0.0 || v2.getNorm() == 0.0) ? 0.0 : v1.cosine(v2);
依赖为:
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.6.1</version>
</dependency>
#每个坐标都和其他坐标求一遍 余弦相似度 相似度较高的提取出来
#通过User1对提取出来的视频的喜好程度预测(公式计算)User1对目标视频的喜好程度
↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓
// Demo: predict the target user's rating of an unseen video as the
// similarity-weighted average of the user's ratings of similar videos.
// NOTE(review): the enclosing class/method header is not shown in this excerpt;
// the two closing braces at the end presumably belong to it.
// The target user's ratings (love values) of the similar videos
Map<String, Double> similarVideos = new HashMap<>();
similarVideos.put("video1", 4.5);
similarVideos.put("video2", 3.8);
similarVideos.put("video3", 4.2);
// Similarity between each similar video and the target video
Map<String, Double> similarities = new HashMap<>();
similarities.put("video1", 0.8);
similarities.put("video2", 0.6);
similarities.put("video3", 0.7);
double weightedSum = 0.0;
double totalWeight = 0.0;
// Accumulate sum(similarity * rating) and sum(similarity) for the weighted average
for (String video : similarVideos.keySet()) {
double similarity = similarities.get(video);
double rating = similarVideos.get(video);
weightedSum += similarity * rating;
totalWeight += similarity;
}
// predictedRating = sum(similarity * rating) / sum(similarity)
double predictedRating = weightedSum / totalWeight;
System.out.println("Predicted rating for the target video: " + predictedRating);
}
}
#预测出用户对未看过的视频的喜好程度后,根据阈值进行推荐(✓)
!!如果这个用户是新用户 什么视频都没看过,则只进行热门视频推荐
!!极端情况 该用户所有视频都看过,则也只进行热门视频推荐
具体实现部分:
public class ItemCF_Utils { /* * 基于视频的协同过滤推荐算法 返回vno的list集合 参数为当前用户的编号uno * */ private static BehaviorServiceImp1 behaviorServiceImp1 = new BehaviorServiceImp1(); public static ArrayList<Integer> tuijie(int unoTrg,int freshCount) { System.err.println("使用了推荐算法......."); List<Behavior> behaviors = behaviorServiceImp1.selectAll(); System.out.println(behaviors); //设目标视频为vno8=[0.8, ?, 0.0] 目标视频的目标预测得分是当前登录用户对目标视频的得分预测 //假设当前登录用户为uno=2 // int unoTrg = 2;//(参数1) List<Map<String, Object>> data = new ArrayList<>(); for (Behavior behavior : behaviors) { Map<String, Object> map = new HashMap<>(); map.put("uno", behavior.getUno()); map.put("vno", behavior.getVno()); double shoucang = shoucangValue(behavior.getShoucang()); double play = playValue(behavior.getPlay()); map.put("loveValue", behavior.getDianzan() * 0.8 + behavior.getToubi() * 1.0 + shoucang + play); data.add(map); } System.out.println("######################"); System.out.println(data); //得到随机热门视频的list集合 ArrayList<Integer> hotVideo = getHotVideo(data); if (unoTrg==0){ //意味着没登录 return hotVideo; } //建立个集合存储当前用户所看过的vno ArrayList<Integer> played = new ArrayList<>(); //建立个set集合存储当前所有记录中的vno Set<Integer> vnos = new HashSet<>(); //查询当前用户没看过的视频vno for (int i = 0; i < data.size(); i++) { //所有vno vnos.add((int) data.get(i).get("vno")); //当前用户已看过的vno if ((int) data.get(i).get("uno") == 2) { //如果这个data信息是目标uno的 int vno = (int) data.get(i).get("vno"); played.add(vno); } } System.out.println("所有的" + vnos); List<Integer> vnoss = new ArrayList<>(vnos); System.out.println("看过的" + played); if (played==null){ //说明这个人什么视频都没看过,只进行热门视频推荐 return hotVideo; } //求出没看过的 ArrayList<Integer> noPlayed = new ArrayList<>(); for (Integer vno : vnos) { for (int i = 0; i < played.size(); i++) { if (vno == played.get(i)) { break; } else { if (i == played.size() - 1) { noPlayed.add(vno); } } } } System.out.println("没看过的" + noPlayed); if (noPlayed==null){ //说明这个人所有视频都看过了,只进行热门视频推荐 return hotVideo; } //筛选出目标 List<Map<String, Object>> dataTar = new 
ArrayList<>(); //向量 vno为键 各用户评分list集合为值 Map<Integer, List<Double>> result = new HashMap<>(); //建立一个uno的list集合 ArrayList<Integer> unoList = new ArrayList<>(); for (int i = 0; i < data.size(); i++) { int uno = (int) data.get(i).get("uno"); if (!unoList.contains(uno)) { unoList.add(uno); } } Collections.sort(unoList); System.out.println("unoList:" + unoList); for (Map<String, Object> d : data) { // System.out.println(d); int vno = (int) d.get("vno"); int uno = (int) d.get("uno"); System.err.println("uno...."+uno); double loveValue = (double) d.get("loveValue"); if (result.containsKey(vno)) { //一个视频被多人评过分,再次进来时,只需根据uno去改对应uno给的0分 System.err.println(uno+"的索引unoList.indexOf(uno)..索引"+(unoList.indexOf(uno))); System.err.println(uno+"的值 unoList.get(unoList.indexOf(uno))对应的值..."+unoList.get(unoList.indexOf(uno))); result.get(vno).set(unoList.indexOf(uno), loveValue); } else { List<Double> list = new ArrayList<>(); for (Integer uno1 : unoList) { //按用户顺序把评分加入进去,没有对应用户的就为0 if (d.get("uno") == uno1) { list.add(loveValue); } else { list.add(0.0); } } // list.add(loveValue); result.put(vno, list); } System.out.println("想说藏话了。。。。"); } System.out.println("我测。。。。"); System.out.println("剔除前:" + result); //得到目标 uno 在unoList中的位置索引 int index = unoList.indexOf(unoTrg); //以该索引去result中将该索引位置的得分提取出来当作一个数组(用于最后预测得分求加权平均数) double[] endLoveValue = new double[result.size()]; int index1 = 0; for (Integer key : result.keySet()) { List<Double> value = result.get(key); Double aDouble = value.get(index); endLoveValue[index1] = aDouble; index1++; for (int i = index; i < value.size(); i++) { if (i == value.size() - 1) { //最后一个删掉 value.remove(i); } else { value.set(i, value.get(i + 1)); } } result.replace(key, value); } System.out.println("将目标值相关的对应其他视频的这个值剔除后 result的值为:" + result); System.out.println("endLoveValue 目标值相关的........." 
+ Arrays.toString(endLoveValue)); //因为endLoveValue集合带上来未观看的假评分0.0 最后求加权时 总数量要减去 noPlayed.size() Map<Integer, double[]> map = new HashMap<>(); ArrayList<double[]> arrayList = new ArrayList<>(); //转成数组 (把vno为键 各用户评分list集合为值改为double数组为值) for (Integer key : result.keySet()) { List<Double> value = result.get(key); Object[] array = value.toArray(); // 创建一个与 objectArray 大小相同的 double 数组 double[] doubleArray = new double[array.length]; // 将 Double 对象转换为对应的 double 值 for (int i = 0; i < array.length; i++) { doubleArray[i] = ((Double) array[i]).doubleValue(); } // arrayList.add(doubleArray); System.out.println("key为" + key + " double类型的数组集合:" + Arrays.toString(doubleArray)); map.put(key, doubleArray); } //喜好程度 HashMap<String, HashMap<String, Double>> similarVideos = new HashMap<>(); //相似度 HashMap<String, HashMap<String, Double>> similarities = new HashMap<>(); for (int i = 0; i < noPlayed.size(); i++) { for (Integer key : map.keySet()) { if (!noPlayed.contains(key)) { Matrix matrix1 = new Matrix(map.get(key), 1); Matrix matrix2 = new Matrix(map.get(noPlayed.get(i)), 1); // 计算向量的余弦相似度 double cosineSimilarity = cosineSimilarity(matrix1, matrix2); System.out.println("Video" + key + "与" + "Video" + noPlayed.get(i) + "的余弦相似度: " + cosineSimilarity); if (cosineSimilarity >= 0.6) { //说明是相似的视频 HashMap<String, Double> similarVideo = new HashMap<>(); HashMap<String, Double> similar = new HashMap<>(); //将被剔除的那一部分的得分拿回来 准备做预测了 String s = "vno" + key; similarVideo.put(s, endLoveValue[vnoss.indexOf(key)]); similar.put(s, cosineSimilarity); similarVideos.put("Video" + noPlayed.get(i), similarVideo); similarities.put("Video" + noPlayed.get(i), similar); } } } } System.out.println("similarVideos....喜好程度" + similarVideos); System.out.println("similarities.....相似程度" + similarities); //值得推荐的vno的list集合 ArrayList<Integer> list = new ArrayList<>(); double weightedSum = 0.0; double totalWeight = 0.0; // 计算加权平均 for (String video : similarVideos.keySet()) { System.out.println("正在预测" + video + 
"的喜好程度....."); int totalCount=similarVideos.get(video).size(); double totalRate =0.0; for (String vno : similarVideos.get(video).keySet()) { double similarity = similarities.get(video).get(vno); double rating = similarVideos.get(video).get(vno); weightedSum = similarity * rating; totalWeight = similarity; totalRate+=rating; } double predictedRating = weightedSum / totalWeight; System.out.println("Predicted rating for the target video: " + predictedRating); //阈值用平均数来表示 double avgRate=totalRate/totalCount; int i = Integer.parseInt(video.substring(5)); if (predictedRating>=avgRate){ //预测的喜好程度大于或等于阈值 则进行推荐 list.add(i); } } ArrayList<Integer> newList=new ArrayList<>(); if (list.size()<6){ System.out.println("被推荐的vno们...."+hotVideo); return hotVideo; }else { if (freshCount==1){ newList = new ArrayList<>(list.subList(0, Math.min(list.size(), 6))); System.out.println("被推荐的vno们...."+newList); }else if(((freshCount+1)*6+1)>list.size()){ System.out.println("被推荐的vno们...."+newList); return newList; }else { newList = new ArrayList<>(list.subList(freshCount*6, Math.min(list.size(), (freshCount+1)*6+1))); System.out.println("被推荐的vno们...."+newList); } return newList; } } // 计算余弦相似度 public static double cosineSimilarity(Matrix vector1, Matrix vector2) { // 计算向量的内积 double dotProduct = vector1.times(vector2.transpose()).get(0, 0); // 计算向量的范数 double norm1 = vector1.normF(); double norm2 = vector2.normF(); // 计算余弦相似度 double similarity = dotProduct / (norm1 * norm2); return similarity; } //计算收藏得分 public static double shoucangValue(int count){ if (count==0){ return 0.0; }else { return 0.5+(count-1)*0.1; } } //计算播放量得分 public static double playValue(int count){ if (count<3){ return 0.1; }else { return ((int)(count)/3)*0.1; } } //获取6个热门视频 如果不够6个视频,就全部推荐出去 public static ArrayList<Integer> getHotVideo(List<Map<String, Object>> list) { int numElements = 6; // 获取最大的6个Map if (list.size()<6){ numElements=list.size(); } Comparator<Map<String, Object>> comparator = new Comparator<Map<String, 
Object>>() { @Override public int compare(Map<String, Object> map1, Map<String, Object> map2) { return Double.compare((Double)map1.get("loveValue"), (Double)map2.get("loveValue")); } }; Collections.sort(list, comparator); List<Map<String, Object>> maxMaps = list.subList(0, numElements); ArrayList<Integer> vnoList=new ArrayList<>(); // 将vno封装成 ArrayList<Integer> for (Map<String, Object> map : maxMaps) { Integer vno = (Integer)map.get("vno"); vnoList.add(vno); } return vnoList; } }