1、最近在做一个区块链门户网站,需要给用户推荐网页信息,因此要用到推荐算法。比较了各种推荐算法之后,最后选择了实现简单且推荐效果不错的 Apriori 算法。算法原理可以自己找资料学习,下面给出项目中的案例。
2、先来一个测试方法。每条数据需要拼接成下面这种固定格式(各项之间用英文分号“;”分隔,并且每一项后面都带一个分号),原始数据请先自行整理成这种格式。
/**
 * Demonstrates the Apriori utility on a small, hand-written set of user favourites.
 *
 * <p>Each entry is one transaction: the items a single user collected, every item followed by
 * the {@code ;} separator. All rows use the same trailing-separator format so that every item,
 * including the last one of a row, takes part in the support counting.
 */
@Test
public void testApriori() {
    ArrayList<String> dataList = new ArrayList<>();
    // Simulated user-favourites data, one transaction per user.
    dataList.add("金色财经;区块链律动;火星财经;");
    dataList.add("The block;金色财经;了得资本;");
    dataList.add("巴比特;金色财经;区块链律动;");
    dataList.add("巴比特;区块链律动;金色财经;");
    dataList.add("区块链律动;金色财经;火星财经;");
    dataList.add("火星财经;金色财经;");
    dataList.add("火星财经;金色财经;");
    dataList.add("了得资本;分布式资本;");
    dataList.add("分布式资本;区块链律动;金色财经;火星财经;巴比特;");
    dataList.add("了得资本;区块链律动;");
    dataList.add("巴比特;了得资本;");
    dataList.add("了得资本;金色财经;");

    System.out.println("====数据集合====");
    for (String transaction : dataList) {
        System.out.println(transaction);
    }

    AprioriUtil aprioriUtil = new AprioriUtil();

    // Frequent item sets together with their support counts.
    System.out.println("====频繁项集====");
    Map<String, Integer> frequentSetMap = aprioriUtil.apriori(dataList);
    for (Map.Entry<String, Integer> frequentSet : frequentSetMap.entrySet()) {
        System.out.println(frequentSet.getKey() + " : " + frequentSet.getValue());
    }

    // Association rules derived from the frequent item sets, with their confidence.
    System.out.println("====关联规则====");
    Map<String, Double> relationRulesMap = aprioriUtil.getRelationRules(frequentSetMap);
    for (Map.Entry<String, Double> rule : relationRulesMap.entrySet()) {
        System.out.println(rule.getKey() + ":" + rule.getValue());
    }
}
3、编写一个自己的工具类AprioriUtil,方法如下:
/**
 * Small Apriori implementation that mines frequent item sets and association rules from
 * transactions encoded as separator-delimited strings.
 *
 * <p>An item set is serialised as its items each followed by {@link #ITEM_SPLIT}, for example
 * {@code "a;b;"}. Item sets with two or more items are produced by the join step with their
 * items in ascending {@link String#compareTo(String)} order. A rule is serialised as
 * {@code antecedent + "->" + consequent}, for example {@code "a;->b;"}.
 */
public class AprioriUtil {
    /**
     * Support threshold: the minimum number of transactions an item set must occur in
     * to be considered frequent.
     */
    private static final int SUPPORT = 2;
    /**
     * Confidence threshold: a rule is kept only when its confidence is strictly greater
     * than this value.
     */
    private static final double CONFIDENCE = 0.6;
    /**
     * Separator between items, both in the input transactions and in the item-set keys.
     */
    private static final String ITEM_SPLIT = ";";
    /**
     * Connector placed between the antecedent and the consequent of a rule key.
     */
    private static final String CON = "->";

    /**
     * Runs the Apriori algorithm over the given transactions.
     *
     * <p>Each transaction is split on {@link #ITEM_SPLIT} into exact items, so a trailing
     * separator is optional and an item is never matched as a substring of another item
     * (for example {@code "9"} does not match inside {@code "209"}). Empty items are ignored and
     * an item repeated inside one transaction counts once for that transaction.
     *
     * @param dataList transactions, one string per transaction; {@code null} or empty yields an
     *                 empty result
     * @return every item set whose support count is at least {@link #SUPPORT}, mapped to that
     *         count
     */
    public Map<String, Integer> apriori(ArrayList<String> dataList) {
        Map<String, Integer> frequentSetMap = new HashMap<>();
        if (dataList == null || dataList.isEmpty()) {
            return frequentSetMap;
        }

        // Tokenise every transaction once; all later scans work on exact item membership.
        List<Set<String>> transactions = new ArrayList<>(dataList.size());
        for (String data : dataList) {
            transactions.add(parseItems(data));
        }

        Map<String, Integer> stepFrequentSetMap = findFrequentOneSets(transactions);
        frequentSetMap.putAll(stepFrequentSetMap);

        while (!stepFrequentSetMap.isEmpty()) {
            Map<String, Integer> candidateSetMap = aprioriGen(stepFrequentSetMap);
            Map<String, Integer> nextFrequentSetMap = new HashMap<>();

            // Scan the transactions and keep the candidates that reach the support threshold.
            for (String candidate : candidateSetMap.keySet()) {
                String[] candidateItems = candidate.split(ITEM_SPLIT);
                int count = 0;
                for (Set<String> transaction : transactions) {
                    if (containsAll(transaction, candidateItems)) {
                        count++;
                    }
                }
                if (count >= SUPPORT) {
                    nextFrequentSetMap.put(candidate, count);
                }
            }

            // Merge this level into the overall result and continue with the next level.
            frequentSetMap.putAll(nextFrequentSetMap);
            stepFrequentSetMap = nextFrequentSetMap;
        }
        return frequentSetMap;
    }

    /**
     * Splits one raw transaction into its distinct, non-empty items.
     *
     * @param data raw transaction string, may be {@code null}
     * @return the set of items; empty when {@code data} is {@code null} or has no items
     */
    private Set<String> parseItems(String data) {
        Set<String> items = new java.util.HashSet<>();
        if (data == null) {
            return items;
        }
        for (String item : data.split(ITEM_SPLIT)) {
            if (!item.isEmpty()) {
                items.add(item);
            }
        }
        return items;
    }

    /**
     * Tells whether a transaction contains every one of the given items.
     *
     * @param transaction items of one transaction
     * @param items       items of a candidate set
     * @return {@code true} when no item is missing from the transaction
     */
    private boolean containsAll(Set<String> transaction, String[] items) {
        for (String item : items) {
            if (!transaction.contains(item)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Finds the frequent 1-item sets.
     *
     * @param transactions tokenised transactions
     * @return each single item (serialised as {@code item + ";"}) whose transaction count is at
     *         least {@link #SUPPORT}, mapped to that count
     */
    private Map<String, Integer> findFrequentOneSets(List<Set<String>> transactions) {
        Map<String, Integer> counts = new HashMap<>();
        for (Set<String> transaction : transactions) {
            for (String item : transaction) {
                String key = item + ITEM_SPLIT;
                Integer previous = counts.get(key);
                counts.put(key, previous == null ? 1 : previous + 1);
            }
        }

        // Apply the support threshold here as well, so infrequent single items neither appear
        // in the result nor seed useless candidates.
        Map<String, Integer> resultSetMap = new HashMap<>();
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            if (entry.getValue() >= SUPPORT) {
                resultSetMap.put(entry.getKey(), entry.getValue());
            }
        }
        return resultSetMap;
    }

    /**
     * Generates the candidate (k+1)-item sets from the frequent k-item sets of the previous step.
     *
     * @param setMap frequent k-item sets of the previous step; all keys hold the same number of
     *               items
     * @return candidate item sets, each mapped to an initial count of 0
     */
    private Map<String, Integer> aprioriGen(Map<String, Integer> setMap) {
        Map<String, Integer> candidateSetMap = new HashMap<>();
        Set<String> frequentSets = setMap.keySet();
        for (String s1 : frequentSets) {
            String[] items1 = s1.split(ITEM_SPLIT);
            int last = items1.length - 1;
            for (String s2 : frequentSets) {
                String[] items2 = s2.split(ITEM_SPLIT);

                // Join step precondition: the first k-1 items must be identical ...
                boolean samePrefix = true;
                for (int i = 0; i < last; i++) {
                    if (!items1[i].equals(items2[i])) {
                        samePrefix = false;
                        break;
                    }
                }
                // ... and the last item of s1 must sort before the last item of s2, which keeps
                // the joined key ordered and produces each combination only once.
                if (samePrefix && items1[last].compareTo(items2[last]) < 0) {
                    // Join step: build the candidate.
                    String candidate = s1 + items2[items2.length - 1] + ITEM_SPLIT;
                    // Prune step: drop candidates that have an infrequent k-item subset.
                    if (!hasInfrequentSubset(candidate, setMap)) {
                        candidateSetMap.put(candidate, 0);
                    }
                }
            }
        }
        return candidateSetMap;
    }

    /**
     * Uses the Apriori property to decide whether a candidate can be discarded without counting.
     *
     * @param candidateSet serialised candidate item set
     * @param setMap       frequent item sets of the previous step
     * @return {@code true} when at least one subset obtained by removing a single item is not a
     *         key of {@code setMap}
     */
    private boolean hasInfrequentSubset(String candidateSet, Map<String, Integer> setMap) {
        String[] items = candidateSet.split(ITEM_SPLIT);
        // Build every subset that leaves out exactly one item and look it up.
        for (int skipped = 0; skipped < items.length; skipped++) {
            StringBuilder subset = new StringBuilder();
            for (int j = 0; j < items.length; j++) {
                if (j != skipped) {
                    subset.append(items[j]).append(ITEM_SPLIT);
                }
            }
            if (!setMap.containsKey(subset.toString())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Derives association rules from the frequent item sets.
     *
     * <p>For every frequent set {@code S} and every non-empty proper subset {@code A} that is
     * itself a key of the map, the rule {@code A -> S \ A} is produced with confidence
     * {@code count(S) / count(A)}; it is kept only when the confidence is strictly greater than
     * {@link #CONFIDENCE}.
     *
     * @param frequentSetMap frequent item sets mapped to their support counts; {@code null} yields
     *                       an empty result
     * @return rule keys of the form {@code "a;->b;"} mapped to their confidence
     */
    public Map<String, Double> getRelationRules(Map<String, Integer> frequentSetMap) {
        Map<String, Double> relationsMap = new HashMap<>();
        if (frequentSetMap == null) {
            return relationsMap;
        }
        for (Map.Entry<String, Integer> frequentSet : frequentSetMap.entrySet()) {
            String key = frequentSet.getKey();
            for (String antecedent : subset(key)) {
                // Only subsets that are themselves recorded as frequent have a usable count.
                Integer antecedentCount = frequentSetMap.get(antecedent);
                if (antecedentCount != null && antecedentCount > 0) {
                    double confidence = (1.0 * frequentSet.getValue()) / antecedentCount;
                    if (confidence > CONFIDENCE) {
                        relationsMap.put(antecedent + CON + expect(key, antecedent), confidence);
                    }
                }
            }
        }
        return relationsMap;
    }

    /**
     * Enumerates all non-empty proper subsets of a serialised item set, without recursion.
     *
     * <p>A set of n items has 2^n subsets, numbered 0 to 2^n - 1; bit j of the number tells
     * whether item j belongs to the subset. Number 0 (the empty set) and number 2^n - 1 (the set
     * itself) are skipped. Items keep their original relative order inside each subset.
     *
     * @param sourceSet serialised item set
     * @return the serialised subsets
     */
    private List<String> subset(String sourceSet) {
        List<String> result = new ArrayList<>();
        String[] items = sourceSet.split(ITEM_SPLIT);
        long fullMask = (1L << items.length) - 1;
        for (long mask = 1; mask < fullMask; mask++) {
            StringBuilder item = new StringBuilder();
            for (int j = 0; j < items.length; j++) {
                if ((mask & (1L << j)) != 0) {
                    item.append(items[j]).append(ITEM_SPLIT);
                }
            }
            result.add(item.toString());
        }
        return result;
    }

    /**
     * Set difference A \ B on serialised item sets.
     *
     * @param stringA serialised item set A
     * @param stringB serialised item set B
     * @return the items of A that do not occur in B, in A's order, each followed by the separator
     */
    private String expect(String stringA, String stringB) {
        StringBuilder result = new StringBuilder();
        String[] itemsA = stringA.split(ITEM_SPLIT);
        String[] itemsB = stringB.split(ITEM_SPLIT);
        for (String a : itemsA) {
            boolean inB = false;
            for (String b : itemsB) {
                if (a.equals(b)) {
                    inB = true;
                    break;
                }
            }
            if (!inB) {
                result.append(a).append(ITEM_SPLIT);
            }
        }
        return result.toString();
    }
}
4、然后自己写个定时任务,结合上面的算法,实现推荐业务。数据的来源和计算结果可以保存到缓存或者数据库,看自己的业务了。
推荐服务类:
public interface RecommendLinkLeyValuesService extends IService<RecommendLinkLeyValues> {
/**
* 加载文件
*
* @throws Exception
*/
ArrayList<String> loadFileData(String filePath) throws Exception;
/**
* 构建关联数据
*
* @param
* @return
* @throws Exception
*/
ArrayList<String> buildRelationData() throws Exception;
/**
* 保存关联数据
*
* @param dataList
* @throws Exception
*/
boolean saveRelationRulesList(ArrayList<String> dataList) throws Exception;
/**
* 给所有用户推荐对应的链接
*
* @throws Exception
*/
void recommendToALLUsers() throws Exception;
/**
* 给用户推荐链接
*
* @param userId
* @throws Exception
*/
void recommendToUserByUserId(Integer userId) throws Exception;
推荐服务的实现类:
/**
 * Default implementation of {@link RecommendLinkLeyValuesService}: turns user favourites into
 * transactions, mines association rules with {@link AprioriUtil}, stores the resulting
 * key/value link pairs and writes per-user recommendations.
 */
@Service
@Slf4j
public class RecommendLinkLeyValuesServiceImpl extends ServiceImpl<RecommendLinkLeyValuesMapper,
        RecommendLinkLeyValues> implements RecommendLinkLeyValuesService {
    @Autowired
    private UsersService usersService;
    @Autowired
    private LinkService linkService;
    @Autowired
    private UserLinkCollectService userLinkCollectService;
    /**
     * Service for the per-user recommendation list.
     */
    @Autowired
    private UserRecommendLinkService userRecommendLinkService;

    /**
     * Builds one transaction string per active user from that user's collected link ids.
     *
     * <p>Every id is followed by {@code ;} (for example {@code "12;209;9;"}), which is the form
     * the item matching in {@link AprioriUtil} relies on. Users without any usable favourite
     * produce no transaction; favourites without a link id are skipped.
     *
     * @return the transaction strings
     * @throws Exception if a query fails
     */
    @Override
    public ArrayList<String> buildRelationData() throws Exception {
        ArrayList<String> arrayList = new ArrayList<>();
        // Only users whose status column equals "1" are taken into account.
        QueryWrapper<Users> queryWrapper = new QueryWrapper<>();
        queryWrapper.eq("status", "1");
        List<Users> users = usersService.getBaseMapper().selectList(queryWrapper);
        for (Users usersTemp : users) {
            // All favourites of this user.
            List<UserLinkCollect> userLinkCollectList =
                    userLinkCollectService.findAllByUserId(usersTemp.getUserId());
            if (userLinkCollectList == null) {
                continue;
            }
            StringBuilder transaction = new StringBuilder();
            for (UserLinkCollect userLinkCollect : userLinkCollectList) {
                // A missing id would otherwise end up as the literal text "null" in the data.
                if (userLinkCollect.getLinkId() != null) {
                    transaction.append(userLinkCollect.getLinkId()).append(";");
                }
            }
            if (transaction.length() > 0) {
                arrayList.add(transaction.toString());
            }
        }
        log.info("====生成关联数据====");
        for (String str : arrayList) {
            log.info("{}", str);
        }
        return arrayList;
    }

    /**
     * Reads a file line by line and returns the lines as transaction data.
     *
     * @param filePath path of the file; {@code null} yields an empty list
     * @return the lines of the file in reading order
     * @throws Exception if the file cannot be opened or read
     */
    @Override
    public ArrayList<String> loadFileData(String filePath) throws Exception {
        ArrayList<String> stringList = new ArrayList<>();
        if (filePath == null) {
            return stringList;
        }
        // try-with-resources closes the reader (and the underlying file) even when reading fails.
        try (BufferedReader bufferedReader = new BufferedReader(new FileReader(new File(filePath)))) {
            String str;
            while ((str = bufferedReader.readLine()) != null) {
                log.info("加载关联数据:{}", str);
                stringList.add(str);
            }
        }
        return stringList;
    }

    /**
     * Mines association rules from the transactions and stores every resulting
     * (collected link, recommended link, score) triple.
     *
     * @param dataList transaction strings
     * @return {@code true} when at least one pair was derived and all pairs were processed;
     *         {@code false} when nothing was derived or an exception occurred
     * @throws Exception declared by the interface; exceptions raised inside are logged and
     *                   reported through the return value
     */
    @Override
    public boolean saveRelationRulesList(ArrayList<String> dataList) throws Exception {
        try {
            AprioriUtil aprioriUtil = new AprioriUtil();
            log.info("====频繁项集====");
            Map<String, Integer> frequentSetMap = aprioriUtil.apriori(dataList);
            List<AprioriEntity> aprioriEntityList = this.buildList(aprioriUtil, frequentSetMap);
            if (aprioriEntityList == null || aprioriEntityList.isEmpty()) {
                log.info("====无频繁项集====");
                return false;
            }
            for (AprioriEntity aprioriEntity : aprioriEntityList) {
                // Raw key/value recommendation record.
                RecommendLinkLeyValues recommendLinkLeyValues = new RecommendLinkLeyValues();
                recommendLinkLeyValues.setKeyId(aprioriEntity.getCollectId());
                recommendLinkLeyValues.setValueId(aprioriEntity.getRecommendId());
                recommendLinkLeyValues.setScore(aprioriEntity.getScore());
                this.saveOrUpdateInfo(recommendLinkLeyValues);
            }
        } catch (Exception exception) {
            // Log at error level and keep the stack trace; the message alone is often empty.
            log.error("error:{}", exception.getMessage(), exception);
            return false;
        }
        return true;
    }

    /**
     * Expands the association rules into pairwise (collected id, recommended id, score) entities.
     *
     * <p>A rule such as {@code "12;209;9;->2;6;"} yields one entity for every combination of an
     * id on the left with an id on the right, all carrying the rule's confidence as score.
     *
     * @param aprioriUtil    utility used to derive the rules
     * @param frequentSetMap frequent item sets with their counts
     * @return the entities sorted by collected id; empty when there are no frequent sets
     */
    private List<AprioriEntity> buildList(AprioriUtil aprioriUtil, Map<String, Integer> frequentSetMap) {
        ArrayList<AprioriEntity> aprioriEntityArrayList = new ArrayList<>();
        if (frequentSetMap == null || frequentSetMap.isEmpty()) {
            return aprioriEntityArrayList;
        }
        for (Map.Entry<String, Integer> frequentSet : frequentSetMap.entrySet()) {
            log.info("{} : {}", frequentSet.getKey(), frequentSet.getValue());
        }
        log.info("====关联规则====");
        Map<String, Double> relationRulesMap = aprioriUtil.getRelationRules(frequentSetMap);
        for (Map.Entry<String, Double> rule : relationRulesMap.entrySet()) {
            String[] content = rule.getKey().split("->");
            // Both sides are read below, so a rule without a right-hand side is skipped.
            if (content.length < 2) {
                continue;
            }
            String[] collectIds = content[0].split(";");
            String[] recommendIds = content[1].split(";");
            double score = rule.getValue();
            for (String collectId : collectIds) {
                for (String recommendId : recommendIds) {
                    AprioriEntity aprioriEntity =
                            new AprioriEntity(Integer.valueOf(collectId), Integer.valueOf(recommendId), score);
                    // NOTE(review): de-duplication depends on AprioriEntity#equals, which is not
                    // visible here — confirm it compares the fields intended.
                    if (!aprioriEntityArrayList.contains(aprioriEntity)) {
                        aprioriEntityArrayList.add(aprioriEntity);
                    }
                }
            }
        }
        aprioriEntityArrayList.sort(Comparator.comparing(AprioriEntity::getCollectId));
        return aprioriEntityArrayList;
    }

    /**
     * Inserts the key/value pair, or updates the stored row for the same pair.
     *
     * <p>Nothing is written when the record is {@code null} or when either link can no longer
     * be found.
     *
     * @param recommendLinkLeyValues record carrying key id, value id and score
     * @throws Exception if a query or write fails
     */
    private void saveOrUpdateInfo(RecommendLinkLeyValues recommendLinkLeyValues) throws Exception {
        if (recommendLinkLeyValues == null) {
            return;
        }
        recommendLinkLeyValues.setCreateTime(new Date());
        recommendLinkLeyValues.setUpdateTime(new Date());
        recommendLinkLeyValues.setMemo("系统自动计算");
        // The collected link acting as key.
        Link linkKey = linkService.findLinkById(recommendLinkLeyValues.getKeyId());
        if (linkKey == null) {
            return;
        }
        // The recommended link.
        Link linkValue = linkService.findLinkById(recommendLinkLeyValues.getValueId());
        if (linkValue == null) {
            return;
        }
        // Look up the existing row for exactly this (key, value) pair.
        QueryWrapper<RecommendLinkLeyValues> queryWrapper = new QueryWrapper<>();
        queryWrapper.eq("key_id", recommendLinkLeyValues.getKeyId());
        queryWrapper.eq("value_id", recommendLinkLeyValues.getValueId());
        RecommendLinkLeyValues dbValues = this.baseMapper.selectOne(queryWrapper);
        if (dbValues != null) {
            dbValues.setKeyName(linkKey.getLinkName());
            // The incoming record carries no value name; take it from the link itself.
            dbValues.setValueName(linkValue.getLinkName());
            dbValues.setScore(recommendLinkLeyValues.getScore());
            dbValues.setUpdateTime(new Date());
            dbValues.setMemo(recommendLinkLeyValues.getMemo());
            this.updateById(dbValues);
        } else {
            recommendLinkLeyValues.setKeyName(linkKey.getLinkName());
            recommendLinkLeyValues.setValueName(linkValue.getLinkName());
            this.save(recommendLinkLeyValues);
        }
    }

    /**
     * Produces recommendations for every user whose status column equals "1".
     *
     * @throws Exception if recommending for any user fails
     */
    @Override
    public void recommendToALLUsers() throws Exception {
        QueryWrapper<Users> queryWrapper = new QueryWrapper<>();
        queryWrapper.eq("status", "1");
        List<Users> users = usersService.getBaseMapper().selectList(queryWrapper);
        for (Users usersTemp : users) {
            this.recommendToUserByUserId(usersTemp.getUserId());
        }
    }

    /**
     * Writes recommendation rows for one user based on the links that user collected.
     *
     * <p>For every collected link, the stored pairs whose key is that link are read and the
     * value link of each pair is recommended: a new row is inserted, or the update time of the
     * existing row for (user, link) is refreshed.
     *
     * @param userId id of the user; {@code null} is ignored
     * @throws Exception if a query or write fails
     */
    @Override
    public void recommendToUserByUserId(Integer userId) throws Exception {
        if (userId == null) {
            return;
        }
        // All favourites of the user.
        List<UserLinkCollect> userLinkCollectList = userLinkCollectService.findAllByUserId(userId);
        if (userLinkCollectList == null || userLinkCollectList.isEmpty()) {
            log.info("用户:{},无收藏记录", userId);
            return;
        }
        // The user does not change during the loops below, so load it once.
        Users users = usersService.getBaseMapper().selectById(userId);
        if (users == null) {
            log.info("用户:{},不存在", userId);
            return;
        }
        for (UserLinkCollect userLinkCollect : userLinkCollectList) {
            Integer collectLinkId = userLinkCollect.getLinkId();
            if (collectLinkId == null) {
                continue;
            }
            // Stored recommendation pairs keyed by the collected link.
            QueryWrapper<RecommendLinkLeyValues> wrapper = new QueryWrapper<>();
            wrapper.eq("key_id", collectLinkId);
            List<RecommendLinkLeyValues> recommendLinkLeyValuesList = this.baseMapper.selectList(wrapper);
            for (RecommendLinkLeyValues recommendLinkLeyValues : recommendLinkLeyValuesList) {
                Link valuesLink = linkService.findLinkById(recommendLinkLeyValues.getValueId());
                if (valuesLink == null) {
                    // The recommended link no longer exists; nothing to recommend.
                    continue;
                }
                log.info("添加推荐链接:{},用户昵称{}", valuesLink.getLinkName(), users.getNickName());

                QueryWrapper<UserRecommendLink> queryWrapper = new QueryWrapper<>();
                queryWrapper.eq("user_id", userId);
                queryWrapper.eq("link_id", recommendLinkLeyValues.getValueId());
                UserRecommendLink userRecommendLinkDB =
                        userRecommendLinkService.getBaseMapper().selectOne(queryWrapper);
                if (userRecommendLinkDB == null) {
                    // A fresh entity per insert. NOTE(review): MyBatis-Plus normally writes the
                    // generated key back onto a saved entity, so reusing one instance would make
                    // later saveOrUpdate calls overwrite the previous row — confirm against the
                    // entity's id strategy.
                    UserRecommendLink userRecommendLink = new UserRecommendLink();
                    userRecommendLink.setUserId(userId);
                    userRecommendLink.setLinkId(recommendLinkLeyValues.getValueId());
                    userRecommendLink.setMemo("推荐的链接名称是:" + valuesLink.getLinkName()
                            + "用户是:" + users.getNickName());
                    userRecommendLink.setCreateTime(new Date());
                    userRecommendLink.setUpdateTime(new Date());
                    boolean flag = userRecommendLinkService.saveOrUpdate(userRecommendLink);
                    log.info("添加推荐结果 flag:{}", flag);
                } else {
                    userRecommendLinkDB.setUpdateTime(new Date());
                    userRecommendLinkService.updateById(userRecommendLinkDB);
                }
            }
        }
    }
}
5、这个计算比较耗时,随着数据增长计算量会越来越大,建议写一个定时任务,在凌晨自动执行。
/**
* @author yichuan
* 定时任务
*/
//1.主要用于标记配置类,兼备Component的效果
@Configuration
//2.开启定时任务
@EnableScheduling
@Slf4j
public class StaticScheduleTask {
@Autowired
private RecommendLinkLeyValuesService recommendLinkLeyValuesService;
/***
*添加定时任务,每天凌晨0点执行一次
*/
//3.添加定时任务,每天凌晨0点执行一次
@Scheduled(cron = "0 0 0 * * ?")
private void configureTasks() {
try {
log.info("执行静态定时任务时间:{}", LocalDateTime.now());
//执行静态定时任务时间
System.err.println("执行静态定时任务时间: " + LocalDateTime.now());
//添加用户收藏链接的推荐服务
ArrayList<String> dataList = recommendLinkLeyValuesService.buildRelationData();
// 保存关联数据
boolean flag = recommendLinkLeyValuesService.saveRelationRulesList(dataList);
//有关联规则
if (flag) {
//给用户推荐链接
recommendLinkLeyValuesService.recommendToALLUsers();
}
} catch (Exception exception) {
exception.printStackTrace();
log.info("ex:{}", exception.getMessage());
}
}
}