商品订单频繁项集Bolt实现

一 设计方案介绍
SplitBolt:对订单中的商品进行两两组合并发送
PairCountBolt:计算商品对出现的次数
PairTotalCountBolt:计算商品对总数
SupportComputeBolt:计算商品对的支持度
ConfidenceComputeBolt:计算商品对的置信度
FilterBolt:过滤符合条件的商品对并存入redis

二 SplitBolt实现
package com.hust.grid.leesf.ordertest.bolt;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.hust.grid.leesf.ordertest.common.FieldNames;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * 对订单中的商品进行两两组合并发送
 *
 * @author leesf
 *
 */
public class SplitBolt extends BaseRichBolt {
     private static final long serialVersionUID = 1L;
     private OutputCollector collector;
     private Map<String, List<String>> orderItems; // 存储订单及其商品
     public void prepare(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, OutputCollector collector) {
           this.collector = collector;
           orderItems = new HashMap<String, List<String>>();
     }
     public void execute(Tuple tuple) {
           // 获取订单号和商品名称
           String id = tuple.getStringByField(FieldNames.ID);
           String newItem = tuple.getStringByField(FieldNames.NAME);
           if (!orderItems.containsKey(id)) { // 不包含该订单
                // 新生商品链表
                ArrayList<String> items = new ArrayList<String>();
                // 添加商品
                items.add(newItem);
                orderItems.put(id, items);
                return;
           }
           // 包含订单,取出订单中包含的商品
           List<String> items = orderItems.get(id);
           for (String existItem : items) { // 遍历商品
                // 将元组中提取的商品与订单中已存在的商品组合后发射
                collector.emit(createPair(newItem, existItem));
           }
           // 添加新的商品
           items.add(newItem);
     }
     private Values createPair(String item1, String item2) { // 按照指定顺序生成商品对
           if (item1.compareTo(item2) > 0) {
                return new Values(item1, item2);
           }
           return new Values(item2, item1);
     }
     public void declareOutputFields(OutputFieldsDeclarer declarer) {
           // 声明元组字段
           declarer.declare(new Fields(FieldNames.ITEM1, FieldNames.ITEM2));
     }
}

三 PairCountBolt实现
package com.hust.grid.leesf.ordertest.bolt;

import java.util.HashMap;
import java.util.Map;

import com.hust.grid.leesf.ordertest.common.FieldNames;
import com.hust.grid.leesf.ordertest.common.ItemPair;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
* 计算商品对出现的次数
*
* @author leesf
*
*/
public class PairCountBolt extends BaseRichBolt {
    private static final long serialVersionUID = 1L;

    private OutputCollector collector;
    private Map<ItemPair, Integer> pairCounts; // 存储商品对及其出现的次数

    public void prepare(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.pairCounts = new HashMap<ItemPair, Integer>();
    }

    public void execute(Tuple tuple) {
        String item1 = tuple.getStringByField(FieldNames.ITEM1);
        String item2 = tuple.getStringByField(FieldNames.ITEM2);

        ItemPair itemPair = new ItemPair(item1, item2);
        int pairCount = 0;

        if (pairCounts.containsKey(itemPair)) { // 包含商品对
            // 取出商品对出现的次数
            pairCount = pairCounts.get(itemPair);
        }
        // 更新出现次数
        pairCount++;

        pairCounts.put(itemPair, pairCount);

        collector.emit(new Values(item1, item2, pairCount));
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // 声明元组字段
        declarer.declare(new Fields(FieldNames.ITEM1, FieldNames.ITEM2, FieldNames.PAIR_COUNT));
    }
}

四 PairTotalCountBolt实现
package com.hust.grid.leesf.ordertest.bolt;

import java.util.Map;

import com.hust.grid.leesf.ordertest.common.FieldNames;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
* 计算商品对总数
*
* @author leesf
*
*/
public class PairTotalCountBolt extends BaseRichBolt {
    private static final long serialVersionUID = 1L;

    private OutputCollector collector;
    private int totalCount; // 商品对总数

    public void prepare(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        totalCount = 0;
    }

    public void execute(Tuple tuple) {
        totalCount++; // 每收到一个元组,便增加商品对总数
        collector.emit(new Values(totalCount)); // 发射商品对总数
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // 声明元组字段
        declarer.declare(new Fields(FieldNames.TOTAL_COUNT));
    }
}

五 SupportComputeBolt实现
package com.hust.grid.leesf.ordertest.bolt;

import java.util.HashMap;
import java.util.Map;

import com.hust.grid.leesf.ordertest.common.FieldNames;
import com.hust.grid.leesf.ordertest.common.ItemPair;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
* 计算商品对的支持度
*
* @author leesf
*
*/
public class SupportComputeBolt extends BaseRichBolt {
    private static final long serialVersionUID = 1L;

    private OutputCollector collector;
    private Map<ItemPair, Integer> pairCounts; // 存储商品对及其出现的次数
    private int pairTotalCount; // 商品对总数

    public void prepare(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        pairCounts = new HashMap<ItemPair, Integer>();
        pairTotalCount = 0;
    }

    /**
     * 由于SupportComputeBolt订阅了多个流,其需要根据不同的字段做出不同的行为
     */
    public void execute(Tuple tuple) {
        if (tuple.getFields().get(0).equals(FieldNames.TOTAL_COUNT)) { // 对应PairTotalCountBolt
            // 取出商品对总数量
            pairTotalCount = tuple.getIntegerByField(FieldNames.TOTAL_COUNT);
        } else if (tuple.getFields().size() == 3) { // 对应PairCountBolt
            // 取出商品及其商品对出现的次数
            String item1 = tuple.getStringByField(FieldNames.ITEM1);
            String item2 = tuple.getStringByField(FieldNames.ITEM2);
            int pairCount = tuple.getIntegerByField(FieldNames.PAIR_COUNT);
            // 存储商品对及其次数
            pairCounts.put(new ItemPair(item1, item2), pairCount);
        } else if (tuple.getFields().get(0).equals(FieldNames.COMMAND)) { // 对应CommandSpout
            for (ItemPair itemPair : pairCounts.keySet()) { // 遍历商品对
                // 计算商品支持度,使用商品对出现的次数除以商品对总数量
                double itemSupport = (double) (pairCounts.get(itemPair).intValue()) / pairTotalCount;

                collector.emit(new Values(itemPair.getItem1(), itemPair.getItem2(), itemSupport));
            }
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // 定义元组字段
        declarer.declare(new Fields(FieldNames.ITEM1, FieldNames.ITEM2, FieldNames.SUPPORT));
    }

}

六 ConfidenceComputeBolt实现
package com.hust.grid.leesf.ordertest.bolt;

import java.util.HashMap;
import java.util.Map;

import com.hust.grid.leesf.ordertest.common.ConfKeys;
import com.hust.grid.leesf.ordertest.common.FieldNames;
import com.hust.grid.leesf.ordertest.common.ItemPair;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import redis.clients.jedis.Jedis;

/**
* 计算商品对的置信度
*
* @author leesf
*/
public class ConfidenceComputeBolt extends BaseRichBolt {
    private static final long serialVersionUID = 1L;

    private OutputCollector collector;
    private Map<ItemPair, Integer> pairCounts; // 存储商品对及其出现的次数

    private String host;
    private int port;
    private Jedis jedis;

    public void prepare(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.host = conf.get(ConfKeys.REDIS_HOST).toString();
        this.port = Integer.parseInt(conf.get(ConfKeys.REDIS_PORT).toString());
        pairCounts = new HashMap<ItemPair, Integer>();
        connectToRedis();
    }

    private void connectToRedis() {
        jedis = new Jedis(host, port);
        jedis.connect();
    }

    /**
     * 由于ConfidenceComputeBolt订阅了多个流,其需要根据元组不同的字段做出不同的行为
     */
    public void execute(Tuple tuple) {
        if (tuple.getFields().size() == 3) { // 对应PairCountBolt
            // 取出商品对及其出现次数
            String item1 = tuple.getStringByField(FieldNames.ITEM1);
            String item2 = tuple.getStringByField(FieldNames.ITEM2);
            int pairCount = tuple.getIntegerByField(FieldNames.PAIR_COUNT);

            pairCounts.put(new ItemPair(item1, item2), pairCount);
        } else if (tuple.getFields().get(0).equals(FieldNames.COMMAND)) { // 对应CommandSpout,需要进行统计
            for (ItemPair itemPair : pairCounts.keySet()) { // 遍历商品对
                // 从redis中取出商品对中商品出现的次数
                double item1Count = Integer.parseInt(jedis.hget("itemCounts", itemPair.getItem1()));
                double item2Count = Integer.parseInt(jedis.hget("itemCounts", itemPair.getItem2()));
                double itemConfidence = pairCounts.get(itemPair).intValue();

                // 计算商品对置信度
                if (item1Count < item2Count) {
                    itemConfidence /= item1Count;
                } else {
                    itemConfidence /= item2Count;
                }

                collector.emit(new Values(itemPair.getItem1(), itemPair.getItem2(), itemConfidence));
            }
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // 声明元组字段
        declarer.declare(new Fields(FieldNames.ITEM1, FieldNames.ITEM2, FieldNames.CONFIDENCE));
    }
}

七 FilterBolt实现
package com.hust.grid.leesf.ordertest.bolt;

import java.util.Map;

import org.json.simple.JSONObject;

import com.hust.grid.leesf.ordertest.common.ConfKeys;
import com.hust.grid.leesf.ordertest.common.FieldNames;
import com.hust.grid.leesf.ordertest.common.ItemPair;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import redis.clients.jedis.Jedis;

/**
* 过滤符合条件的商品对并存入redis
*
* @author leesf
*
*/
public class FilterBolt extends BaseRichBolt {
    private static final long serialVersionUID = 1L;

    // 商品对的支持度和置信度
    private static final double SUPPORT_THRESHOLD = 0.01;
    private static final double CONFIDENCE_THRESHOLD = 0.01;

    private OutputCollector collector;

    private Jedis jedis;
    private String host;
    private int port;

    public void prepare(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.host = conf.get(ConfKeys.REDIS_HOST).toString();
        this.port = Integer.parseInt(conf.get(ConfKeys.REDIS_PORT).toString());
        connectToRedis();
    }

    private void connectToRedis() {
        jedis = new Jedis(host, port);
        jedis.connect();
    }

    @SuppressWarnings("unchecked")
    public void execute(Tuple tuple) {
        // 取出商品并构造商品对
        String item1 = tuple.getStringByField(FieldNames.ITEM1);
        String item2 = tuple.getStringByField(FieldNames.ITEM2);
        ItemPair itemPair = new ItemPair(item1, item2);
        String pairString = itemPair.toString();

        double support = 0;
        double confidence = 0;

        if (tuple.getFields().get(2).equals(FieldNames.SUPPORT)) { // 对应SupportComputeBolt
            // 获取支持度并存入redis
            support = tuple.getDoubleByField(FieldNames.SUPPORT);
            jedis.hset("supports", pairString, String.valueOf(support));
        } else if (tuple.getFields().get(2).equals(FieldNames.CONFIDENCE)) { // 对应ConfidenceComputeBolt
            // 获取置信度并存入redis
            confidence = tuple.getDoubleByField(FieldNames.CONFIDENCE);
            jedis.hset("confidences", pairString, String.valueOf(confidence));
        }

        if (!jedis.hexists("supports", pairString) || !jedis.hexists("confidences", pairString)) { // 商品对的支持度和置信度还未计算完成,返回
            return;
        }
        // 商品对的支持度和置信度已经计算完成
        support = Double.parseDouble(jedis.hget("supports", pairString));
        confidence = Double.parseDouble(jedis.hget("confidences", pairString));

        if (support >= SUPPORT_THRESHOLD && confidence >= CONFIDENCE_THRESHOLD) { // 支持度和置信度超过阈值
            // 将该商品对信息存入redis中
            JSONObject pairValue = new JSONObject();
            pairValue.put(FieldNames.SUPPORT, support);
            pairValue.put(FieldNames.CONFIDENCE, confidence);

            jedis.hset("recommendedPairs", pairString, pairValue.toJSONString());

            collector.emit(new Values(item1, item2, support, confidence));
        } else { // 不高于阈值,则从redis中删除
            jedis.hdel("recommendedPairs", pairString);
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // 声明元组字段
        declarer.declare(new Fields(FieldNames.ITEM1, FieldNames.ITEM2, FieldNames.SUPPORT, FieldNames.CONFIDENCE));
    }
}

八 参考
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值