数据算法-hadoop7 购物车分析

主要思路是通过递归,从[a,b,c,d]中列出所有包含n个元素的组合
如果n=2
为:[a,b],[a,c],[a,d],[b,c],[b,d],[c,d]
如果n=3
为:[a,b,c],[a,b,d],[b,c,d],[a,c,d]
然后统计每个组合在所有购物记录中出现的次数

/**
 * Mapper for market basket analysis.
 *
 * <p>Each input line is treated as one transaction whose items are separated
 * by commas. For every combination returned by
 * {@code Combination.findSortedCombinations} the mapper emits
 * {@code (combination.toString(), 1)} so the reducer can count how often each
 * combination occurs.
 */
public class MBAMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /** Combination size used when "number.of.pairs" is absent from the configuration. */
    public static final int DEFAULT_NUMBER_OF_PAIRS = 2;
    /** Constant count emitted for every combination; never mutated, so safe to share. */
    private static final IntWritable NUMBER_ONE = new IntWritable(1);
    /**
     * Reusable output key. Kept per instance (not static) because it is
     * mutated on every write and must not be shared between mapper instances.
     */
    private final Text reducerKey = new Text();
    /** Number of items per emitted combination; read from the job configuration in setup. */
    int numberOfPairs;

    /**
     * Reads the combination size from the "number.of.pairs" configuration
     * property, falling back to {@link #DEFAULT_NUMBER_OF_PAIRS}.
     */
    @Override
    protected void setup(Context context) throws IOException,InterruptedException {
        this.numberOfPairs = context.getConfiguration().getInt(
                "number.of.pairs", DEFAULT_NUMBER_OF_PAIRS);
    }

    /**
     * Splits one transaction line into items and emits one record per
     * combination. Lines that yield no items produce no output.
     *
     * @param key     input key supplied by the input format (unused)
     * @param value   one transaction line with comma-separated items
     * @param context Hadoop context used to emit the output records
     */
    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        List<String> items = convertItemsToList(value.toString());
        if (items.isEmpty()) {
            // no mapper output will be generated
            return;
        }
        generateMapperOutput(numberOfPairs, items, context);
    }

    /**
     * Converts a comma-separated line into a list of trimmed item names.
     *
     * @param line raw transaction line; may be null or empty
     * @return the trimmed, non-empty items in input order; never null
     */
    private static List<String> convertItemsToList(String line) {
        List<String> items = new ArrayList<String>();
        if ((line == null) || (line.length() == 0)) {
            return items;
        }
        String[] tokens = StringUtils.split(line, ",");
        if (tokens == null) {
            return items;
        }
        for (String token : tokens) {
            if (token == null) {
                continue;
            }
            String item = token.trim();
            // A token such as " " trims to "", which is not a real item and
            // would otherwise show up inside the emitted combinations.
            if (item.length() > 0) {
                items.add(item);
            }
        }
        return items;
    }

    /**
     * Emits {@code (combination, 1)} for every combination of the given size.
     *
     * @param numberOfPairs number of items per combination
     * @param items         items of one transaction
     * @param context       Hadoop context used to emit the output records
     */
    private void generateMapperOutput(int numberOfPairs, List<String> items,
            Context context) throws IOException, InterruptedException {
        // NOTE(review): Combination is defined elsewhere; its name suggests each
        // combination comes back sorted so equal item sets share one key - confirm.
        List<List<String>> sortedCombinations = Combination
                .findSortedCombinations(items, numberOfPairs);
        for (List<String> itemList : sortedCombinations) {
            reducerKey.set(itemList.toString());
            context.write(reducerKey, NUMBER_ONE);
        }
    }

}
public class MBAReducer  extends Reducer<Text, IntWritable, Text, IntWritable>{
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0; // total items paired
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
/**
 * Configures and runs the "MBA" MapReduce job using {@code MBAMapper} and
 * {@code MBAReducer} with a single reduce task, reading from and writing to
 * the hard-coded local paths below.
 *
 * <p>The process exits with status 1 when the job fails, so calling scripts
 * can detect the failure instead of seeing a success exit code.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
        Configuration conf1 = new Configuration();
        System.setProperty("hadoop.home.dir", "E:\\hadoop-2.6.0");
        conf1.setBoolean("dfs.permissions", false);

        Job job = Job.getInstance(conf1, "MBA");
        job.setMapperClass(MBAMapper.class);
        job.setReducerClass(MBAReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // output format
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(job, new Path(
                "E:\\java\\test\\07\\07.txt"));
        FileOutputFormat
                .setOutputPath(job, new Path("E:\\java\\test\\07\\out"));

        // Blocks until the job finishes; true enables progress output.
        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            // Previously both branches were empty, so a failed job still
            // ended the process with exit status 0.
            System.err.println("MR run failed");
            System.exit(1);
        }
    }

输入

crackers,bread,banana
crackers,coke,butter,coffee
crackers,bread
crackers,bread
crackers,bread,coffee
butter,coke
butter,coke,bread,crackers

输出

[banana, bread] 1
[banana, crackers]  1
[bread, butter] 1
[bread, coffee] 1
[bread, coke]   1
[bread, crackers]   5
[butter, coffee]    1
[butter, coke]  3
[butter, crackers]  2
[coffee, coke]  1
[coffee, crackers]  2
[coke, crackers]    2
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值