Hadoop Interview Question

An interview question found online. I worked through it for discussion; if you have a better approach, please share it. ^^
Problem:

A sample of the current log format:
a,b,c,d
b,b,f,e
a,a,c,f
Using the language you are most familiar with, write a MapReduce job that counts the number of occurrences of each element in the fourth column.
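The solution below reads the question as: count, across the entire log, every occurrence of any value that appears in the fourth column. It ships the input file to each mapper via DistributedCache, collects the distinct fourth-column values in setup(), and has map() emit a 1 for every matching field; the reducer sums the 1s. On the sample above this yields d 1, e 1, f 2 (f also appears in the third column of the second line).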
package ms;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* @Title: MS.java 
* @Package ms 
* @author zfy1355_gmail_com   
* @date 2016-02-24 18:37:42
* @version V1.0
 */
public class MS extends Configured implements Tool {
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
        // Distinct values seen in the fourth column of the cached input file.
        private Set<String> dataMap = new HashSet<String>();
        private final IntWritable one = new IntWritable(1);
        @Override
        protected void setup(
                Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // Read the cached copy of the input file and remember every
            // distinct value found in its fourth column.
            Path[] paths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
            if (paths == null) {
                return;
            }
            for (Path path : paths) {
                if (path.toString().contains("data")) {
                    BufferedReader in = new BufferedReader(new FileReader(path.toString()));
                    try {
                        String line;
                        while ((line = in.readLine()) != null) {
                            String[] fields = line.split(",");
                            if (fields.length >= 4) {
                                dataMap.add(fields[3]);
                            }
                        }
                    } finally {
                        in.close();   // close every reader, not just the last one opened
                    }
                }
            }
        }
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // Emit a 1 for every field whose value appears in the fourth
            // column, i.e. count occurrences of the fourth-column elements
            // across the whole log, not just within column four.
            String[] vs = value.toString().split(",");
            for (String v : vs) {
                if (dataMap.contains(v)) {
                    context.write(new Text(v), one);
                }
            }
        }
    }
    }

    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable c : values) {
                count += c.get();   // sum the 1s (also safe if a combiner is used)
            }
            context.write(key, new IntWritable(count));
        }
    }

    public int run(String[] args) throws Exception {
        Path inPath = new Path("hdfs://ns1/user/hadoop/data.txt");

        Job job = new Job(getConf(), "data count");
        job.setJarByClass(getClass());

        FileInputFormat.addInputPath(job, inPath);
        // Note: the job fails if this output directory already exists.
        FileOutputFormat.setOutputPath(job, new Path("hdfs://ns1/user/hadoop/dataout"));

        // Cache the input file itself so every mapper can pre-read the
        // fourth column in setup().
        DistributedCache.addCacheFile(inPath.toUri(), job.getConfiguration());

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        return job.waitForCompletion(true)?0:1;
    }

    public static void main(String[] args) {
        try {
            int exitCode = ToolRunner.run(new MS(), args);
            System.exit(exitCode);   // surface the job status as the process exit code
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
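For comparison: if the question is read more narrowly as "count each distinct value within the fourth column itself" (giving d 1, e 1, f 1 on the sample), no DistributedCache is needed and the job reduces to a plain word-count variant. A minimal sketch under that reading; the class name FourthColumnMapper is mine, not from the original post:

    public static class FourthColumnMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final IntWritable one = new IntWritable(1);
        private final Text outKey = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            if (fields.length >= 4) {
                outKey.set(fields[3]);       // the fourth column only
                context.write(outKey, one);  // word-count style (value, 1)
            }
        }
    }

MyReducer works unchanged with this mapper, and since it sums its input values it could also be registered as a combiner (job.setCombinerClass(MyReducer.class)) to cut shuffle traffic.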