访问次数最多的前十个搜索词

访问次数最多的前十个搜索词

ThirdMapper.java

package com.hniu.bigdata.hadoop.Third;


import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Mapper for the "top ten search keywords" job.
 *
 * <p>Each input line is split on single whitespace characters. Column 1 is used as the
 * search keyword and column 4 as the clicked URL; the part of the URL before the first
 * {@code '/'} is treated as the domain. For every well-formed line the mapper emits
 * {@code keyword_domain -> ThirdSortBean(keyword, domain, 1)}.
 *
 * <p>Lines with fewer than five columns (including blank lines) are skipped and counted
 * under the {@code ThirdMapper/MALFORMED_LINES} counter instead of failing the task.
 */
public class ThirdMapper extends Mapper<LongWritable, Text,Text, ThirdSortBean> {

    /** Index of the search keyword column in a whitespace-split record. */
    private static final int KEYWORD_INDEX = 1;

    /** Index of the URL column in a whitespace-split record. */
    private static final int URL_INDEX = 4;

    /**
     * Emits one click for the (keyword, domain) pair found on the line.
     *
     * @param key     byte offset of the line (unused)
     * @param value   one raw input line
     * @param context task context used to emit the pair and to count skipped lines
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        String[] values = value.toString().split("\\s");
        if (values.length <= URL_INDEX) {
            // Not enough columns to read keyword and URL: skip, but keep it visible.
            context.getCounter("ThirdMapper", "MALFORMED_LINES").increment(1);
            return;
        }

        // The timestamp column is deliberately not parsed here: its formatted value was
        // never part of the emitted key or value, so parsing it could only fail the task.
        String keyword = values[KEYWORD_INDEX];
        String domain = extractDomain(values[URL_INDEX]);

        ThirdSortBean data = new ThirdSortBean();
        data.setKeyWords(keyword);
        data.setDomain(domain);
        data.setTotal_click(1);
        context.write(new Text(keyword + "_" + domain), data);

    }

    /**
     * Returns the part of {@code url} before the first {@code '/'}, or the whole string
     * when it contains no slash. A URL that starts with a slash yields an empty domain.
     */
    private static String extractDomain(String url) {
        int slash = url.indexOf('/');
        return slash < 0 ? url : url.substring(0, slash);
    }
}

ThirdReduce.java

package com.hniu.bigdata.hadoop.Third;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.TreeMap;

/**
 * Reducer for the "top ten search keywords" job.
 *
 * <p>Each call to {@link #reduce} sums the clicks of one {@code keyword_domain} group.
 * Across calls, this instance keeps at most {@value #TOP_N} entries, one per keyword
 * (the group with the highest click total for that keyword), ordered by descending
 * click total. The entries are written out in {@link #cleanup}.
 *
 * <p>The ranking is kept in a small list rather than a map keyed by
 * {@code ThirdSortBean}, so that two different keywords with the same click total are
 * kept as separate entries.
 *
 * <p>NOTE: the list is local to this reducer instance, so the output is a global top ten
 * only if the job runs with a single reduce task.
 */
public class ThirdReduce extends Reducer<Text,ThirdSortBean, Text, IntWritable> {

    /** Maximum number of keywords kept and emitted. */
    private static final int TOP_N = 10;

    /** Current best entries, sorted by descending click total; never longer than TOP_N. */
    private final ArrayList<ThirdSortBean> topTen = new ArrayList<ThirdSortBean>();

    /**
     * Sums the clicks of one group and offers the result to the top-N list.
     *
     * @param key     the {@code keyword_domain} grouping key
     * @param values  the per-click beans emitted by the mapper for this key
     * @param context task context (nothing is written here; see {@link #cleanup})
     */
    @Override
    protected void reduce(Text key, Iterable<ThirdSortBean> values, Context context) throws IOException, InterruptedException {

        ThirdSortBean result = new ThirdSortBean();
        int total_click = 0;
        for (ThirdSortBean data : values){
            total_click += data.getTotal_click();
            result.setKeyWords(data.getKeyWords());
            result.setDomain(data.getDomain());
        }
        result.setTotal_click(total_click);

        // Take the keyword from the bean rather than re-parsing the key: splitting the
        // key on "_" gives the wrong keyword whenever the keyword itself contains "_".
        String keywords = result.getKeyWords();
        if (keywords == null) {
            // No value carried a keyword; nothing to rank.
            return;
        }

        int existing = indexOfKeyword(keywords);
        if (existing >= 0) {
            if (total_click <= topTen.get(existing).getTotal_click()) {
                // The keyword is already ranked with an equal or better total.
                return;
            }
            topTen.remove(existing);
        }

        insertSorted(result);
        if (topTen.size() > TOP_N) {
            // The list is sorted descending, so the last element is the smallest.
            topTen.remove(topTen.size() - 1);
        }
    }

    /**
     * Writes the retained entries as {@code keyword -> click total}, highest total first.
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (ThirdSortBean entry : topTen) {
            context.write(new Text(entry.getKeyWords()), new IntWritable(entry.getTotal_click()));
        }
    }

    /** Returns the position of the entry for {@code keywords} in the list, or -1. */
    private int indexOfKeyword(String keywords) {
        for (int i = 0; i < topTen.size(); i++) {
            if (keywords.equals(topTen.get(i).getKeyWords())) {
                return i;
            }
        }
        return -1;
    }

    /** Inserts {@code bean} after all entries whose click total is greater or equal. */
    private void insertSorted(ThirdSortBean bean) {
        int position = 0;
        while (position < topTen.size()
                && topTen.get(position).getTotal_click() >= bean.getTotal_click()) {
            position++;
        }
        topTen.add(position, bean);
    }
}

ThirdSortBean.java

package com.hniu.bigdata.hadoop.Third;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class ThirdSortBean  implements WritableComparable<ThirdSortBean> {
    private String  keyWords;
    private String  domain;
    private int  total_click;


    public ThirdSortBean(){}

    public ThirdSortBean(String keyWords, String domain, int total_click) {
        this.keyWords = keyWords;
        this.domain = domain;
        this.total_click = total_click;
    }

    public String getKeyWords() {
        return keyWords;
    }

    public void setKeyWords(String keyWords) {
        this.keyWords = keyWords;
    }

    public String getDomain() {
        return domain;
    }

    public void setDomain(String domain) {
        this.domain = domain;
    }

    public int getTotal_click() {
        return total_click;
    }

    public void setTotal_click(int total_click) {
        this.total_click = total_click;
    }

    public int compareTo(ThirdSortBean o) {
        return  total_click > o.getTotal_click() ? -1 :(total_click == o.getTotal_click() ? 0 :1);
    }

    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(keyWords);
        dataOutput.writeUTF(domain);
        dataOutput.writeInt(total_click);
    }

    public void readFields(DataInput dataInput) throws IOException {
        keyWords = dataInput.readUTF();
        domain = dataInput.readUTF();
        total_click = dataInput.readInt();
    }

    @Override
    public String toString() {
        return "{" +
                "keyWords='" + keyWords + '\'' +
                ", domain='" + domain + '\'' +
                ", total_click=" + total_click +
                '}';
    }
}

ThirdDriver.java

package com.hniu.bigdata.hadoop.Third;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Entry point that configures and runs the "top ten search keywords" job
 * ({@code ThirdMapper} + {@code ThirdReduce}).
 */
public class ThirdDriver {

    /** Input path used when no first argument is given. */
    private static final String DEFAULT_INPUT_PATH = "/xyz";

    /** Output path used when no second argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "/Third_Data";

    /**
     * Runs the job and waits for it to finish.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the
     *             output path; each falls back to its default when absent
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception{
        Configuration configuration = new Configuration();

        configuration.set("fs.defaultFS","hdfs://192.168.179.46:8020");

        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

        Job job = Job.getInstance(configuration, "word count");

        job.setJarByClass(ThirdDriver.class);
        job.setMapperClass(ThirdMapper.class);
        job.setReducerClass(ThirdReduce.class);

        // The reducer keeps its ranking in memory per task, so the result is a single
        // top-ten list only when exactly one reduce task runs.
        job.setNumReduceTasks(1);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ThirdSortBean.class);
        job.setOutputKeyClass(Text.class);
        // ThirdReduce is declared as Reducer<Text, ThirdSortBean, Text, IntWritable>,
        // so the final output value type is IntWritable, not ThirdSortBean.
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Report a failed job to the caller through a non-zero exit status.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }

    }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值