同一种搜索词，哪个网站被用户访问得最多
FourMapper.java
package com.hniu.bigdata.hadoop.Four;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
public class FourMapper extends Mapper<LongWritable, Text,Text, FourSortBean> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
// 日期数据处理
String[] values = line.split("\\s");
String time = values[values.length - 1];
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
String dateTime = format.format(new Date(Long.parseLong(time)));
// 域名切分
String url = values[4];
values[values.length - 1] = dateTime;
String domain = url.split("/")[0];
values[4] = domain;
line = StringUtils.join(" ",values);
String keyStr =values[0] +"_"+domain;
if(dateTime.contains("2019")) {
FourSortBean data = new FourSortBean()