First, let's take a look at the requirements.
Then we get hold of our data.
You can see that the data still contains a lot of empty values; it is dirty data that has not been cleaned yet, and we will have to filter it out when we process it.
1. Count the distribution of vehicles by usage type
package hadoop.MapReduce.car.Use;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class usecount {

    // Mapper: emit (usage type, 1) for every record whose "use" field (column index 10) is present
    public static class UseMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] lines = value.toString().split("\t");
            // skip dirty records: too few columns, or an empty "use" field
            if (null != lines && lines.length > 10 && !lines[10].isEmpty()) {
                context.write(new Text(lines[10]), new IntWritable(1));
            }
        }
    }

    // Reducer: sum the 1s per usage type to get the count distribution
    public static class UseReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            context.write(key, new IntWritable(count));
        }
    }

    // Driver: wire up the job and run it against the local input/output paths
    public static class UseDriver {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf); // the new Job(conf) constructor is deprecated
            job.setJarByClass(UseDriver.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            job.setMapperClass(UseMapper.class);
            job.setReducerClass(UseReduce.class);
            FileInputFormat.addInputPath(job, new Path("D:\\a\\cars.txt"));
            // the output directory must not exist yet, otherwise the job fails
            FileOutputFormat.setOutputPath(job, new Path("D:\\a\\a1\\1"));
            job.waitForCompletion(true);
        }
    }
}
As you can see, here we used lines[10].isEmpty() to clean out the empty fields. If you don't want to do it this way, you can also wrap the parsing in try/catch and drop the bad records as exceptions.
At first we checked lines[10] != null, but that turned out not to work: String.split() never produces null elements, so empty fields come back as empty strings, the null check always passes, and the blanks still end up in the output.
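For comparison, here is a minimal sketch of that try/catch alternative (the class name UseMapperTryCatch is made up for illustration; the tab delimiter and field index 10 are carried over from the Mapper above, and this is not the code we actually ran): records that are too short to contain the use column throw ArrayIndexOutOfBoundsException and are silently dropped, while an empty field still needs an explicit isEmpty() check.
public static class UseMapperTryCatch extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        try {
            String[] lines = value.toString().split("\t");
            String use = lines[10]; // throws ArrayIndexOutOfBoundsException if the record is too short
            if (!use.isEmpty()) {   // an empty field does not throw, so it still needs this check
                context.write(new Text(use), new IntWritable(1));
            }
        } catch (ArrayIndexOutOfBoundsException e) {
            // dirty record with too few columns: just drop it
        }
    }
}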
2. Compute the proportion of car sales in Shanxi Province for each month of 2013
package hadoop.MapReduce.car.BiLi;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
public class bilicount {
    // static int all = 0;

    public static class BiliMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) t