本站数据集:https://download.csdn.net/download/FRESHET/12649563
类似这样的数据:
最后一列是得分,其阈值的具体含义我不太确定,这里暂且按"得分小于 -5"来提示风险。
1. Mapper 类
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that filters input records by the numeric score in their last column.
 *
 * <p>Each input line is split on whitespace; the last token is parsed as a float.
 * When that score is strictly below {@link #RISK_THRESHOLD}, the whole original
 * line is emitted as the output key with an empty {@link Text} value. Lines whose
 * last token is not a number (for example a header row) are skipped and counted
 * in a job counter instead of being silently discarded.
 */
public class CancerCntMapper extends Mapper<Object, Text, Text, Text> {

    /** Scores strictly below this value are reported as at-risk. */
    private static final float RISK_THRESHOLD = -5f;

    /** Counter group used for this job's diagnostic counters. */
    private static final String COUNTER_GROUP = "CancerCnt";

    /** Counter incremented for every non-blank line whose last column is not numeric. */
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    /** Empty output value, reused for every emitted record. */
    private final Text emptyValue = new Text();

    /**
     * Emits the input line when the score in its last column is below the threshold.
     *
     * @param key     input key supplied by the input format (unused)
     * @param value   one line of input text
     * @param context task context used to emit output and update counters
     * @throws IOException          if writing the output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(Object key, Text value, Mapper<Object, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // Blank lines carry no score; nothing to evaluate.
            return;
        }

        // Only the last column is needed. "\\s+" splits on any run of whitespace
        // (spaces and tabs alike), so consecutive separators do not matter.
        String[] columns = line.split("\\s+");

        float score;
        try {
            score = Float.parseFloat(columns[columns.length - 1]);
        } catch (NumberFormatException e) {
            // Non-numeric last column (e.g. a header row): skip the record but make
            // the skip visible in the job counters.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        if (score < RISK_THRESHOLD) {
            // The write is outside the try block so that I/O failures propagate
            // to the framework instead of being swallowed.
            context.write(value, emptyValue);
        }
    }
}
2. Reducer 类
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that tags every distinct key it receives with a fixed warning label.
 *
 * <p>The values grouped under a key are ignored; exactly one output record is
 * written per key, so identical keys coming from the map phase collapse into a
 * single output line.
 */
public class CannerCntReduce extends Reducer<Text, Text, Text, Text> {

    /** Label written as the value of every output record. */
    private final Text warningLabel = new Text("warning!");

    /**
     * Writes the key once, paired with the warning label.
     *
     * @param key     the key produced by the map phase
     * @param values  values grouped under {@code key} (not read)
     * @param context task context used to emit the output record
     * @throws IOException          if writing the output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        context.write(key, warningLabel);
    }
}
3.主类
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import com.lu.map.CancerCntMapper;
import com.lu.red.CannerCntReduce;
/**
 * Driver that configures and submits the "cancer count" MapReduce job, wiring
 * {@code CancerCntMapper} and {@code CannerCntReduce} together.
 *
 * <p>Usage: {@code CancerCnt [inputPath [outputPath]]}. Any path that is not
 * supplied on the command line falls back to the built-in default below.
 */
public class CancerCnt {

    /** Input path used when no first argument is given. */
    private static final String DEFAULT_INPUT =
            "hdfs://192.168.1.27:9000/test/wordcnt/in/data1.txt";

    /** Output path used when no second argument is given. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://192.168.1.27:9000/test/wordcnt/out/3";

    /**
     * Builds the job, runs it to completion and exits with status 0 on success
     * or 1 on failure.
     *
     * @param args optional {@code [inputPath, outputPath]}
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        String inputLocation = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputLocation = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "cancer count");
        job.setJarByClass(CancerCnt.class);
        job.setMapperClass(CancerCntMapper.class);
        job.setReducerClass(CannerCntReduce.class);

        // Both the output key and the output value are Text for this job.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(inputLocation));
        FileOutputFormat.setOutputPath(job, new Path(outputLocation));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
结果