前言:刚接触 MapReduce 程序的人可能对多文件输出不太熟悉。今天翻出了很久之前写的一段代码,贴到博客上,希望对新手有所帮助。欢迎一起讨论,共同进步。
MapReduce 多文件输出的 Reducer 代码如下:
public class ReduceLiantongBushuju extends Reducer<Text, Text, Text, Text>{
private static Text textValue = new Text();
private static Text miyao = new Text();
private static Text outValue = new Text();
byte b1[] = {0x01};
String st= new String(b1);
Set<String> ss = new HashSet<String>();
Set<String> imeiMiyao = new HashSet<String>();
private static ObjectMapper objectMapper = new ObjectMapper();
private static final String urlPath = "URL";//没给出,涉及到公司机密
private static final String table = "pengjing_mobile_01";
HttpClient httpClient;
private MultipleOutputs<Text, Text> outputs;
DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
String path =null;
@Override
protected void setup(Reducer<Text, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
httpClient = new DefaultHttpClient();
outputs = new MultipleOutputs<Text, Text>(context);
path = context.getConfiguration().get("miyaopath"); //获取驱动类传入的字符串,作为多文件输出的地址
}
protected void reduce(Text key, Iterable<Text> values,
Context context) throws java.io.IOException, InterruptedException {
context.getCounter("Monitor","reduce num").increment(1L);
String str=null;
String str1=null;
String info =null;
for (Text text : values) {
ss.add(text.toString());
}
if(ss.size()>=2){
for(String te:ss){
if(te.indexOf(st)!=-1){
str=te;
}else{
str1=te+st;
}
}
textValue.set(str1+str);
}else if(ss.size()==1){
for(String te:ss){
if(te.indexOf(st)!=-1){
context.getCounter("Monitor","key not data num").increment(1L);
for (int i = 0; i < 3; i++) {
context.getCounter("Monitor","key2 not data num").increment(1L);
try {
String newKey = getHttpData(key.toString());
if (!newKey.equals("-1") && !newKey.equals("1")) {
info = newKey + st + te;
context.getCounter("Monitor","http out num").increment(1L);
textValue.set(info);
miyao.set(key+"|"+newKey);
outputs.write(miyao,outValue, path); //输出文件
context.getCounter("Monitor","reduce ok1 num").increment(1L);
break;
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
}
if((str != null && str1 !=null) || info != null){
context.write(textValue, outValue); //输出文件
context.getCounter("Monitor",
"reduce ok1 num").increment(1L);
}
imeiMiyao.clear();
ss.clear();
}
@Override
protected void cleanup(Reducer<Text, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
outputs.close();
}
getHttpData()方法涉及到公司机密,且跟本篇文章关系不大,就不列出了。
}
MapReduce多文件输出不涉及到map类,也不列出了。
下面是驱动类:
/**
 * Driver that configures and submits the job using
 * {@code MapLiantongBushuju} and {@code ReduceLiantongBushuju}.
 *
 * <p>Arguments:
 * <ol start="0">
 *   <li>first input path list (comma-separated)</li>
 *   <li>second input path list (comma-separated)</li>
 *   <li>output directory (deleted first if it already exists)</li>
 *   <li>number of reduce tasks</li>
 *   <li>string passed to the reducer as the {@code miyaopath} property,
 *       which it uses as the side-output path</li>
 * </ol>
 */
public class MainLiantongBushujuJob {
    /**
     * Builds the job from the command-line arguments and waits for it to finish.
     * Exits with status 2 on a usage error and status 1 if the job fails.
     *
     * @param args see the class description
     * @throws Exception if job setup or submission fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 5) {
            System.err.println("Usage: MainLiantongBushujuJob <input1> <input2> <output> "
                    + "<numReduceTasks> <miyaopath>");
            System.exit(2);
        }
        // Parse before touching the filesystem so a bad number cannot cost us
        // an existing output directory.
        int num = Integer.parseInt(args[3]);

        Configuration config = new Configuration();
        config.set("miyaopath", args[4]); // handed to the reducer via the configuration
        Job job = Job.getInstance(config);

        FileInputFormat.addInputPaths(job, args[0]);
        FileInputFormat.addInputPaths(job, args[1]);

        Path outPath = new Path(args[2]);
        // Resolve the filesystem from the path itself so an output location on a
        // non-default filesystem is handled as well.
        FileSystem fs = outPath.getFileSystem(config);
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);

        job.setJarByClass(MainLiantongBushujuJob.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(MapLiantongBushuju.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(ReduceLiantongBushuju.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setNumReduceTasks(num);

        if (job.waitForCompletion(true)) {
            System.out.println("ok!");
        } else {
            // Non-zero status so schedulers and shell scripts can detect the failure.
            System.exit(1);
        }
    }
}