在业务中有的时候不需要reduce,只需要map将日志解析出来,那么Job的代码如下:
/**
 * Map-only job: parses registration logs with {@code RegistrationsMapper}
 * and writes the mapper output straight to HDFS (no reduce phase).
 *
 * @param args command-line arguments (unused here; the input/output paths
 *             are derived from the {@code month}/{@code yesterday} fields)
 * @return 0 when the job succeeds, 1 when it fails
 * @throws Exception if job submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // NOTE(review): "fs.default.name" / "mapred.*" are the deprecated key
    // names ("fs.defaultFS", "mapreduce.output.textoutputformat.separator",
    // "mapreduce.map.output.compress" on modern Hadoop) — kept as-is for
    // the cluster version this was written against.
    conf.set("fs.default.name", "hdfs://172.16.1.50:8020");
    conf.set("mapred.textoutputformat.separator", ",");
    conf.set("mapred.compress.map.output", "true");
    String input = "/user/hdfs/source/log/termreg/" + month + "/term_reg_" + yesterday + ".dat";
    String output = "/user/hdfs/result/RegistrationsJob/" + yesterday + "/";
    Job job = new Job(conf, "RegistrationsJob");
    job.setJarByClass(RegistrationsJob.class);
    job.setMapperClass(RegistrationsMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    // Do NOT call job.setOutputFormatClass(FileOutputFormat.class) here:
    // FileOutputFormat is abstract and the job would fail at submission.
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setNumReduceTasks(0); // map-only: with 0 reducers the map output is final
    // BUG FIX: the original ignored the completion status and always
    // returned 0; propagate failure to the ToolRunner caller instead.
    return job.waitForCompletion(true) ? 0 : 1;
}
------------------------------------------------------------------------------------------------------------
上面的做法是将Mapper的输出写到文件,当我们需要将输出写到mysql的时候,可以用如下做法:
Job类:
/**
 * Map-only job that parses terminal-registration logs with
 * {@code AdcModelMapper} and writes each parsed record as a row of the
 * MySQL table {@code n_model} via {@code MysqlDBOutputFormat}.
 *
 * @param args command-line arguments (unused; the input path is built
 *             from the {@code month}/{@code yesterday} fields)
 * @return 0 when the job succeeds, 1 when it fails
 * @throws Exception if job submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    conf.set("fs.default.name", "hdfs://172.16.1.50:8020");
    // SECURITY(review): database credentials are hard-coded in source;
    // move them to a secured configuration / credential store.
    DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
            "jdbc:mysql://172.16.1.81:3308/reportdb", "admin", "tonggangdasha");
    // NOTE(review): hard-coded date overrides whatever was set before —
    // presumably a debugging leftover; confirm before removing.
    yesterday = "2014-07-09";
    String input = "/user/hdfs/source/222log/termreg/" + month + "/termreg.log." + yesterday;
    Job job = new Job(conf, "AdcModelJob");
    job.setJarByClass(AdcModelJob.class);
    job.setMapperClass(AdcModelMapper.class);
    job.setOutputKeyClass(AdcModelBean.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    // The map output KEY (AdcModelBean) is what gets written to the table;
    // these are the target columns, bound in this order.
    MysqlDBOutputFormat.setOutput(job, "n_model",
            "channel_id", "model", "resolution");
    job.setNumReduceTasks(0); // map-only job
    // BUG FIX: the original always returned 0 regardless of job outcome.
    return job.waitForCompletion(true) ? 0 : 1;
}
Mapper类:
/**
 * Parses one comma-separated log line per call and emits an
 * {@code AdcModelBean} as the map output key (the DB output format
 * persists the key; the value is unused, hence {@code null}).
 *
 * Expected layout (from the indices used below): field 5 = channel id,
 * field 6 = model, field 9 = a "-"-separated token whose 4th part is the
 * resolution — TODO confirm against the actual log format.
 */
public class AdcModelMapper extends Mapper<LongWritable, Text, AdcModelBean, Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // (typo fix: was "fileds")
        String[] fields = value.toString().split(",");
        String adc;
        String model;
        String resolution;
        try {
            adc = fields[5];
            model = fields[6];
            resolution = fields[9].split("-")[3];
        } catch (ArrayIndexOutOfBoundsException e) {
            // Malformed line: skip it and count it via a job counter
            // instead of spamming stderr (original used printStackTrace()
            // inside a broad catch(Exception)).
            context.getCounter("AdcModelMapper", "MALFORMED_LINES").increment(1);
            return;
        }
        AdcModelBean adcModel = new AdcModelBean();
        adcModel.setChannel_id(adc);
        adcModel.setModel(model);
        adcModel.setResolution(resolution);
        context.write(adcModel, null); // value unused by the DB output format
    }
}
Bean类:
/**
 * Row bean for the MySQL table {@code n_model}. Used as the map output
 * key; the DB output format calls {@link #write(PreparedStatement)} to
 * bind one row (channel_id, model, resolution) per emitted record.
 */
public class AdcModelBean implements Writable, DBWritable {

    private String channel_id;
    private String model;
    private String resolution;

    /** Binds the three columns in INSERT order: channel_id, model, resolution. */
    @Override
    public void write(PreparedStatement statement) throws SQLException {
        int index = 1;
        statement.setString(index++, this.getChannel_id());
        statement.setString(index++, this.getModel());
        statement.setString(index++, this.getResolution());
    }

    /** Reads the three columns back in the same order. */
    @Override
    public void readFields(ResultSet resultSet) throws SQLException {
        this.channel_id = resultSet.getString(1);
        this.model = resultSet.getString(2);
        this.resolution = resultSet.getString(3);
    }

    /**
     * Hadoop serialization. BUG FIX: the original left this empty, so the
     * bean would silently lose all state if it ever crossed the shuffle
     * (e.g. if reducers were enabled later). Null fields are encoded with
     * a presence flag because writeUTF(null) would throw.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        writeNullableString(out, channel_id);
        writeNullableString(out, model);
        writeNullableString(out, resolution);
    }

    /** Counterpart of {@link #write(DataOutput)}; restores all three fields. */
    @Override
    public void readFields(DataInput in) throws IOException {
        channel_id = readNullableString(in);
        model = readNullableString(in);
        resolution = readNullableString(in);
    }

    // Writes a presence flag followed by the modified-UTF-8 string when non-null.
    private static void writeNullableString(DataOutput out, String s) throws IOException {
        out.writeBoolean(s != null);
        if (s != null) {
            out.writeUTF(s);
        }
    }

    // Reads a value written by writeNullableString; returns null when absent.
    private static String readNullableString(DataInput in) throws IOException {
        return in.readBoolean() ? in.readUTF() : null;
    }

    public String getChannel_id() {
        return channel_id;
    }

    public void setChannel_id(String channel_id) {
        this.channel_id = channel_id;
    }

    public String getModel() {
        return model;
    }

    public void setModel(String model) {
        this.model = model;
    }

    public String getResolution() {
        return resolution;
    }

    public void setResolution(String resolution) {
        this.resolution = resolution;
    }
}