First, write a custom RecordWriter (here called IRecordWrite) that extends RecordWriter and pushes each key/value pair into MySQL over JDBC.
package calllog;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
public class IRecordWrite extends RecordWriter<Text, IntWritable> {

    @Override
    public void write(Text key, IntWritable value) throws IOException, InterruptedException {
        String driver = "com.mysql.jdbc.Driver";
        String url = "jdbc:mysql://192.168.120.110:3306/calllog?characterEncoding=UTF-8";
        String user = "root";
        String password = "******";
        System.out.println("start writing record");
        Connection conn = null;
        Statement statement = null;
        // Parse the tab-separated key and build the composite primary key
        String string = key.toString();
        String[] split = string.split("\t");
        String zhujian = split[0] + "_" + split[1] + "_" + split[2];
        try {
            Class.forName(driver);
            conn = DriverManager.getConnection(url, user, password);
            conn.setAutoCommit(true);
            statement = conn.createStatement();
            // Upsert: update the existing row if the key exists, insert otherwise
            // INSERT INTO `tb_call` (`id_date_contact`, `id_date_dimension`, `id_contact`, `call_sum`, `call_duration_sum`) VALUES (?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE `id_date_contact` = ?;
            String sql = "INSERT INTO mylog VALUES ('" + zhujian + "','" + split[0] + "','" + split[1]
                    + "','" + split[2] + "','" + value.toString()
                    + "') ON DUPLICATE KEY UPDATE name_phone_time = '" + zhujian + "';";
            System.out.println(sql);
            statement.execute(sql);
            System.err.println("--------- insert succeeded --------------------------");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            try {
                if (statement != null) {
                    statement.close();
                }
                if (conn != null) {
                    conn.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        // Nothing to release here: the connection is opened and closed inside write()
    }
}
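As written, write() opens a fresh JDBC connection and splices values straight into the SQL string for every single record, which is slow and open to SQL injection. A minimal sketch of a sturdier variant, assuming the same mylog table and connection settings: open the connection once in the constructor, reuse a PreparedStatement, and release both in close(). The class name PooledRecordWriter is made up for illustration.

package calllog;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Sketch only: same table layout as IRecordWrite assumes, but one connection per task
public class PooledRecordWriter extends RecordWriter<Text, IntWritable> {

    private Connection conn;
    private PreparedStatement stmt;

    public PooledRecordWriter() throws IOException {
        try {
            Class.forName("com.mysql.jdbc.Driver");
            conn = DriverManager.getConnection(
                    "jdbc:mysql://192.168.120.110:3306/calllog?characterEncoding=UTF-8",
                    "root", "******");
            // Parameterized upsert: no string concatenation, so no SQL injection
            stmt = conn.prepareStatement(
                    "INSERT INTO mylog VALUES (?, ?, ?, ?, ?)"
                    + " ON DUPLICATE KEY UPDATE name_phone_time = ?");
        } catch (ClassNotFoundException | SQLException e) {
            throw new IOException("cannot open MySQL connection", e);
        }
    }

    @Override
    public void write(Text key, IntWritable value) throws IOException {
        String[] split = key.toString().split("\t");
        String pk = split[0] + "_" + split[1] + "_" + split[2];
        try {
            stmt.setString(1, pk);
            stmt.setString(2, split[0]);
            stmt.setString(3, split[1]);
            stmt.setString(4, split[2]);
            stmt.setString(5, value.toString());
            stmt.setString(6, pk);
            stmt.execute();
        } catch (SQLException e) {
            throw new IOException("insert failed for key " + pk, e);
        }
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException {
        // Release the shared statement and connection once the task finishes
        try {
            stmt.close();
            conn.close();
        } catch (SQLException e) {
            throw new IOException(e);
        }
    }
}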
Next, write a custom OutputFormat that extends OutputFormat and hands back the IRecordWrite above.
package calllog;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
public class IOutputFormat extends OutputFormat<Text, IntWritable> {

    @Override
    public RecordWriter<Text, IntWritable> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        return new IRecordWrite();
    }

    @Override
    public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
        // No output directory to validate: records go to MySQL, not HDFS
    }

    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // Borrow NullOutputFormat's committer, since there is no HDFS output to commit
        return new org.apache.hadoop.mapreduce.lib.output.NullOutputFormat<Text, IntWritable>()
                .getOutputCommitter(context);
    }
}
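Note that ON DUPLICATE KEY UPDATE only kicks in if mylog declares name_phone_time as its primary (or unique) key. Only the table name and that key column are confirmed by the SQL above; the remaining column names and types in this one-off helper are assumptions, added just to make the example self-contained.

package calllog;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

// One-off helper to create the table the writer expects
public class CreateMylogTable {
    public static void main(String[] args) throws Exception {
        Class.forName("com.mysql.jdbc.Driver");
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://192.168.120.110:3306/calllog?characterEncoding=UTF-8",
                "root", "******");
             Statement st = conn.createStatement()) {
            st.execute("CREATE TABLE IF NOT EXISTS mylog ("
                    + " name_phone_time VARCHAR(255) PRIMARY KEY," // composite key: field1_field2_field3
                    + " field1 VARCHAR(64),"     // assumed column name
                    + " field2 VARCHAR(64),"     // assumed column name
                    + " field3 VARCHAR(64),"     // assumed column name
                    + " call_count VARCHAR(32))"); // stores value.toString()
        }
    }
}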
Finally, in the driver, set the job to use the custom OutputFormat.
package calllog;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class Idriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(Idriver.class);

        // Wire up the mapper and reducer
        job.setMapperClass(Imap.class);
        job.setReducerClass(Ireduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Partitioning (optional)
        //job.setPartitionerClass(IPartitioner.class);
        //job.setNumReduceTasks(4);

        // Final output goes through the custom OutputFormat
        //job.setOutputKeyClass(Text.class);
        //job.setOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(IOutputFormat.class);

        // Input path only; there is no HDFS output path, records go to MySQL
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job and wait for it to finish
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
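The driver references Imap and Ireduce, which are not shown in this section. A minimal sketch consistent with the Text/IntWritable types set above: the mapper is assumed to emit each line (already holding the three tab-separated key fields) with a count of 1, and the reducer sums the counts; the real job's field extraction may differ.

// Imap.java
package calllog;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Imap extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Assumption: the input line is already "field1<TAB>field2<TAB>field3"
        context.write(value, ONE);
    }
}

// Ireduce.java
package calllog;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class Ireduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the counts for each distinct key; the total lands in MySQL via IRecordWrite
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        context.write(key, new IntWritable(sum));
    }
}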