服务器提交mr任务
- 正常任务如下
hadoop jar fei-hadoop.jar com.fei.bigdata.hadoop.mapreduce.wc.WCDriver /wordcount/input1 /wordcount/output2
- 需要额外依赖jar时,得同时使用HADOOP_CLASSPATH(客户端本地类路径)和-libjars(把jar分发到集群各节点)
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:~/lib/mysql-connector-java-5.1.27-bin.jar
hadoop jar fei-hadoop.jar com.fei.bigdata.hadoop.mapreduce.db.MySQLReadDriver2 -libjars ~/lib/mysql-connector-java-5.1.27-bin.jar /db
其中-libjars参数能被解析,前提是程序通过ToolRunner提交(由GenericOptionsParser处理通用参数),导出mysql数据的程序实现如下
/**
 * MapReduce driver that reads rows from the MySQL table {@code dept}
 * (columns deptno, dname, loc) via {@link DBInputFormat} and writes them
 * to the HDFS output path given as the first command-line argument.
 *
 * <p>Extends {@link Configured} and implements {@link Tool} so that
 * {@link ToolRunner} can parse generic options such as {@code -libjars}
 * before the remaining arguments reach {@link #run(String[])}.
 */
public class MySQLReadDriver2 extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // ToolRunner strips generic options (-libjars, -D, ...) and passes
        // the remaining args to run(); its return value is the exit code.
        int run = ToolRunner.run(configuration, new MySQLReadDriver2(), args);
        System.exit(run);
    }

    /**
     * Configures and submits the job.
     *
     * @param args args[0] is the HDFS output directory
     * @return 0 on job success, 1 on failure (used as the process exit code)
     */
    @Override
    public int run(String[] args) throws Exception {
        String output = args[0];
        // 1) Build the job configuration. Use the Configuration injected by
        //    ToolRunner (super.getConf()) so -D/-libjars settings are kept.
        Configuration configuration = super.getConf();
        // NOTE(review): credentials are hard-coded placeholders; prefer
        // passing them via -D properties or a secured config file.
        DBConfiguration.configureDB(configuration, "com.mysql.jdbc.Driver",
                "jdbc:mysql://hadoop01:3306/feidata", "xxxx", "xxxx");
        Job job = Job.getInstance(configuration);
        // Remove a pre-existing output directory so the job does not fail.
        FileUtils.deleteOutput(configuration, output);
        // 2) Main class of this job (used to locate the job jar).
        job.setJarByClass(MySQLReadDriver2.class);
        // 3) Mapper only; no Reducer is set, so this is a map-only job.
        job.setMapperClass(MysqlMapper.class);
        // 4) Mapper output types (also the job output types here).
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(DeptWritable.class);
        // 6) Input: the "dept" table, mapped onto DeptWritable; output: HDFS.
        String[] fields = {"deptno", "dname", "loc"};
        DBInputFormat.setInput(job, DeptWritable.class, "dept", null, null, fields);
        FileOutputFormat.setOutputPath(job, new Path(output));
        // 7) Submit and wait; report success/failure to the caller.
        //    (Bug fix: previously always returned 1, so main exited with a
        //    failure code even when the job succeeded.)
        boolean result = job.waitForCompletion(true);
        return result ? 0 : 1;
    }

    /** Identity mapper: emits each DeptWritable row under a NullWritable key. */
    public static class MysqlMapper extends Mapper<LongWritable, DeptWritable, NullWritable, DeptWritable> {
        @Override
        protected void map(LongWritable key, DeptWritable value, Context context)
                throws IOException, InterruptedException {
            context.write(NullWritable.get(), value);
        }
    }
}
如果需要额外依赖100多个jar包需要shell脚本拼接,可以参考hadoop-env.sh
# Append every jar under ~/lib to HADOOP_CLASSPATH.
# Quote the expansion so paths containing spaces are not word-split.
for f in ~/lib/*.jar
do
  export HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$f"
done