package com.hebut.mr;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBOutputFormat;
import org.apache.hadoop.mapred.lib.db.DBWritable;
publicclassWriteDB {
// Map处理过程
publicstaticclassMap extendsMapReduceBase implements
Mapper {
privatefinalstaticIntWritable one= newIntWritable(1);
privateText word= newText();
@Override
publicvoidmap(Object key, Text value,
OutputCollector output, Reporter reporter)
throwsIOException {
String line = value.toString();
StringTokenizer tokenizer = newStringTokenizer(line);
while(tokenizer.hasMoreTokens()) {
word.set(tokenizer.nextToken());
output.collect(word, one);
}
}
}
// Combine处理过程
publicstaticclassCombine extendsMapReduceBase implements
Reducer {
@Override
publicvoidreduce(Text key, Iterator values,
OutputCollector output, Reporter reporter)
throwsIOException {
intsum = 0;
while(values.hasNext()) {
sum += values.next().get();
}
output.collect(key, newIntWritable(sum));
}
}
// Reduce处理过程
publicstaticclassReduce extendsMapReduceBase implements
Reducer {
@Override
publicvoidreduce(Text key, Iterator values,
OutputCollector collector, Reporter reporter)
throwsIOException {
intsum = 0;
while(values.hasNext()) {
sum += values.next().get();
}
WordRecord wordcount = newWordRecord();
wordcount.word= key.toString();
wordcount.number= sum;
collector.collect(wordcount, newText());
}
}
publicstaticclassWordRecord implementsWritable, DBWritable {
publicString word;
publicintnumber;
@Override
publicvoidreadFields(DataInput in) throwsIOException {
this.word= Text.readString(in);
this.number= in.readInt();
}
@Override
publicvoidwrite(DataOutput out) throwsIOException {
Text.writeString(out, this.word);
out.writeInt(this.number);
}
@Override
publicvoidreadFields(ResultSet result) throwsSQLException {
this.word= result.getString(1);
this.number= result.getInt(2);
}
@Override
publicvoidwrite(PreparedStatement stmt) throwsSQLException {
stmt.setString(1, this.word);
stmt.setInt(2, this.number);
}
}
publicstaticvoidmain(String[] args) throwsException {
JobConf conf = newJobConf(WriteDB.class);
//这句话很关键
conf.set("mapred.job.tracker", "192.168.1.2:9001");
DistributedCache.addFileToClassPath(newPath(
"/lib/MySQL-connector-java-5.1.18-bin.jar"), conf);
//设置输入输出类型
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(DBOutputFormat.class);
//不加这两句,通不过,但是网上给的例子没有这两句。
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
//设置Map和Reduce类
conf.setMapperClass(Map.class);
conf.setCombinerClass(Combine.class);
conf.setReducerClass(Reduce.class);
//设置输如目录
FileInputFormat.setInputPaths(conf, newPath("wdb_in"));
//建立数据库连接
DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
"jdbc:mysql://192.168.1.24:3306/school", "root", "hadoop");
//写入"wordcount"表中的数据
String[] fields = { "word", "number"};
DBOutputFormat.setOutput(conf, "wordcount", fields);
JobClient.runJob(conf);
}
}