package cn.itning.job.wordcount
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
/**
 * Driver that configures and submits the word-count MapReduce job.
 *
 * Reads text lines from the HDFS input path and writes (word, count)
 * pairs to the output path. Exits with status 0 on job success, 1 on failure.
 */
object WordCountRunner {
    @JvmStatic
    fun main(args: Array<String>) {
        val configuration = Configuration()
        val job = Job.getInstance(configuration)
        // Locate the jar containing the job's classes via this class.
        job.setJarByClass(WordCountRunner::class.java)
        // Mapper and reducer for this job; the reducer also serves as a
        // map-side combiner (safe because summation is associative/commutative).
        job.mapperClass = WordCountMapper::class.java
        job.reducerClass = WordCountReducer::class.java
        job.combinerClass = WordCountReducer::class.java
        // Key/value types emitted by the mapper.
        job.mapOutputKeyClass = Text::class.java
        job.mapOutputValueClass = LongWritable::class.java
        // Key/value types of the final (reducer) output.
        job.outputKeyClass = Text::class.java
        job.outputValueClass = LongWritable::class.java
        // TextInputFormat is Hadoop's default; set explicitly for clarity.
        job.inputFormatClass = TextInputFormat::class.java
        // Path holding the raw input data to process.
        FileInputFormat.setInputPaths(job, "hdfs://192.168.84.132:9000/test")
        // Path for the results; must not exist yet or the job will fail.
        FileOutputFormat.setOutputPath(job, Path("hdfs://192.168.84.132:9000/test/output"))
        // Propagate job success/failure to the shell as the exit code
        // (the original discarded waitForCompletion's result).
        System.exit(if (job.waitForCompletion(true)) 0 else 1)
    }
}
package cn.itning.job.wordcount
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce.Mapper
/**
 * Splits each input line on single spaces and emits (token, 1) for every token.
 *
 * NOTE(review): splitting on " " emits empty tokens for consecutive spaces,
 * matching the original behavior — confirm whether blanks should be filtered.
 */
class WordCountMapper : Mapper<LongWritable, Text, Text, LongWritable>() {
    // Writables are reused across calls to avoid allocating two objects
    // per emitted record (standard Hadoop mapper practice).
    private val word = Text()
    private val one = LongWritable(1)

    override fun map(key: LongWritable?, value: Text?, context: Context?) {
        // Skip null records safely instead of asserting with `!!`,
        // which would throw an NPE and kill the task.
        val line = value?.toString() ?: return
        for (token in line.split(" ")) {
            word.set(token)
            context?.write(word, one)
        }
    }
}
package cn.itning.job.wordcount
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce.Reducer
class WordCountReducer : Reducer<Text, LongWritable, Text, LongWritable>() {
override fun reduce(key: Text?, values: MutableIterable<LongWritable>?, context: Context?) {
var count: Long = 0
for (value in values!!) {
count += value.get()
}
context?.write(key, LongWritable(count))
}
}