Scala is a concise, script-like language built on top of the JVM, and Hadoop's internals are plain Java, so Java is the most natural fit for writing MapReduce. So how does Scala fare?
Scala encourages you to omit type annotations, e.g. val a = "hello", yet Hadoop's MapReduce interfaces are full of explicit generic parameters. It is a bit like asking someone who only speaks English to go learn Chinese: not very friendly!
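A tiny illustration of that friction (the snippet below is only a sketch with made-up names, not part of the original program): plain Scala lets the compiler infer types, while Hadoop's generic interfaces force every type parameter and Writable wrapper to be spelled out.
import org.apache.hadoop.io.{IntWritable, LongWritable, Text}
import org.apache.hadoop.mapreduce.Mapper

object TypeFriction {
  // plain Scala: the type of `a` is inferred as String
  val a = "hello"
  // Hadoop: all four generic parameters of Mapper must be written out,
  // and plain values must be wrapped in Writable types such as IntWritable
  class NoOpMapper extends Mapper[LongWritable, Text, Text, IntWritable]
  val one = new IntWritable(1)
}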
Here is the detailed exploration.
First, let's look at how Scala interacts with HDFS:
- reading, writing, uploading, and downloading files
Add the Hadoop dependency to the project in IDEA (Maven):
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.3</version>
</dependency>
Write an object (a standalone Scala singleton) whose main method drives everything:
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

/**
 * Connect to the Hadoop cluster, then read, write, and copy files
 */
object ConHdfs {
  // Configuration and FileSystem handle
  val conf = new Configuration()
  conf.set("fs.defaultFS", "hdfs://localhost:9000")
  val fs = FileSystem.get(conf)

  // Read a file from HDFS and print its contents
  def readFile(): Unit = {
    val in = fs.open(new Path("/people.txt"))
    val buf = new Array[Byte](1024)
    var len = in.read(buf)
    while (len != -1) {
      print(new String(buf, 0, len))
      len = in.read(buf)
    }
    // close the stream
    in.close()
  }

  // Write a file to HDFS
  def writeFile(): Unit = {
    val out = fs.create(new Path("/a.txt"), true)
    for (x <- 1 to 5) {
      out.writeBytes("hello," + x)
    }
    out.close()
  }

  // Upload a local file to HDFS, then download a file back to the local disk
  def cpFile(): Unit = {
    fs.copyFromLocalFile(new Path("/home/wang/a.txt"), new Path("/a2.txt"))
    fs.copyToLocalFile(new Path("/a.txt"), new Path("/home/wang/aaa.txt"))
    fs.close() // closes the shared FileSystem, so call this method last
  }

  /**
   * Entry point
   */
  def main(args: Array[String]): Unit = {
    readFile()
    writeFile()
    cpFile()
  }
}
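A small robustness tweak, not in the original code: fs.open throws an exception when the path is missing, so a variant of readFile (readFileSafely is a made-up name, reusing the fs handle from ConHdfs) can guard with FileSystem.exists first.
def readFileSafely(path: String): Unit = {
  val p = new Path(path)
  if (fs.exists(p)) {          // avoid a FileNotFoundException on a missing path
    val in = fs.open(p)
    try {
      val buf = new Array[Byte](1024)
      var len = in.read(buf)
      while (len != -1) {
        print(new String(buf, 0, len))
        len = in.read(buf)
      }
    } finally in.close()       // always release the stream
  } else {
    println(s"$path does not exist on HDFS")
  }
}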
Next, let's look at how Scala interacts with MapReduce.
- Prepare the three components: mapper, reducer, and job.
To be scheduled by the Hadoop framework they must extend the Mapper and Reducer base classes and set up the Job correctly; only then do they plug cleanly into Hadoop's MapReduce runtime.
import org.apache.hadoop.conf.{Configuration, Configured}
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{IntWritable, LongWritable, Text}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.mapreduce.{Job, Mapper, Reducer}
import org.apache.hadoop.util.{Tool, ToolRunner}
import scala.collection.JavaConverters._
/**
* mapper
*/
class Map1 extends Mapper[LongWritable, Text, Text, IntWritable] {
  override def map(key: LongWritable,
                   value: Text,
                   context: Mapper[LongWritable, Text, Text, IntWritable]#Context): Unit = {
    // split the input line on commas
    val words = value.toString
    val arr = words.split(",")
    // emit (word, 1) for every word
    for (x <- arr) {
      context.write(new Text(x), new IntWritable(1))
    }
  }
}
/**
* reducer
*/
class Red1 extends Reducer[Text, IntWritable, Text, IntWritable] {
  // Hadoop passes the values as a java.lang.Iterable, not a Scala collection,
  // so the parameter must be declared as such for this to actually override reduce()
  override def reduce(key: Text,
                      values: java.lang.Iterable[IntWritable],
                      context: Reducer[Text, IntWritable, Text, IntWritable]#Context): Unit = {
    // sum up the counts for this word
    var cnt: Int = 0
    for (x <- values.asScala) {
      cnt += x.get()
    }
    context.write(key, new IntWritable(cnt))
  }
}
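This is exactly where the friction shows up: Hadoop hands reduce a java.lang.Iterable rather than a Scala collection, so without the java.lang.Iterable parameter type (plus the JavaConverters import and .asScala) the Scala method does not actually override reduce, and the job silently falls back to the default identity reducer.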
/**
 * Driver class
 */
object MrTest extends Configured with Tool {
  override def run(args: Array[String]): Int = {
    val conf = getConf
    conf.set("fs.defaultFS", "hdfs://localhost:9000")
    val job = Job.getInstance(conf)
    job.setJarByClass(this.getClass)
    job.setJobName("wordcnt")
    // mapper, combiner, reducer classes
    job.setMapperClass(classOf[Map1])
    job.setCombinerClass(classOf[Red1])
    job.setReducerClass(classOf[Red1])
    // output key/value types (shared by the map and reduce phases)
    job.setOutputKeyClass(classOf[Text])
    job.setOutputValueClass(classOf[IntWritable])
    // input and output paths
    FileInputFormat.addInputPath(job, new Path("/people.txt"))
    FileOutputFormat.setOutputPath(job, new Path("/wc"))
    // submit and wait for the job to finish
    val status: Boolean = job.waitForCompletion(true)
    // exit code: 0 on success, 1 on failure
    if (status) 0 else 1
  }

  def main(args: Array[String]): Unit = {
    // ToolRunner supplies the Configuration returned by getConf above
    System.exit(ToolRunner.run(this, args))
  }
}
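Two practical notes for running this. First, MapReduce refuses to start when the output directory already exists, so /wc must be removed between runs; reusing the HDFS FileSystem API from the first section, that could look like the sketch below (to be placed in run before the job is submitted; the names mirror the variables already defined there). Second, since the job classes are compiled Scala, the scala-library jar has to be on the classpath when the job jar is submitted with hadoop jar.
import org.apache.hadoop.fs.FileSystem

val output = new Path("/wc")
val fs = FileSystem.get(conf)
if (fs.exists(output)) {
  fs.delete(output, true) // recursively delete the stale output directory
}
FileOutputFormat.setOutputPath(job, output)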