WordCount案例的Java实现和Scala实现
WordCount案例的Java实现
maven项目导入依赖
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.2</version>
</dependency>
Mapper类
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class WCMapper extends Mapper<LongWritable,Text,Text,IntWritable> {
String name;
public void setup(Context context){
name = context.getConfiguration().get("name");
}
public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException {
String[] strs = value.toString().split("\t");
for (String str : strs) {
Text text = new Text();
text.set(name + "-" + str);
context.write(text,new IntWritable(1));
}
}
public void cleanup(Context context) throws IOException, InterruptedException {
super.cleanup(context);
}
}
Reducer类
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import java.io.IOException;
public class WCReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
MultipleOutputs mo;
public void setup(Context context) throws IOException, InterruptedException {
mo = new MultipleOutputs(context);
super.setup(context);
}
public void reduce(Text key,Iterable<IntWritable> value,Context context) throws IOException, InterruptedException {
int sum = 0;
for(IntWritable i : value){
sum += i.get();
}
mo.write(key,new IntWritable(sum),"CDS");
mo.write(key,new IntWritable(sum),"CES");
}
public void cleanup(Context context) throws IOException, InterruptedException {
mo.close();
super.cleanup(context);
}
}
Driver类
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the WordCount job: configures the job, deletes a stale output
 * directory if present, and runs the job in blocking mode.
 */
public class WCDriver {

    public static void main(String[] args) throws Exception {
        // Single definitions of both paths — the original repeated the output
        // path literal in two places, which invites drift when one is edited.
        final String inputPath = "C:/Users/陈大帅/Desktop/a.txt";
        final String outputPath = "C:/Users/陈大帅/Desktop/a";

        Configuration conf = new Configuration();
        // Read by WCMapper.setup() and prepended to every emitted token.
        conf.set("name", "cxb");

        Job job = Job.getInstance(conf);
        job.setJarByClass(WCDriver.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(WCMapper.class);
        job.setReducerClass(WCReducer.class);

        // MapReduce fails if the output directory already exists, so remove
        // any leftover from a previous run before submitting.
        Path output = new Path(outputPath);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, output);

        // waitForCompletion(true) blocks and prints progress; returns job success.
        boolean succeeded = job.waitForCompletion(true);
        System.out.println(succeeded);
    }
}
WordCount案例的Scala实现
maven项目导入依赖
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.1.1</version>
</dependency>
Object
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Spark implementation of WordCount: reads a tab-separated text file,
 * counts word occurrences, and saves the (word, count) pairs as text.
 */
object WCScala {

  def main(args: Array[String]): Unit = {
    // Single definitions of both paths; the original duplicated the output
    // path literal in the existence check and in saveAsTextFile.
    val inputPath = "C:/Users/陈大帅/Desktop/a.txt"
    val outputPath = "C:/Users/陈大帅/Desktop/a"

    val conf = new SparkConf()
      .setMaster("local[*]")          // run locally with all available cores
      .setAppName(this.getClass.getName)
    val sc = new SparkContext(conf)

    // Read with a single partition (minPartitions = 1) and split each line on tabs.
    val source: RDD[String] = sc.textFile(inputPath, 1)
    val words: RDD[String] = source.flatMap(_.split("\t", -1))

    // Removed the original .cache() here: wordAndOne is consumed exactly once
    // (by reduceByKey), so caching only wasted memory without any reuse benefit.
    val wordAndOne: RDD[(String, Int)] = words.map((_, 1))
    val wordAndOneCount: RDD[(String, Int)] = wordAndOne.reduceByKey(_ + _)

    // saveAsTextFile fails if the directory exists, so delete any stale output first.
    val path = new Path(outputPath)
    val fs: FileSystem = FileSystem.get(new Configuration())
    if (fs.exists(path)) {
      fs.delete(path, true)
    }

    wordAndOneCount.saveAsTextFile(outputPath)
    sc.stop()
  }
}