Scala + Spark API
package com. credi
import org. apache. spark. rdd. RDD
import org. apache. spark. { SparkConf, SparkContext}
/** Minimal word-count job that runs on a local Spark master.
  *
  * Reads the text file at `./Words`, splits every line on single spaces,
  * counts how often each token occurs and prints each `(word, count)` pair.
  */
object ScalaWordCountEasy {

  /** Runs the word count and prints the result with `println`.
    *
    * @param args command-line arguments; not used
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local")
      .setAppName("SWC")
    val sc = new SparkContext(conf)

    try {
      sc.textFile("./Words")
        // Splitting on a single space means consecutive spaces produce
        // empty-string tokens, which are counted like any other word.
        .flatMap(_.split(" "))
        .map(word => (word, 1))
        .reduceByKey(_ + _)
        .foreach(println)
    } finally {
      // Stop the context even when the job throws, so it is always released.
      sc.stop()
    }
  }
}
Java 代码实现 Spark API
package com. java;
import org. apache. spark. SparkConf;
import org. apache. spark. SparkContext;
import org. apache. spark. api. java. JavaPairRDD;
import org. apache. spark. api. java. JavaRDD;
import org. apache. spark. api. java. JavaSparkContext;
import org. apache. spark. api. java. function . * ;
import java. util. Arrays;
import java. util. List;
import org. codehaus. janino. Java;
import scala. Tuple2;
import scala. tools. cmd. gen. AnyVals;
/**
 * Word-count example written against the Spark Java API.
 *
 * <p>Reads {@code ./wc.txt}, splits each line on single spaces, counts the
 * occurrences of every token and prints each result as {@code word : count}.
 */
public class sprkAPI {

    /**
     * Runs the word count on a local Spark master and prints the counts.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("local").setAppName("wc");
        JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);

        try {
            JavaRDD<String> javaRDD = javaSparkContext.textFile("./wc.txt");

            // NOTE(review): returning Iterable matches the Spark 1.x FlatMapFunction
            // contract; Spark 2.x and later expect an Iterator here -- confirm the
            // Spark version this example targets.
            JavaRDD<String> words = javaRDD.flatMap(new FlatMapFunction<String, String>() {
                @Override
                public Iterable<String> call(String s) throws Exception {
                    // Consecutive spaces yield empty-string tokens that are counted too.
                    return Arrays.asList(s.split(" "));
                }
            });

            // Pair every token with an initial count of 1.
            PairFunction<String, String, Integer> f = new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) throws Exception {
                    // Explicit type arguments avoid the raw-type unchecked warning.
                    return new Tuple2<String, Integer>(s, 1);
                }
            };
            JavaPairRDD<String, Integer> wordMap = words.mapToPair(f);

            // Sum the counts of identical tokens.
            JavaPairRDD<String, Integer> rdd = wordMap.reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer integer, Integer integer2) throws Exception {
                    return integer + integer2;
                }
            });

            rdd.foreach(new VoidFunction<Tuple2<String, Integer>>() {
                @Override
                public void call(Tuple2<String, Integer> stringIntegerTuple2) throws Exception {
                    System.out.println(stringIntegerTuple2._1 + " : " + stringIntegerTuple2._2);
                }
            });
        } finally {
            // Stop the context even when the job throws, so it is always released.
            javaSparkContext.stop();
        }
    }
}