SQL context available as sqlContext.
scala> var myVar : String = "Foo"
myVar: String = Foo
scala> val myVal : String = "Foo"
myVal: String = Foo
scala> var myVar : String = "Foo1"
myVar: String = Foo1
scala> myVal="aa"
<console>:27: error: reassignment to val
myVal="aa"
^
scala> myVal="aa";
<console>:27: error: reassignment to val
myVal="aa";
^
scala> myVal="Foo";
<console>:27: error: reassignment to val
myVal="Foo";
^
scala> myVar="jack"
myVar: String = jack
scala> var myVar = 10;
myVar: Int = 10
scala> val myVal = "Hello, Scala!";
myVal: String = Hello, Scala!
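
Note: the "reassignment to val" errors above are the whole difference between the two keywords: a var can be rebound, a val cannot (it is a read-only binding, though the object it refers to may itself be mutable). A minimal sketch with illustrative names:
var counter = 1
counter = 2                                   // fine: a var can be reassigned
val label = "Foo"
// label = "Bar"                              // does not compile: reassignment to val
val buf = scala.collection.mutable.ArrayBuffer[Int]()
buf += 1                                      // allowed: the binding is fixed, the buffer itself is mutable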
scala> val (myVar1: Int, myVar2: String) = Pair(40, "Foo")
myVar1: Int = 40
myVar2: String = Foo
scala> val jack: (myVar1: Int, myVar2: String) = Pair(40, "Foo")
<console>:1: error: ')' expected but ':' found.
val jack: (myVar1: Int, myVar2: String) = Pair(40, "Foo")
^
scala> val jack: (myVar1: Int, myVar2: String) = Pair(40, "Foo")
<console>:1: error: ')' expected but ':' found.
val jack: (myVar1: Int, myVar2: String) = Pair(40, "Foo")
^
scala> val (myVar1, myVar2) = Pair(40, "Foo")
myVar1: Int = 40
myVar2: String = Foo
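
Note: Pair(40, "Foo") is simply the two-element tuple (40, "Foo") (Pair is deprecated in later Scala versions). The failing "val jack: (myVar1: Int, ...)" lines put value names into a type position: a type ascription must name the tuple type, not fields. A sketch of what those lines were presumably after:
val pair = (40, "Foo")                    // tuple literal instead of Pair
val (a, b) = pair                         // a: Int = 40, b: String = Foo
val jack: (Int, String) = (40, "Foo")     // ascribe the tuple type (Int, String), not named fields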
scala> object Test{
| def main(args:Array[String]){
| var x = 10
| if(x < 20){ println(x) }
| }
| }
defined module Test

scala> Test
res0: Test.type = $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$Test$@63843e11
scala> Test
res1: Test.type = $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$Test$@63843e11
scala> Test.main("a")
<console>:28: error: type mismatch;
found : String("a")
required: Array[String]
Test.main("a")
^
scala> Test.main(List.("a"))
<console>:1: error: identifier expected but '(' found.
Test.main(List.("a"))
^
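
Note: main is declared as def main(args: Array[String]), so a bare String or the mistyped List.("a") cannot be passed. Assuming the Test object above compiled as intended, the call would be:
Test.main(Array("a"))            // Array("a") is an Array[String]
Test.main(List("a").toArray)     // converting from a List also works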
scala> val arr = sc.parallelize(Array(("A",1),("B",2),("C",3)));
arr: org.apache.spark.rdd.RDD[(String, Int)] = ParallelCollectionRDD[0] at parallelize at <console>:27
scala> arr.flatmap(x=>(x._1+x_2)).foreach(println);
<console>:30: error: value flatmap is not a member of org.apache.spark.rdd.RDD[(String, Int)]
arr.flatmap(x=>(x._1+x_2)).foreach(println);
^
scala> arr.map(x=>(x._1+x_2)).foreach(println);
<console>:30: error: not found: value x_2
arr.map(x=>(x._1+x_2)).foreach(println);
^
scala> arr.map(x=>(x._1+x._2)).foreach(println);
scala> val arr = sc.parallelize(Array(("A",1),("B",2),("C",3)));
arr: org.apache.spark.rdd.RDD[(String, Int)] = ParallelCollectionRDD[2] at parallelize at <console>:27
scala> arr.map(x=>(x._1+x._2)).foreach(println);
scala> List("a","b").foreach(println);
a
b
scala> val arr = sc.parallelize(Array(("A",1),("B",2),("C",3)));
arr: org.apache.spark.rdd.RDD[(String, Int)] = ParallelCollectionRDD[4] at parallelize at <console>:27
scala> println(arr);
ParallelCollectionRDD[4] at parallelize at <console>:27
scala> arr.foreach(println);
scala> val arr=sc.parallelize(Array(("A",1),("B",2),("C",3)))
arr: org.apache.spark.rdd.RDD[(String, Int)] = ParallelCollectionRDD[5] at parallelize at <console>:27
scala> arr.flatmap(x=>(x._1+x._2)).foreach(println)
<console>:30: error: value flatmap is not a member of org.apache.spark.rdd.RDD[(String, Int)]
arr.flatmap(x=>(x._1+x._2)).foreach(println)
^
scala> val arr=sc.parallelize(Array(("A",1),("B",2),("C",3)))
arr: org.apache.spark.rdd.RDD[(String, Int)] = ParallelCollectionRDD[6] at parallelize at <console>:27
scala> arr.map(x=>(x._1+x._2)).foreach(println)
scala> arr.flatMap(x=>(x._1+x._2)).foreach(println)
scala> arr.flatMap(x=>(x._1+x._2)).foreach(println)
scala> arr.flatMap(x=>(x._1+x._2)).foreach(println)
scala> arr.map(x=>(x._1+","+x._2)).foreach(println)
scala> arr.first();
res16: (String, Int) = (A,1)
scala> arr.count()
res17: Long = 3
scala> val rdd = sc.parallelize(1,10,2);
<console>:27: error: too many arguments for method parallelize: (seq: Seq[T], numSlices: Int)(implicit evidence$1: scala.reflect.ClassTag[T])org.apache.spark.rdd.RDD[T]
val rdd = sc.parallelize(1,10,2);
^
scala> val rdd = sc.parallelize(1 to 10,2);
rdd: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[12] at parallelize at <console>:27
scala> val reduceRDD = rdd.reduce(_ + _)
reduceRDD: Int = 55
scala> rdd.first()
res18: Int = 1
scala> rdd.count
res19: Long = 10
scala> val reduceRDD1 = rdd.reduce(_ - _)
reduceRDD1: Int = 15
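
Note: reduce(_ - _) = 15 is partition-dependent because subtraction is not associative. With the two partitions used here (1 to 5 and 6 to 10), each partition is folded first and the partial results are then combined, which is where the 15 comes from. The same arithmetic locally:
val p1 = (1 to 5).reduce(_ - _)    // -13
val p2 = (6 to 10).reduce(_ - _)   // -28
val combined = p1 - p2             // 15; only associative (and ideally commutative) ops are safe with reduce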
scala> val countRDD = rdd.count()
countRDD: Long = 10
scala> val firstRDD = rdd.first()
firstRDD: Int = 1
scala> val takeRDD = rdd.take(5)
takeRDD: Array[Int] = Array(1, 2, 3, 4, 5)
scala> val topRDD = rdd.top(3)
topRDD: Array[Int] = Array(10, 9, 8)
scala> val takeOrderedRDD = rdd.takeOrdered(3)
takeOrderedRDD: Array[Int] = Array(1, 2, 3)
scala> println("func +: "+reduceRDD)
func +: 55
scala> println("func -: "+reduceRDD1)
func -: 15
scala> println("count: "+countRDD)
count: 10
scala> println("first: "+firstRDD)
first: 1
scala> println("take:")
take:
scala> takeRDD.foreach(x => print(x +" "))
1 2 3 4 5
scala> takeRDD.foreach(x => println(x +" "))
1
2
3
4
5
scala> arr.flatMap(x=>(x._1+x._2)).foreach(x=>println(x +""))
scala> val arr=sc.parallelize(Array(("A",1),("B",2),("C",3)))
arr: org.apache.spark.rdd.RDD[(String, Int)] = ParallelCollectionRDD[16] at parallelize at <console>:27
scala> arr.take(1)
res28: Array[(String, Int)] = Array((A,1))
scala> arr.foreach(x=>println(x._1 +","+x._2))
scala> arr.lookup("A");
res30: Seq[Int] = WrappedArray(1)
scala> arr.countByKey()
res31: scala.collection.Map[String,Long] = Map(B -> 1, A -> 1, C -> 1)
scala> arr.collectAsMap
res32: scala.collection.Map[String,Int] = Map(A -> 1, C -> 3, B -> 2)
scala> val arr = List(("A", 1), ("B", 2), ("A", 2), ("B", 3))
arr: List[(String, Int)] = List((A,1), (B,2), (A,2), (B,3))
scala> val rdd = sc.parallelize(arr,2)
rdd: org.apache.spark.rdd.RDD[(String, Int)] = ParallelCollectionRDD[21] at parallelize at <console>:29
scala> val countByKeyRDD = rdd.countByKey()
countByKeyRDD: scala.collection.Map[String,Long] = Map(B -> 2, A -> 2)
scala> val collectAsMapRDD = rdd.collectAsMap()
collectAsMapRDD: scala.collection.Map[String,Int] = Map(A -> 2, B -> 3)
scala> countByKeyRDD.foreach(print)
(B,2)(A,2)
scala> collectAsMapRDD.foreach(print)
(A,2)(B,3)
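
Note: with duplicate keys the two actions differ: countByKey counts records per key, while collectAsMap builds a driver-side Map and keeps only one value per key (here the last one seen), which is why (A,1) and (B,2) disappeared. Both return local collections, not RDDs. A sketch:
val pairs = sc.parallelize(List(("A", 1), ("B", 2), ("A", 2), ("B", 3)), 2)
pairs.countByKey()                  // Map(A -> 2, B -> 2): occurrence counts
pairs.collectAsMap()                // Map(A -> 2, B -> 3): later values overwrite earlier ones
pairs.reduceByKey(_ + _).collect()  // Array((A,3), (B,5)): combines every value and stays an RDD until collected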
scala> val rdd = sc.parallelize(List(1,2,3,4),2)
rdd: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[24] at parallelize at <console>:27
scala> val aggregateRDD = rdd.aggregate(2)(_+_,_ * _)
aggregateRDD: Int = 90
scala> println(aggregateRDD)
90
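
Note: the 90 comes from the zero value 2 being applied once per partition by the seqOp and once more by the combOp. With the two partitions [1,2] and [3,4]: 2+1+2 = 5 and 2+3+4 = 9, then 2*5*9 = 90. The same arithmetic locally:
val p1 = List(1, 2).foldLeft(2)(_ + _)        // 5
val p2 = List(3, 4).foldLeft(2)(_ + _)        // 9
val total = List(p1, p2).foldLeft(2)(_ * _)   // 90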
scala> def setOp(a:String,b:String):String={
| println("seqOp:"+a+"\t"+b);
| math.min(a.length, b.length).toString();
| }
setOp: (a: String, b: String)String
scala> def combOp(a:String,b:String):String = {
| println("combOp:"+a+"\t"+b);
| a+b;
| }
combOp: (a: String, b: String)String
scala> val z = sc.parallelize(List("12","23","345","4567"),2);
z: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[25] at parallelize at <console>:27
scala> z.aggregate("")(seqOp,combOp);
<console>:32: error: not found: value seqOp
z.aggregate("")(seqOp,combOp);
^
scala> def seqOp(a:String,b:String):String={
| println("seqOp:"+a+"\t"+b);
| math.min(a.length, b.length).toString();
| }
seqOp: (a: String, b: String)String
scala> z.aggregate("")(seqOp,combOp);
[Stage 30:> (0 + 2) / 2]combOp: 1
[Stage 30:=============================> (1 + 1) / 2]combOp:1 1
res37: String = 11
scala> z.aggregate("")(seqOp,combOp);
combOp: 1
combOp:1 1
res38: String = 11
scala> z.aggregate("")(seqOp,combOp);
combOp: 1
combOp:1 1
res39: String = 11
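
Note: the seqOp lines never appear in the shell because the seqOp runs inside the executors (only the combOp runs on the driver); the earlier "not found: value seqOp" was simply the setOp typo in the first definition. Each of the two partitions ("12","23") and ("345","4567") is folded down to the string "1", and the driver then concatenates them, which matches the combOp output above. A local sketch of the same steps, reusing the seqOp/combOp defined in this session:
val part1 = List("12", "23").foldLeft("")(seqOp)       // "" -> "0" -> "1"
val part2 = List("345", "4567").foldLeft("")(seqOp)    // "" -> "0" -> "1"
val result = List(part1, part2).foldLeft("")(combOp)   // combOp("", "1") = "1", then combOp("1", "1") = "11"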
scala> def seqOp(a:String,b:String):String={
| println("seqOp jack:"+a+"\t"+b);
| math.min(a.length, b.length).toString();
| }
seqOp: (a: String, b: String)String
scala> def combOp(a:String,b:String):String = {
| println("combOp jack:"+a+"\t"+b);
| a+b;
| }
combOp: (a: String, b: String)String
scala> val z = sc.parallelize(List("12","23","345","4567"),2);
z: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[26] at parallelize at <console>:27
scala> z.aggregate("")(seqOp,combOp);
[Stage 33:> (0 + 2) / 2]combOp jack: 1
[Stage 33:=============================> (1 + 1) / 2]combOp jack:1 1
res40: String = 11
scala> z.aggregate("")(seqOp,combOp);
combOp jack: 1
[Stage 34:=============================> (1 + 1) / 2]combOp jack:1 1
res41: String = 11
scala> z.aggregate("")(seqOp,combOp);
[Stage 35:> (0 + 0) / 2]combOp jack: 1
combOp jack:1 1
res42: String = 11
scala> z.first()
res43: String = 12
scala> z.top(4)
res44: Array[String] = Array(4567, 345, 23, 12)
scala> z.count
res45: Long = 4
scala> val z = List("12","23","345","4567");
z: List[String] = List(12, 23, 345, 4567)
scala> z.aggregate("")(seqOp,combOp);
seqOp jack: 12
seqOp jack:0 23
seqOp jack:1 345
seqOp jack:1 4567
res46: String = 1
scala> def seqOp(a:String,b:String):String={
| println("seqOp jack:"+a+"\t"+b);
| println("return:"+math.min(a.length, b.length).toString());
| return math.min(a.length, b.length).toString();
| }
seqOp: (a: String, b: String)String
scala> def combOp(a:String,b:String):String = {
| println("combOp jack:"+a+"\t"+b);
| a+b;
| }
combOp: (a: String, b: String)String
scala> val z = List("12","23","345","4567");
z: List[String] = List(12, 23, 345, 4567)
scala> z.aggregate("")(seqOp,combOp);
seqOp jack: 12
return:0
seqOp jack:0 23
return:1
seqOp jack:1 345
return:1
seqOp jack:1 4567
return:1
res47: String = 1
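
Note: on a plain List (a sequential collection) aggregate does not split the work, so the combOp is never invoked: it behaves like foldLeft with the seqOp, which is why only seqOp lines print and the result is "1" rather than the "11" the RDD produced. The equivalence as a sketch:
val xs = List("12", "23", "345", "4567")
xs.foldLeft("")(seqOp)                // "" -> "0" -> "1" -> "1" -> "1"
xs.aggregate("")(seqOp, combOp)       // same value, "1"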
scala> countByKeyRDD.foreach(print)
(B,2)(A,2)
scala> collectAsMapRDD.foreach(print)
(A,2)(B,3)
scala> collectAsMapRDD.saveAsTextFile("hdfs://192.168.1.56/tmp/jackteset")
<console>:34: error: value saveAsTextFile is not a member of scala.collection.Map[String,Int]
collectAsMapRDD.saveAsTextFile("hdfs://192.168.1.56/tmp/jackteset")
^
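
Note: collectAsMap() already pulled the data to the driver as a local scala.collection.Map, which has no saveAsTextFile; saving has to happen on an RDD. One way, sketched with an illustrative output path (not from this session), is to re-parallelize the local map:
sc.parallelize(collectAsMapRDD.toSeq).saveAsTextFile("hdfs://192.168.1.56/tmp/jacktest_map")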
scala> var rdd1 = sc.makeRDD(1 to 10,2)
rdd1: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[28] at makeRDD at <console>:27
scala> rdd1.saveAsTextFile("hdfs://192.168.1.56/tmp/jackteset")
scala> rdd1.saveAsTextFile("hdfs://192.168.1.56/tmp/jackteset1",classOf[org.apache.hadoop.io.compress.SnappyCodec])</console></console></console></console></console></console></console></console></console></console></console></console></console></console></console></console></console></console></console></console></console></console></console></console></console></console>