用java编写spark程序,简单示例及运行



最近因为工作需要,研究了下spark,因为scala还不熟,所以先学习了java的spark程序写法,下面是我的简单测试程序的代码,大部分函数的用法已在注释里面注明。


我的环境:hadoop 2.2.0

                   spark-0.9.0

                   scala-2.10.3

                   jdk1.7



  1. import org.apache.spark.api.java.JavaPairRDD;  
  2. import org.apache.spark.api.java.JavaRDD;  
  3. import org.apache.spark.api.java.JavaSparkContext;  
  4. import org.apache.spark.api.java.function.FlatMapFunction;  
  5. import org.apache.spark.api.java.function.Function;  
  6. import org.apache.spark.api.java.function.Function2;  
  7. import org.apache.spark.api.java.function.PairFunction;  
  8. import scala.Tuple2;  
  9.   
  10.   
  11. import java.util.Arrays;  
  12. import java.util.List;  
  13. import java.util.regex.Pattern;  
  14.   
  15.   
  16. public final class mysparktest {  
  17.   
  18.   
  19.     public static void main(String[] args) throws Exception {  
  20.   
  21.   
  22.        //context ,用于读文件 ,类似于scala的sc  
  23.        //格式为:  
  24.        // JavaSparkContext(master: String, appName: String, sparkHome: String, jars: Array[String], environment: Map[String, String])  
  25.         JavaSparkContext ctx = new JavaSparkContext("yarn-standalone", "JavaWordCount",  
  26.                 System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(mysparktest.class));  
  27.   
  28.   
  29.         //也可以使用ctx获取环境变量,例如下面的语句  
  30.         System.out.println("spark home:"+ctx.getSparkHome());  
  31.   
  32.   
  33.   
  34.   
  35.   
  36.   
  37.          //一次一行,String类型    ,还有hadoopfile,sequenceFile什么的  ,可以直接用sc.textFile("path")  
  38.         JavaRDD<String> lines = ctx.textFile(args[1], 1);  //java.lang.String path, int minSplits  
  39.         lines.cache();   //cache,暂时放在缓存中,一般用于哪些可能需要多次使用的RDD,据说这样会减少运行时间  
  40.   
  41.   
  42.         //collect方法,用于将RDD类型转化为java基本类型,如下  
  43.         List<String> line = lines.collect();  
  44.         for(String val:line)  
  45.                 System.out.println(val);  
  46.   
  47.   
  48.        //下面这些也是RDD的常用函数  
  49.        // lines.collect();  List<String>  
  50.        // lines.union();     javaRDD<String>  
  51.        // lines.top(1);     List<String>  
  52.        // lines.count();      long  
  53.        // lines.countByValue();  
  54.   
  55.   
  56.         /** 
  57.          *   filter test 
  58.          *   定义一个返回bool类型的函数,spark运行filter的时候会过滤掉那些返回值为false的数据 
  59.          *   String s,中的变量s可以认为就是变量lines(lines可以理解为一系列的String类型数据)的每一条数据 
  60.          */  
  61.         JavaRDD<String> contaninsE = lines.filter(new Function<String, Boolean>() {  
  62.             @Override  
  63.             public Boolean call(String s) throws Exception {  
  64.   
  65.   
  66.                return (s.contains("they"));  
  67.             }  
  68.         });  
  69.         System.out.println("--------------next filter's  result------------------");  
  70.         line = contaninsE.collect();  
  71.         for(String val:line)  
  72.             System.out.println(val);  
  73.   
  74.   
  75.         /** 
  76.          * sample test 
  77.          * sample函数使用很简单,用于对数据进行抽样 
  78.          * 参数为:withReplacement: Boolean, fraction: Double, seed: Int 
  79.          * 
  80.          */  
  81.   
  82.   
  83.         JavaRDD<String> sampletest = lines.sample(false,0.1,5);  
  84.         System.out.println("-------------next sample-------------------");  
  85.         line = sampletest.collect();  
  86.         for(String val:line)  
  87.             System.out.println(val);  
  88.   
  89.   
  90.   
  91.   
  92.         /** 
  93.          * 
  94.          * new FlatMapFunction<String, String>两个string分别代表输入和输出类型 
  95.          * Override的call方法需要自己实现一个转换的方法,并返回一个Iterable的结构 
  96.          * 
  97.          * flatmap属于一类非常常用的spark函数,简单的说作用就是将一条rdd数据使用你定义的函数给分解成多条rdd数据 
  98.          * 例如,当前状态下,lines这个rdd类型的变量中,每一条数据都是一行String,我们现在想把他拆分成1个个的词的话, 
  99.          * 可以这样写 : 
  100.          */  
  101.   
  102.   
  103.         JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {  
  104.             @Override  
  105.             public Iterable<String> call(String s) {  
  106.                  String[] words=s.split(" ");  
  107.                   return Arrays.asList(words);  
  108.             }  
  109.         });  
  110.   
  111.   
  112.   
  113.   
  114.         /** 
  115.          * map 键值对 ,类似于MR的map方法 
  116.          * pairFunction<T,K,V>: T:输入类型;K,V:输出键值对 
  117.          * 需要重写call方法实现转换 
  118.          */  
  119.         JavaPairRDD<String, Integer> ones = words.map(new PairFunction<String, String, Integer>() {  
  120.             @Override  
  121.             public Tuple2<String, Integer> call(String s) {  
  122.                 return new Tuple2<String, Integer>(s, 1);  
  123.             }  
  124.         });  
  125.   
  126.   
  127.   
  128.   
  129.   
  130.   
  131.   
  132.   
  133.   
  134.   
  135.   
  136.   
  137.         //A two-argument function that takes arguments  
  138.         // of type T1 and T2 and returns an R.  
  139.         /** 
  140.          *  reduceByKey方法,类似于MR的reduce 
  141.          *  要求被操作的数据(即下面实例中的ones)是KV键值对形式,该方法会按照key相同的进行聚合,在两两运算 
  142.          */  
  143.         JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() {  
  144.             @Override  
  145.             public Integer call(Integer i1, Integer i2) {  //reduce阶段,key相同的value怎么处理的问题  
  146.                 return i1 + i2;  
  147.             }  
  148.         });  
  149.   
  150.   
  151.         //备注:spark也有reduce方法,输入数据是RDD类型就可以,不需要键值对,  
  152.         // reduce方法会对输入进来的所有数据进行两两运算  
  153.   
  154.   
  155.   
  156.   
  157.   
  158.   
  159.         /** 
  160.          * sort,顾名思义,排序 
  161.          */  
  162.         JavaPairRDD<String,Integer> sort = counts.sortByKey();  
  163.         System.out.println("----------next sort----------------------");  
  164.   
  165.   
  166.   
  167.   
  168.         /** 
  169.          * collect方法其实之前已经出现了多次,该方法用于将spark的RDD类型转化为我们熟知的java常见类型 
  170.          */  
  171.         List<Tuple2<String, Integer>> output = sort.collect();  
  172.         for (Tuple2<?,?> tuple : output) {  
  173.             System.out.println(tuple._1 + ": " + tuple._2());  
  174.         }  
  175.   
  176.   
  177.   
  178.   
  179.         /** 
  180.          * 保存函数,数据输出,spark为结果输出提供了很多接口 
  181.          */  
  182.         sort.saveAsTextFile("/tmp/spark-tmp/test");  
  183.   
  184.   
  185.   
  186.   
  187.   
  188.   
  189.        // sort.saveAsNewAPIHadoopFile();  
  190.       //  sort.saveAsHadoopFile();  
  191.         System.exit(0);  
  192.     }  
  193. }  
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;


import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;


public final class mysparktest {


    public static void main(String[] args) throws Exception {


       //context ,用于读文件 ,类似于scala的sc
       //格式为:
       // JavaSparkContext(master: String, appName: String, sparkHome: String, jars: Array[String], environment: Map[String, String])
        JavaSparkContext ctx = new JavaSparkContext("yarn-standalone", "JavaWordCount",
                System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(mysparktest.class));


        //也可以使用ctx获取环境变量,例如下面的语句
        System.out.println("spark home:"+ctx.getSparkHome());






         //一次一行,String类型    ,还有hadoopfile,sequenceFile什么的  ,可以直接用sc.textFile("path")
        JavaRDD<String> lines = ctx.textFile(args[1], 1);  //java.lang.String path, int minSplits
        lines.cache();   //cache,暂时放在缓存中,一般用于哪些可能需要多次使用的RDD,据说这样会减少运行时间


        //collect方法,用于将RDD类型转化为java基本类型,如下
        List<String> line = lines.collect();
        for(String val:line)
                System.out.println(val);


       //下面这些也是RDD的常用函数
       // lines.collect();  List<String>
       // lines.union();     javaRDD<String>
       // lines.top(1);     List<String>
       // lines.count();      long
       // lines.countByValue();


        /**
         *   filter test
         *   定义一个返回bool类型的函数,spark运行filter的时候会过滤掉那些返回只为false的数据
         *   String s,中的变量s可以认为就是变量lines(lines可以理解为一系列的String类型数据)的每一条数据
         */
        JavaRDD<String> contaninsE = lines.filter(new Function<String, Boolean>() {
            @Override
            public Boolean call(String s) throws Exception {


               return (s.contains("they"));
            }
        });
        System.out.println("--------------next filter's  result------------------");
        line = contaninsE.collect();
        for(String val:line)
            System.out.println(val);


        /**
         * sample test
         * sample函数使用很简单,用于对数据进行抽样
         * 参数为:withReplacement: Boolean, fraction: Double, seed: Int
         *
         */


        JavaRDD<String> sampletest = lines.sample(false,0.1,5);
        System.out.println("-------------next sample-------------------");
        line = sampletest.collect();
        for(String val:line)
            System.out.println(val);




        /**
         *
         * new FlatMapFunction<String, String>两个string分别代表输入和输出类型
         * Override的call方法需要自己实现一个转换的方法,并返回一个Iterable的结构
         *
         * flatmap属于一类非常常用的spark函数,简单的说作用就是将一条rdd数据使用你定义的函数给分解成多条rdd数据
         * 例如,当前状态下,lines这个rdd类型的变量中,每一条数据都是一行String,我们现在想把他拆分成1个个的词的话,
         * 可以这样写 :
         */


        JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> call(String s) {
                 String[] words=s.split(" ");
                  return Arrays.asList(words);
            }
        });




        /**
         * map 键值对 ,类似于MR的map方法
         * pairFunction<T,K,V>: T:输入类型;K,V:输出键值对
         * 需要重写call方法实现转换
         */
        JavaPairRDD<String, Integer> ones = words.map(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) {
                return new Tuple2<String, Integer>(s, 1);
            }
        });












        //A two-argument function that takes arguments
        // of type T1 and T2 and returns an R.
        /**
         *  reduceByKey方法,类似于MR的reduce
         *  要求被操作的数据(即下面实例中的ones)是KV键值对形式,该方法会按照key相同的进行聚合,在两两运算
         */
        JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer i1, Integer i2) {  //reduce阶段,key相同的value怎么处理的问题
                return i1 + i2;
            }
        });


        //备注:spark也有reduce方法,输入数据是RDD类型就可以,不需要键值对,
        // reduce方法会对输入进来的所有数据进行两两运算






        /**
         * sort,顾名思义,排序
         */
        JavaPairRDD<String,Integer> sort = counts.sortByKey();
        System.out.println("----------next sort----------------------");




        /**
         * collect方法其实之前已经出现了多次,该方法用于将spark的RDD类型转化为我们熟知的java常见类型
         */
        List<Tuple2<String, Integer>> output = sort.collect();
        for (Tuple2<?,?> tuple : output) {
            System.out.println(tuple._1 + ": " + tuple._2());
        }




        /**
         * 保存函数,数据输出,spark为结果输出提供了很多接口
         */
        sort.saveAsTextFile("/tmp/spark-tmp/test");






       // sort.saveAsNewAPIHadoopFile();
      //  sort.saveAsHadoopFile();
        System.exit(0);
    }
}


代码编写完成之后,打包上传到Linux上,编写spark程序的执行脚本:

  1. #! /bin/bash  
  2. export YARN_CONF_DIR=/usr/lib/cloud/hadoop/hadoop-2.2.0/etc/hadoop  
  3. export SPARK_JAR=/usr/lib/cloud/spark/spark-0.9.0-incubating-bin-hadoop2/assembly/target/scala-2.10/spark-assembly_2.10-0.9.0-incubating-hadoop2.2.0.jar  
  4.   
  5. /usr/lib/cloud/spark/spark-0.9.0-incubating-bin-hadoop2/bin/spark-class org.apache.spark.deploy.yarn.Client \  
  6. --jar mysparktest.jar \  
  7. --class mysparktest \  
  8. --args yarn-standalone \  
  9. --args /user/zhangdeyang/testspark \  
  10. --num-workers 3 \  
  11. --master-memory 485m \  
  12. --worker-memory 485m \  
  13. --worker-cores 2  
#! /bin/bash
# Submit the word-count demo to YARN via the Spark 0.9 yarn Client.
export YARN_CONF_DIR=/usr/lib/cloud/hadoop/hadoop-2.2.0/etc/hadoop
export SPARK_JAR=/usr/lib/cloud/spark/spark-0.9.0-incubating-bin-hadoop2/assembly/target/scala-2.10/spark-assembly_2.10-0.9.0-incubating-hadoop2.2.0.jar

# --jar names the application jar; --class expects the fully-qualified main
# class inside that jar (NOT the jar file name again).
/usr/lib/cloud/spark/spark-0.9.0-incubating-bin-hadoop2/bin/spark-class org.apache.spark.deploy.yarn.Client \
--jar mysparktest.jar \
--class mysparktest \
--args yarn-standalone \
--args /user/zhangdeyang/testspark \
--num-workers 3 \
--master-memory 485m \
--worker-memory 485m \
--worker-cores 2


其中输入数据保存在 

/user/zhangdeyang/testspark中,测试数据如下:                                   

  1.     Look! at the window there leans an old maid. She plucks the  
  2.   
  3. withered leaf from the balsam, and looks at the grass-covered rampart,  
  4.   
  5. on which many children are playing. What is the old maid thinking  
  6.   
  7. of? A whole life drama is unfolding itself before her inward gaze.  
  8.   
  9.     "The poor little children, how happy they are- how merrily they  
  10.   
  11. play and romp together! What red cheeks and what angels' eyes! but  
  12.   
  13. they have no shoes nor stockings. They dance on the green rampart,  
  14.   
  15. just on the place where, according to the old story, the ground always  
  16.   
  17. sank in, and where a sportive, frolicsome child had been lured by  
  18.   
  19. means of flowers, toys and sweetmeats into an open grave ready dug for  
  20.   
  21. it, and which was afterwards closed over the child; and from that  
  22.   
  23. moment, the old story says, the ground gave way no longer, the mound  
  24.   
  25. remained firm and fast, and was quickly covered with the green turf.  
  26.   
  27. The little people who now play on that spot know nothing of the old  
  28.   
  29. tale, else would they fancy they heard a child crying deep below the  
  30.   
  31. earth, and the dewdrops on each blade of grass would be to them  
  32.   
  33. tears of woe. Nor do they know anything of the Danish King who here,  
  34.   
  35. in the face of the coming foe, took an oath before all his trembling  
  36.   
  37. courtiers that he would hold out with the citizens of his capital, and  
  38.   
  39. die here in his nest; they know nothing of the men who have fought  
  40.   
  41. here, or of the women who from here have drenched with boiling water  
  42.   
  43. the enemy, clad in white, and 'biding in the snow to surprise the  
  44.   
  45. city.  
  46.   
  47. .  
    Look! at the window there leans an old maid. She plucks the

withered leaf from the balsam, and looks at the grass-covered rampart,

on which many children are playing. What is the old maid thinking

of? A whole life drama is unfolding itself before her inward gaze.

    "The poor little children, how happy they are- how merrily they

play and romp together! What red cheeks and what angels' eyes! but

they have no shoes nor stockings. They dance on the green rampart,

just on the place where, according to the old story, the ground always

sank in, and where a sportive, frolicsome child had been lured by

means of flowers, toys and sweetmeats into an open grave ready dug for

it, and which was afterwards closed over the child; and from that

moment, the old story says, the ground gave way no longer, the mound

remained firm and fast, and was quickly covered with the green turf.

The little people who now play on that spot know nothing of the old

tale, else would they fancy they heard a child crying deep below the

earth, and the dewdrops on each blade of grass would be to them

tears of woe. Nor do they know anything of the Danish King who here,

in the face of the coming foe, took an oath before all his trembling

courtiers that he would hold out with the citizens of his capital, and

die here in his nest; they know nothing of the men who have fought

here, or of the women who from here have drenched with boiling water

the enemy, clad in white, and 'biding in the snow to surprise the

city.

.


运行我们编写的运行脚本,可得结果如下:


  1. spark home:Optional.of(/usr/lib/cloud/spark/spark-0.9.0-incubating-bin-hadoop2)  
  2.  Look! at the window there leans an old maid. She plucks the  
  3.   
  4. withered leaf from the balsam, and looks at the grass-covered rampart,  
  5.   
  6. on which many children are playing. What is the old maid thinking  
  7.   
  8. of? A whole life drama is unfolding itself before her inward gaze.  
  9.   
  10.     "The poor little children, how happy they are- how merrily they  
  11.   
  12. play and romp together! What red cheeks and what angels' eyes! but  
  13.   
  14. they have no shoes nor stockings. They dance on the green rampart,  
  15.   
  16. just on the place where, according to the old story, the ground always  
  17.   
  18. sank in, and where a sportive, frolicsome child had been lured by  
  19.   
  20. means of flowers, toys and sweetmeats into an open grave ready dug for  
  21.   
  22. it, and which was afterwards closed over the child; and from that  
  23.   
  24. moment, the old story says, the ground gave way no longer, the mound  
  25.   
  26. remained firm and fast, and was quickly covered with the green turf.  
  27.   
  28. The little people who now play on that spot know nothing of the old  
  29.   
  30. tale, else would they fancy they heard a child crying deep below the  
  31.   
  32. earth, and the dewdrops on each blade of grass would be to them  
  33.   
  34. tears of woe. Nor do they know anything of the Danish King who here,  
  35.   
  36. in the face of the coming foe, took an oath before all his trembling  
  37.   
  38. courtiers that he would hold out with the citizens of his capital, and  
  39.   
  40. die here in his nest; they know nothing of the men who have fought  
  41.   
  42. here, or of the women who from here have drenched with boiling water  
  43.   
  44. the enemy, clad in white, and 'biding in the snow to surprise the  
  45.   
  46. city.  
  47. --------------next filter's  result------------------  
  48.     "The poor little children, how happy they are- how merrily they  
  49. they have no shoes nor stockings. They dance on the green rampart,  
  50. tale, else would they fancy they heard a child crying deep below the  
  51. tears of woe. Nor do they know anything of the Danish King who here,  
  52. die here in his nest; they know nothing of the men who have fought  
  53. -------------next sample-------------------  
  54.     "The poor little children, how happy they are- how merrily they  
  55.   
  56. it, and which was afterwards closed over the child; and from that  
  57. in the face of the coming foe, took an oath before all his trembling  
  58. ----------next sort----------------------  
  59. : 27  
  60. "The: 1  
  61. 'biding: 1  
  62. A: 1  
  63. Danish: 1  
  64. King: 1  
  65. Look!: 1  
  66. Nor: 1  
  67. She: 1  
  68. The: 1  
  69. They: 1  
  70. What: 2  
  71. a: 2  
  72. according: 1  
  73. afterwards: 1  
  74. all: 1  
  75. always: 1  
  76. an: 3  
  77. and: 12  
  78. angels': 1  
  79. anything: 1  
  80. are: 1  
  81. are-: 1  
  82. at: 2  
  83. balsam,: 1  
  84. be: 1  
  85. been: 1  
  86. before: 2  
  87. below: 1  
  88. blade: 1  
  89. boiling: 1  
  90. but: 1  
  91. by: 1  
  92. capital,: 1  
  93. cheeks: 1  
  94. child: 2  
  95. child;: 1  
  96. children: 1  
  97. children,: 1  
  98. citizens: 1  
  99. city.: 1  
  100. clad: 1  
  101. closed: 1  
  102. coming: 1  
  103. courtiers: 1  
  104. covered: 1  
  105. crying: 1  
  106. dance: 1  
  107. deep: 1  
  108. dewdrops: 1  
  109. die: 1  
  110. do: 1  
  111. drama: 1  
  112. drenched: 1  
  113. dug: 1  
  114. each: 1  
  115. earth,: 1  
  116. else: 1  
  117. enemy,: 1  
  118. eyes!: 1  
  119. face: 1  
  120. fancy: 1  
  121. fast,: 1  
  122. firm: 1  
  123. flowers,: 1  
  124. foe,: 1  
  125. for: 1  
  126. fought: 1  
  127. frolicsome: 1  
  128. from: 3  
  129. gave: 1  
  130. gaze.: 1  
  131. grass: 1  
  132. grass-covered: 1  
  133. grave: 1  
  134. green: 2  
  135. ground: 2  
  136. had: 1  
  137. happy: 1  
  138. have: 3  
  139. he: 1  
  140. heard: 1  
  141. her: 1  
  142. here: 2  
  143. here,: 2  
  144. his: 3  
  145. hold: 1  
  146. how: 2  
  147. in: 4  
  148. in,: 1  
  149. into: 1  
  150. inward: 1  
  151. is: 2  
  152. it,: 1  
  153. itself: 1  
  154. just: 1  
  155. know: 3  
  156. leaf: 1  
  157. leans: 1  
  158. life: 1  
  159. little: 2  
  160. longer,: 1  
  161. looks: 1  
  162. lured: 1  
  163. maid: 1  
  164. maid.: 1  
  165. many: 1  
  166. means: 1  
  167. men: 1  
  168. merrily: 1  
  169. moment,: 1  
  170. mound: 1  
  171. nest;: 1  
  172. no: 2  
  173. nor: 1  
  174. nothing: 2  
  175. now: 1  
  176. oath: 1  
  177. of: 9  
  178. of?: 1  
  179. old: 5  
  180. on: 5  
  181. open: 1  
  182. or: 1  
  183. out: 1  
  184. over: 1  
  185. people: 1  
  186. place: 1  
  187. play: 2  
  188. playing.: 1  
  189. plucks: 1  
  190. poor: 1  
  191. quickly: 1  
  192. rampart,: 2  
  193. ready: 1  
  194. red: 1  
  195. remained: 1  
  196. romp: 1  
  197. sank: 1  
  198. says,: 1  
  199. shoes: 1  
  200. snow: 1  
  201. sportive,: 1  
  202. spot: 1  
  203. stockings.: 1  
  204. story: 1  
  205. story,: 1  
  206. surprise: 1  
  207. sweetmeats: 1  
  208. tale,: 1  
  209. tears: 1  
  210. that: 3  
  211. the: 26  
  212. them: 1  
  213. there: 1  
  214. they: 7  
  215. thinking: 1  
  216. to: 3  
  217. together!: 1  
  218. took: 1  
  219. toys: 1  
  220. trembling: 1  
  221. turf.: 1  
  222. unfolding: 1  
  223. was: 2  
  224. water: 1  
  225. way: 1  
  226. what: 1  
  227. where: 1  
  228. where,: 1  
  229. which: 2  
  230. white,: 1  
  231. who: 4  
  232. whole: 1  
  233. window: 1  
  234. with: 3  
  235. withered: 1  
  236. woe.: 1  
  237. women: 1  
  238. would: 3  
spark home:Optional.of(/usr/lib/cloud/spark/spark-0.9.0-incubating-bin-hadoop2)
 Look! at the window there leans an old maid. She plucks the

withered leaf from the balsam, and looks at the grass-covered rampart,

on which many children are playing. What is the old maid thinking

of? A whole life drama is unfolding itself before her inward gaze.

    "The poor little children, how happy they are- how merrily they

play and romp together! What red cheeks and what angels' eyes! but

they have no shoes nor stockings. They dance on the green rampart,

just on the place where, according to the old story, the ground always

sank in, and where a sportive, frolicsome child had been lured by

means of flowers, toys and sweetmeats into an open grave ready dug for

it, and which was afterwards closed over the child; and from that

moment, the old story says, the ground gave way no longer, the mound

remained firm and fast, and was quickly covered with the green turf.

The little people who now play on that spot know nothing of the old

tale, else would they fancy they heard a child crying deep below the

earth, and the dewdrops on each blade of grass would be to them

tears of woe. Nor do they know anything of the Danish King who here,

in the face of the coming foe, took an oath before all his trembling

courtiers that he would hold out with the citizens of his capital, and

die here in his nest; they know nothing of the men who have fought

here, or of the women who from here have drenched with boiling water

the enemy, clad in white, and 'biding in the snow to surprise the

city.
--------------next filter's  result------------------
    "The poor little children, how happy they are- how merrily they
they have no shoes nor stockings. They dance on the green rampart,
tale, else would they fancy they heard a child crying deep below the
tears of woe. Nor do they know anything of the Danish King who here,
die here in his nest; they know nothing of the men who have fought
-------------next sample-------------------
    "The poor little children, how happy they are- how merrily they

it, and which was afterwards closed over the child; and from that
in the face of the coming foe, took an oath before all his trembling
----------next sort----------------------
: 27
"The: 1
'biding: 1
A: 1
Danish: 1
King: 1
Look!: 1
Nor: 1
She: 1
The: 1
They: 1
What: 2
a: 2
according: 1
afterwards: 1
all: 1
always: 1
an: 3
and: 12
angels': 1
anything: 1
are: 1
are-: 1
at: 2
balsam,: 1
be: 1
been: 1
before: 2
below: 1
blade: 1
boiling: 1
but: 1
by: 1
capital,: 1
cheeks: 1
child: 2
child;: 1
children: 1
children,: 1
citizens: 1
city.: 1
clad: 1
closed: 1
coming: 1
courtiers: 1
covered: 1
crying: 1
dance: 1
deep: 1
dewdrops: 1
die: 1
do: 1
drama: 1
drenched: 1
dug: 1
each: 1
earth,: 1
else: 1
enemy,: 1
eyes!: 1
face: 1
fancy: 1
fast,: 1
firm: 1
flowers,: 1
foe,: 1
for: 1
fought: 1
frolicsome: 1
from: 3
gave: 1
gaze.: 1
grass: 1
grass-covered: 1
grave: 1
green: 2
ground: 2
had: 1
happy: 1
have: 3
he: 1
heard: 1
her: 1
here: 2
here,: 2
his: 3
hold: 1
how: 2
in: 4
in,: 1
into: 1
inward: 1
is: 2
it,: 1
itself: 1
just: 1
know: 3
leaf: 1
leans: 1
life: 1
little: 2
longer,: 1
looks: 1
lured: 1
maid: 1
maid.: 1
many: 1
means: 1
men: 1
merrily: 1
moment,: 1
mound: 1
nest;: 1
no: 2
nor: 1
nothing: 2
now: 1
oath: 1
of: 9
of?: 1
old: 5
on: 5
open: 1
or: 1
out: 1
over: 1
people: 1
place: 1
play: 2
playing.: 1
plucks: 1
poor: 1
quickly: 1
rampart,: 2
ready: 1
red: 1
remained: 1
romp: 1
sank: 1
says,: 1
shoes: 1
snow: 1
sportive,: 1
spot: 1
stockings.: 1
story: 1
story,: 1
surprise: 1
sweetmeats: 1
tale,: 1
tears: 1
that: 3
the: 26
them: 1
there: 1
they: 7
thinking: 1
to: 3
together!: 1
took: 1
toys: 1
trembling: 1
turf.: 1
unfolding: 1
was: 2
water: 1
way: 1
what: 1
where: 1
where,: 1
which: 2
white,: 1
who: 4
whole: 1
window: 1
with: 3
withered: 1
woe.: 1
women: 1
would: 3




  • 1
    点赞
  • 28
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
Java是一种流行的编程语言,而Spark是一种基于内存的大数据处理框架,支持并行处理。与此同时,HBase是一种分布式NoSQL数据库,通常用于存储大数据。在许多大数据应用程序中,需要将Spark与HBase集成,以便能够使用Spark的显式并行性来查询和分析HBase中的数据。 为了编写Spark程序并行查询HBase指定数据,我们需要按照以下步骤进行: 1. 通过Java API或者Scala API连接HBase: 2. 使用Spark Context对象创建一个Spark RDD,并将其分布式化(Parallelize),以便在分布式集群中并行处理数据。 3. 使用HBase API从HBase中读取指定的数据,并将其转换为Spark RDD对象。 4. 在Spark RDD对象上执行计算,并将结果保存到HDFS或者其他外部存储系统中。 具体的实现过程如下: 1. 连接HBase: 在Java中,我们可以使用HBase Configuration类来连接HBase。代码示例如下: Configuration conf = HBaseConfiguration.create(); conf.set("hbase.zookeeper.quorum", "localhost:2181"); // ZooKeeper服务器地址 TableName table = TableName.valueOf("my_table"); // HTable名称 Connection conn = ConnectionFactory.createConnection(conf); // 创建HBase连接 Table hTable = conn.getTable(table); // 获取HTable实例 2. 创建Spark RDD并分布式化: 在Java中,我们可以使用JavaSparkContext类来创建一个Spark RDD。代码示例如下: JavaSparkContext sc = new JavaSparkContext(); List<String> list = Arrays.asList("data1", "data2", "data3"); JavaRDD<String> rdd = sc.parallelize(list); // 创建Spark RDD并分布式化 3. 读取HBase数据: 在Java中,我们可以使用HBase Table类来读取HBase中的数据。代码示例如下: Get get = new Get(Bytes.toBytes(rowKey)); // 指定行键 Result result = hTable.get(get); // 读取数据 List<Cell> cells = result.listCells(); // 获取所有的单元格 for (Cell cell : cells) { byte[] value = CellUtil.cloneValue(cell); String data = Bytes.toString(value); System.out.println(data); // 输出数据 } 4. 执行计算并保存结果: 在Java中,我们可以使用Spark RDD的操作来执行计算,并将结果保存到HDFS或其他外部存储系统中。代码示例如下: JavaRDD<String> result = rdd.filter(new Function<String, Boolean>() { public Boolean call(String s) { return s.startsWith("data"); } }); result.saveAsTextFile("hdfs://localhost:9000/result_folder"); // 将结果保存到HDFS中 综上所述,使用Java编写Spark程序并行查询HBase指定数据需要连接HBase、创建Spark RDD并分布式化、读取HBase数据和执行计算并保存结果等步骤。在实际应用中,我们需要根据具体的业务需求来调整程序逻辑以及执行效率等方面的问题。
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值