//Scala 的 String 直接复用 java.lang.String 的 split 方法,返回值为数组;按空串 "" 分割会得到每个单字符
scala> "apple".split("")
res35: Array[String] = Array(a, p, p, l, e)
//利用字符串构建rdd
scala> val rdd = sc.parallelize("apple")
rdd: org.apache.spark.rdd.RDD[Char] = ParallelCollectionRDD[28] at parallelize at <console>:24
//字符串可视为 Char 的序列,因此也可以用 map;Char 与 Int 相乘时会先提升为 Int(如 'a'=97,97*2=194),所以结果是 Array[Int]
scala> rdd.map(_*2).collect
res36: Array[Int] = Array(194, 224, 224, 216, 202)
//必须说明对元素的具体处理:单独的占位符 _ 会被展开成外层的匿名函数 (x$1) => rdd.map(x$1).collect,参数类型无法推断而报错
scala> rdd.map(_).collect
<console>:27: error: missing parameter type for expanded function ((x$1) => rdd.map(x$1).collect)
rdd.map(_).collect
^
//字符串可视为字符序列,由它构建的 RDD 同样支持 filter 过滤
scala> rdd.filter(_=='a').collect
res38: Array[Char] = Array(a)
//take(n) 返回由 RDD 前 n 个元素组成的数组
scala> rdd.take(1)
res39: Array[Char] = Array(a)
scala> rdd.take(2)
res40: Array[Char] = Array(a, p)
//rdd没有split方法
scala> rdd.split("")
<console>:27: error: value split is not a member of org.apache.spark.rdd.RDD[Char]
rdd.split("")
^
spark 字符串操作
最新推荐文章于 2022-04-18 22:12:13 发布