spark:学习杂记--37

《快学scala》第四章课后习题:

1.设置一个映射,其中包含你想要的一些装备,以及它们的价格。然后构建另一个映射,采用同一组键,但是价格上打9折

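2.编写一段程序,从文件中读取单词。用一个可变映射来清点每个单词出现的频率。读取这些单词的操作可以使用java.util.Scanner:val in = new java.util.Scanner(new java.io.File("myfile.txt")); while (in.hasNext()) 处理 in.next()。最后,打印出所有单词和它们出现的次数。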

3.重复前一个练习,这次用不可变的映射

4.重复前一个练习,这次使用已排序的映射,以便单词可以按顺序打印出来

5.重复前一个练习,这次使用java.util.TreeMap并使之适用于Scala API

6.定义一个链式哈希映射,将"Monday"映射到java.util.Calendar.MONDAY,依次类推加入其他日期。展示元素是以插入的顺序被访问的

7.打印出所有Java系统属性的表格

8.编写一个函数minmax(values:Array[Int]),返回数组中最小值和最大值的对偶

9.编写一个函数Iteqgt(values: Array[Int], v: Int),返回数组中小于v、等于v和大于v的元素数量,要求三个值一起返回

10. 当你将两个字符串拉链在一起,比如"Hello".zip("World"),会是什么结果?想出一个讲得通的用例

package SK

import java.util.Calendar
import scala.collection.JavaConversions.propertiesAsScalaMap
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.SparkContext._
import scala.collection.mutable.LinkedHashMap
import scala.collection.{mutable, SortedMap}
import scala.collection.immutable.{TreeMap, HashMap}
import scala.io.Source

/**
 * Worked answers to the chapter 4 exercises of "Scala for the Impatient"
 * (maps and tuples), plus a Spark word count over the same input file.
 *
 * Created by sendoh on 2015/5/4.
 */
object answerx {

  /** Text file whose words are counted by the Spark job and by exercises 2-5. */
  private val InputPath = "Example.txt"

  /**
   * Reads the whole file at `path` and splits it on runs of whitespace.
   *
   * The file handle is always closed, and empty tokens (produced by leading
   * whitespace or an empty file) are dropped so they are not counted as words.
   *
   * @param path file to read
   * @return the non-empty whitespace-separated tokens, in file order
   */
  private def readWords(path: String): Array[String] = {
    val source = Source.fromFile(path)
    try source.mkString.split("\\s+").filter(_.nonEmpty)
    finally source.close()
  }

  /**
   * Exercise 5: counts words in a `java.util.TreeMap` viewed through the Scala map API.
   *
   * @param words tokens to count
   * @return a mutable Scala view backed by the Java tree map
   */
  private def countWithJavaTreeMap(words: Array[String]): mutable.Map[String, Int] = {
    // Local import keeps the `asScala` decorators out of the rest of the object.
    import scala.collection.JavaConverters._
    val counts: mutable.Map[String, Int] = new java.util.TreeMap[String, Int]().asScala
    for (word <- words) {
      counts(word) = counts.getOrElse(word, 0) + 1
    }
    counts
  }

  /**
   * Exercise 8: largest and smallest element of `values`.
   *
   * @param values array to inspect; `max`/`min` throw on an empty array
   * @return the pair `(max, min)`
   */
  def maxmin(values: Array[Int]): (Int, Int) = (values.max, values.min)

  /**
   * Exercise 9: counts the elements below, equal to and above `v`.
   *
   * @param values array to inspect
   * @param v      pivot value
   * @return the triple `(count < v, count == v, count > v)`
   */
  def iteqgt(values: Array[Int], v: Int): (Int, Int, Int) =
    (values.count(_ < v), values.count(_ == v), values.count(_ > v))

  def main(args: Array[String]): Unit = {
    // 1. Same keys, prices reduced by 10%.
    val map1 = Map("pen" -> 5, "book" -> 20, "iphone" -> 5000)
    val discounted = for ((k, v) <- map1) yield (k, v * 0.9)
    println(discounted.mkString(","))

    // Spark version: print every word with its number of occurrences.
    val conf = new SparkConf().setAppName("answerx")
    val sc = new SparkContext(conf)
    try {
      val datawords = sc.textFile(InputPath)
        .flatMap(_.split(" "))
        .map(word => (word, 1))
        .reduceByKey(_ + _)
        .collect()
      // mkString prints the pairs; println on the array itself only prints its reference.
      println(datawords.mkString(","))
    } finally {
      sc.stop()
    }

    // Exercises 2-5 all count the same words, so the file is read once.
    val words = readWords(InputPath)

    // 2. Mutable map: counts are updated in place.
    val map2 = mutable.HashMap[String, Int]()
    for (key <- words) {
      map2(key) = map2.getOrElse(key, 0) + 1
    }
    println(map2.mkString(","))

    // 3. Immutable map: every addition yields a new map, threaded through the fold.
    val map3 = words.foldLeft(Map.empty[String, Int]) { (counts, key) =>
      counts + (key -> (counts.getOrElse(key, 0) + 1))
    }
    println(map3.mkString(","))

    // 4. Same as 3, but a SortedMap so the words print in key order.
    val map4 = words.foldLeft(SortedMap.empty[String, Int]) { (counts, key) =>
      counts + (key -> (counts.getOrElse(key, 0) + 1))
    }
    println(map4.mkString(","))

    // 5. java.util.TreeMap adapted to the Scala API.
    val map5 = countWithJavaTreeMap(words)
    println(map5.mkString(","))

    // 6. LinkedHashMap iterates in insertion order; printing it shows that order.
    val map6 = LinkedHashMap(
      "Monday" -> Calendar.MONDAY,
      "Tuesday" -> Calendar.TUESDAY,
      "Wednesday" -> Calendar.WEDNESDAY,
      "Thursday" -> Calendar.THURSDAY,
      "Friday" -> Calendar.FRIDAY,
      "Saturday" -> Calendar.SATURDAY,
      "Sunday" -> Calendar.SUNDAY
    )
    println(map6.mkString(","))

    // 7. Table of the Java system properties; the Properties object is viewed
    //    as a Scala map through the file-level propertiesAsScalaMap import.
    val props: scala.collection.Map[String, String] = System.getProperties()
    val keys = props.keySet
    val maxKeyLength = keys.map(_.length).max
    for (key <- keys) {
      // Pad every key to the widest one so the separators line up.
      val padding = " " * (maxKeyLength - key.length)
      println(key + padding + " | " + props(key))
    }

    // 10. Zipping two strings pairs up their characters position by position:
    //     Vector((H,W), (e,o), (l,r), (l,l), (o,d))
    println("Hello".zip("World"))
  }

}
/

SimpleSkewedGroupByText:spark例子

package SK

import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.SparkContext._
import scala.util.Random

/**
 * Spark example that generates random key/value pairs with a skewed key
 * distribution, caches them, and then counts the groups produced by `groupByKey`.
 *
 * Created by sendoh on 2015/5/4.
 *
 * Optional positional arguments:
 *  - `args(0)` numMappers: number of generating partitions (default 2)
 *  - `args(1)` numKVPairs: pairs generated per partition (default 1000)
 *  - `args(2)` valSize: size in bytes of each random value (default 1000)
 *  - `args(3)` numReducers: partitions used by `groupByKey` (default numMappers)
 *  - `args(4)` ratio: weight of the "hot" keys relative to the others (default 5.0)
 */
object SimpleSkewedGroupByText {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SimpleSkewedGroupByText")
    val sc = new SparkContext(conf)
    val numMappers = if (args.length > 0) args(0).toInt else 2
    val numKVPairs = if (args.length > 1) args(1).toInt else 1000
    val valSize = if (args.length > 2) args(2).toInt else 1000
    val numReducers = if (args.length > 3) args(3).toInt else numMappers
    // ratio is a Double, so parse it as one: "5" still works and "2.5" no longer throws.
    val ratio = if (args.length > 4) args(4).toDouble else 5.0

    val pairs1 = sc.parallelize(0 until numMappers, numMappers).flatMap { _ =>
      val ranGen = new Random
      Array.fill(numKVPairs) {
        val byteArr = new Array[Byte](valSize)
        ranGen.nextBytes(byteArr)
        val offset = ranGen.nextInt(1000) * numReducers
        if (ranGen.nextDouble < ratio / (numReducers + ratio - 1)) {
          // Hot case: the key is an exact multiple of numReducers.
          (offset, byteArr)
        } else {
          // Otherwise shift the key by 1 .. numReducers - 1 past that multiple.
          val key = 1 + ranGen.nextInt(numReducers - 1) + offset
          (key, byteArr)
        }
      }
    }.cache()
    // Run one action first so the pairs are generated before the grouping step.
    pairs1.count()
    println("RESULT: " + pairs1.groupByKey(numReducers).count())
    sc.stop()
  }

}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值