// 《快学Scala》第四章课后习题:
// 1. 设置一个映射,其中包含你想要的一些装备,以及它们的价格。然后构建另一个映射,采用同一组键,但是价格上打9折
// 2. 编写一段程序,从文件中读取单词。用一个可变映射来清点每个单词出现的频率。读取这些单词的操作可以使用 java.util.Scanner
// 3. 重复前一个练习,这次用不可变的映射
// 4. 重复前一个练习,这次使用已排序的映射,以便单词可以按顺序打印出来
// 5. 重复前一个练习,这次使用 java.util.TreeMap 并使之适用于 Scala API
// 6. 定义一个链式哈希映射,将 "Monday" 映射到 java.util.Calendar.MONDAY,依次类推加入其他日期。展示元素是以插入的顺序被访问的
// 7. 打印出所有 Java 系统属性的表格
// 8. 编写一个函数 minmax(values: Array[Int]),返回数组中最小值和最大值的对偶
// 9. 编写一个函数 lteqgt(values: Array[Int], v: Int),返回数组中小于 v、等于 v 和大于 v 的数量,要求三个值一起返回
// 10. 当你将两个字符串拉链在一起,比如 "Hello".zip("World"),会是什么结果?想出一个讲得通的用例
package SK
import java.util.Calendar
import scala.collection.JavaConversions.propertiesAsScalaMap
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.SparkContext._
import scala.collection.mutable.LinkedHashMap
import scala.collection.{mutable, SortedMap}
import scala.collection.immutable.{TreeMap, HashMap}
import scala.io.Source
/**
* Created by sendoh on 2015/5/4.
*/
/**
 * Solutions to the chapter-4 exercises (maps and tuples).
 *
 * NOTE(review): the word-count sections re-read "Example.txt" several times and the
 * Spark section needs a master configured on the SparkConf before this will run.
 */
object answerx {
  def main(args: Array[String]): Unit = {
    // Exercise 1: same keys, prices discounted by 10%.
    // The original discarded the yielded map; bind and print it instead.
    val map1 = Map("pen" -> 5, "book" -> 20, "iphone" -> 5000)
    val discounted = for ((k, v) <- map1) yield (k, v * 0.9)
    println(discounted.mkString(","))

    // Word count done with Spark (reduceByKey), collected back to the driver.
    val conf = new SparkConf()
    val sc = new SparkContext(conf)
    val datawords = sc.textFile("Example.txt")
      .flatMap(_.split(" "))
      .map(word => (word, 1))
      .reduceByKey(_ + _)
      .collect()
    // mkString: printing the Array directly would only show its toString, not the contents.
    println(datawords.mkString(","))

    // Exercise 2: count word frequencies with a MUTABLE map.
    // Fix: the original used immutable HashMap with `map2(key) = ...`, which does not
    // compile — in-place update requires a mutable map.
    val source2 = Source.fromFile("Example.txt").mkString.split("\\s+")
    val map2 = mutable.HashMap[String, Int]()
    for (key <- source2) {
      map2(key) = map2.getOrElse(key, 0) + 1
    }
    println(map2.mkString(","))

    // Exercise 3: immutable map — each `+=` builds a new map and rebinds the var.
    val source3 = Source.fromFile("Example.txt").mkString.split("\\s+")
    var map3 = HashMap[String, Int]()
    for (key <- source3) {
      map3 += (key -> (map3.getOrElse(key, 0) + 1))
    }
    println(map3.mkString(","))

    // Exercise 4: same as above but with a SortedMap, so words print in key order.
    val source4 = Source.fromFile("Example.txt").mkString.split("\\s+")
    var map4 = SortedMap[String, Int]()
    for (key <- source4) {
      map4 += (key -> (map4.getOrElse(key, 0) + 1))
    }
    println(map4.mkString(","))

    // Exercise 5: use java.util.TreeMap (sorted by key).
    // Fix: the original assigned a Scala immutable TreeMap to an immutable Map and then
    // mutated it in place — that does not compile, and the exercise asks for the Java map.
    val source5 = Source.fromFile("Example.txt").mkString.split("\\s+")
    val map5 = new java.util.TreeMap[String, Int]()
    for (key <- source5) {
      val current = if (map5.containsKey(key)) map5.get(key) else 0
      map5.put(key, current + 1)
    }
    println(map5)

    // Exercise 6: LinkedHashMap iterates in insertion order.
    val map6 = new LinkedHashMap[String, Int]
    map6 += ("Monday" -> Calendar.MONDAY)
    map6 += ("Tuesday" -> Calendar.TUESDAY)
    map6 += ("Wednesday" -> Calendar.WEDNESDAY)
    map6 += ("Thursday" -> Calendar.THURSDAY)
    map6 += ("Friday" -> Calendar.FRIDAY)
    map6 += ("Saturday" -> Calendar.SATURDAY)
    map6 += ("Sunday" -> Calendar.SUNDAY)
    // Printing demonstrates that traversal follows insertion order.
    println(map6.mkString(","))

    // Exercise 7: table of all Java system properties, keys padded to equal width.
    // Relies on the (deprecated) propertiesAsScalaMap implicit imported at the top of the file.
    val props: scala.collection.Map[String, String] = System.getProperties()
    val keys = props.keySet
    val maxKeyLength = (for (key <- keys) yield key.length).max
    for (key <- keys) {
      print(key)
      print(" " * (maxKeyLength - key.length))
      print(" | ")
      println(props(key))
    }

    // Exercise 8: pair of (min, max).
    // Fix: the original declared Unit and silently discarded the tuple.
    def minmax(values: Array[Int]): (Int, Int) = (values.min, values.max)
    println(minmax(Array(3, 1, 2)))

    // Exercise 9: counts of elements (< v, == v, > v), returned together as a triple.
    // Fix: Unit return type and an unused buffer removed.
    def lteqgt(values: Array[Int], v: Int): (Int, Int, Int) =
      (values.count(_ < v), values.count(_ == v), values.count(_ > v))
    println(lteqgt(Array(1, 2, 2, 3, 4), 2))

    // Exercise 10: zipping strings pairs characters positionally.
    // "Hello".zip("World") == Vector((H,W), (e,o), (l,r), (l,l), (o,d))
    // A plausible use case: pairing a plaintext with a key stream of equal length.
    println("Hello".zip("World"))
  }
}
// SimpleSkewedGroupByText: Spark 例子
package SK
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.SparkContext._
import scala.util.Random
/**
* Created by sendoh on 2015/5/4.
*/
/**
 * Skewed groupBy micro-benchmark: generates random key/value pairs where a tunable
 * fraction of keys collapses onto one "hot" bucket per offset, then measures
 * groupByKey over them.
 *
 * CLI args (all optional): numMappers numKVPairs valSize numReducers ratio
 */
object SimpleSkewedGroupByText {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SimpleSkewedGroupByText")
    val sc = new SparkContext(conf)
    // Parsed once and never reassigned — vals, not vars.
    val numMappers  = if (args.length > 0) args(0).toInt else 2
    val numKVPairs  = if (args.length > 1) args(1).toInt else 1000
    val valSize     = if (args.length > 2) args(2).toInt else 1000
    val numReducers = if (args.length > 3) args(3).toInt else numMappers
    // Fix: ratio is used as a Double below; the original parsed args(4) with toInt,
    // silently truncating fractional skew ratios passed on the command line.
    val ratio       = if (args.length > 4) args(4).toDouble else 5.0

    // One partition per mapper; each emits numKVPairs (key, payload) pairs.
    val pairs1 = sc.parallelize(0 until numMappers, numMappers).flatMap { p =>
      val ranGen = new Random
      val result = new Array[(Int, Array[Byte])](numKVPairs)
      for (i <- 0 until numKVPairs) {
        val byteArr = new Array[Byte](valSize)
        ranGen.nextBytes(byteArr)
        val offset = ranGen.nextInt(1000) * numReducers
        if (ranGen.nextDouble < ratio / (numReducers + ratio - 1)) {
          // Hot key: reducer 0's bucket for this offset — this produces the skew.
          result(i) = (offset, byteArr)
        } else {
          // Spread over the remaining reducers. Guard numReducers == 1:
          // nextInt(0) would throw IllegalArgumentException.
          val key =
            if (numReducers > 1) 1 + ranGen.nextInt(numReducers - 1) + offset
            else offset
          result(i) = (key, byteArr)
        }
      }
      result
    }.cache
    // Force materialization of the cached RDD before the measured groupBy.
    pairs1.count
    println("RESULT: " + pairs1.groupByKey(numReducers).count)
    sc.stop()
  }
}