用Scala实现Stream(一个惰性的链表)数据结构


这个例子来源于 Scala 圣经级教程《Functional Programming in Scala》。我跟着书中的代码敲了一遍,并写了一些测试代码验证其正确性,放在这里做个备忘。贴出来主要是为了方便自己;如果看不懂但又感兴趣,建议直接去看原书。

package laziness

/**
  * A lazy singly linked list: each non-empty cell stores its head and its
  * tail as zero-argument functions, so elements are only computed on demand.
  *
  * The trait is sealed; its only two shapes are `Empty` and `Cons`, both
  * declared in this file, which lets the compiler check the pattern matches
  * below for exhaustiveness.
  *
  * @tparam A element type (covariant)
  */
sealed trait Stream[+A] {

  import laziness.Stream._

  /**
    * Forces the whole stream into a `List` using plain recursion.
    * Not stack-safe: the recursion depth equals the stream length.
    */
  def toListRecursive: List[A] = this match {
    case Cons(h, t) => h() :: t().toListRecursive
    case _ => List()
  }

  /**
    * Forces the whole stream into a `List` with a tail-recursive loop that
    * prepends to an accumulator and reverses it once at the end.
    */
  def toList: List[A] = {
    @annotation.tailrec
    def go(s: Stream[A], acc: List[A]): List[A] = s match {
      case Cons(h, t) => go(t(), h() :: acc)
      case _ => acc
    }

    go(this, List())
      .reverse
  }

  /**
    * Forces the whole stream into a `List` by appending to a local
    * `ListBuffer`, which avoids the final reversal done by [[toList]].
    */
  def toListFast: List[A] = {
    val buf = new scala.collection.mutable.ListBuffer[A]

    @annotation.tailrec
    def go(s: Stream[A]): List[A] = s match {
      case Cons(h, t) => buf += h(); go(t())
      case _ => buf.toList
    }

    go(this)
  }

  /**
    * Returns a stream of at most the first `n` elements.
    * For `n <= 0` the result is empty.
    *
    * @param n maximum number of elements to keep
    */
  def take(n: Int): Stream[A] = {
    this match {
      case Cons(h, t) if n > 1 => cons(h(), t().take(n - 1))
      // The last requested element: the tail thunk is deliberately not called.
      case Cons(h, _) if n == 1 => cons(h(), empty)
      case _ => empty
    }
  }

  /**
    * Skips the first `n` elements (or all of them if the stream is shorter).
    * Heads of the skipped cells are never evaluated.
    *
    * @param n number of leading elements to discard
    */
  @annotation.tailrec
  final def drop(n: Int): Stream[A] = this match {
    case Cons(_, t) if n > 0 => t().drop(n - 1)
    case _ => this
  }

  /**
    * Returns the longest prefix whose elements all satisfy `f`.
    *
    * @param f predicate tested against each head in turn
    */
  def takeWhile(f: A => Boolean): Stream[A] = this match {
    case Cons(h, t) =>
      // Bind the head once so a non-memoised thunk is not run a second time.
      val head = h()
      if (f(head)) cons(head, t().takeWhile(f)) else empty
    case _ => empty
  }

  /**
    * Right fold whose combining function receives the folded tail by name,
    * so `f` may return without ever looking at the rest of the stream.
    *
    * Not stack-safe: if `f` always forces its second argument, the recursion
    * depth equals the stream length.
    *
    * @param z result for the empty stream (by name)
    * @param f combines a head with the lazily folded tail
    */
  def foldRight[B](z: => B)(f: (A, => B) => B): B =
    this match {
      case Cons(h, t) => f(h(), t().foldRight(z)(f))
      case _ => z
    }


  /**
    * Left fold, implemented as a tail-recursive loop so that long finite
    * streams do not overflow the stack. The accumulator is computed strictly
    * at every step. On an infinite stream this never returns.
    *
    * @param z initial accumulator; evaluated once when the fold starts
    * @param f combines the accumulator so far with the next element
    */
  def foldLeft[B](z: => B)(f: (=> B, A) => B): B = {
    @annotation.tailrec
    def loop(rest: Stream[A], acc: B): B = rest match {
      case Cons(h, t) => loop(t(), f(acc, h()))
      case _ => acc
    }

    loop(this, z)
  }

  /**
    * True if some element satisfies `p`. Built on [[foldRight]]; `||` skips
    * the by-name tail as soon as an element matches.
    */
  def exists(p: A => Boolean): Boolean = foldRight(false)((a, b) => p(a) || b)

  /**
    * True if every element satisfies `f`. Built on [[foldRight]]; `&&` skips
    * the by-name tail as soon as an element fails.
    */
  def forAll(f: A => Boolean): Boolean = foldRight(true)((a, b) => f(a) && b)

  /** Same contract as [[takeWhile]], expressed through [[foldRight]]. */
  def takeWhile_1(f: A => Boolean): Stream[A] =
    foldRight(empty[A])((h, t) => if (f(h)) cons(h, t) else empty)

  /** First element wrapped in `Some`, or `None` for the empty stream. */
  def headOption: Option[A] = foldRight(None: Option[A])((h, _) => Some(h))

  /** Applies `f` to every element; built on [[foldRight]] and `cons`. */
  def map[B](f: A => B): Stream[B] = foldRight(empty[B])((h, t) => cons(f(h), t))

  /** Keeps only the elements that satisfy `f`. */
  def filter(f: A => Boolean): Stream[A] =
    foldRight(empty[A])((h, t) => if (f(h)) cons(h, t) else t)

  /**
    * Concatenates `s` after this stream.
    *
    * @param s the stream to append (by name)
    */
  def append[B >: A](s: => Stream[B]): Stream[B] = foldRight(s)((h, t) => cons(h, t))

  /** Maps every element to a stream and concatenates the results in order. */
  def flatMap[B](f: A => Stream[B]): Stream[B] =
    foldRight(empty[B])((h, t) => f(h).append(t))

  /** Same contract as [[map]], expressed through `unfold`. */
  def mapViaUnfold[B](f: A => B): Stream[B] = unfold(this) {
    case Cons(h, t) => Some((f(h()), t()))
    case _ => None
  }

  /** Same contract as [[take]], expressed through `unfold`. */
  def takeViaUnfold(n: Int): Stream[A] = unfold((this, n)) {
    // Last requested element: continue with an empty state so the tail thunk is not called.
    case (Cons(h, _), 1) => Some((h(), (empty, 0)))
    case (Cons(h, t), remaining) if remaining > 1 => Some((h(), (t(), remaining - 1)))
    case _ => None
  }

  /** Same contract as [[takeWhile]], expressed through `unfold`. */
  def takeWhileViaUnfold(f: A => Boolean): Stream[A] = unfold(this) {
    case Cons(h, t) =>
      // Bind the head once so a non-memoised thunk is not run a second time.
      val head = h()
      if (f(head)) Some((head, t())) else None
    case _ => None
  }

  /**
    * Combines this stream with `s2` element by element using `f`; the result
    * ends as soon as either input ends.
    */
  def zipWith[B, C](s2: Stream[B])(f: (A, B) => C): Stream[C] =
    unfold((this, s2)) {
      case (Cons(h1, t1), Cons(h2, t2)) => Some((f(h1(), h2()), (t1(), t2())))
      case _ => None
    }

  /** Pairs up elements of both streams; ends with the shorter one. */
  def zip[B](s2: Stream[B]): Stream[(A, B)] = zipWith(s2)((_, _))

  /**
    * Pairs up elements of both streams until both are exhausted; a side that
    * has already ended contributes `None`.
    */
  def zipAll[B](s2: Stream[B]): Stream[(Option[A], Option[B])] = zipWithAll(s2)((_, _))

  /**
    * Generalisation of [[zipAll]]: `f` receives an `Option` per side and is
    * called until both streams are exhausted.
    */
  def zipWithAll[B, C](s2: Stream[B])(f: (Option[A], Option[B]) => C): Stream[C] =
    Stream.unfold((this, s2)) {
      case (Empty, Empty) => None
      case (Cons(h, t), Empty) => Some(f(Some(h()), Option.empty[B]) -> (t(), empty[B]))
      case (Empty, Cons(h, t)) => Some(f(Option.empty[A], Some(h())) -> (empty[A], t()))
      case (Cons(h1, t1), Cons(h2, t2)) => Some(f(Some(h1()), Some(h2())) -> (t1(), t2()))
    }

  /**
    * True if `s` is a prefix of this stream. Elements are compared with `==`.
    *
    * @tparam B element type of the candidate prefix
    * @param s the candidate prefix
    */
  def startWith[B](s: Stream[B]): Boolean =
    // Only look at positions where `s` still has an element.
    zipAll(s).takeWhile(_._2.isDefined) forAll {
      case (h, h2) => h == h2
    }

  /**
    * All suffixes of this stream, longest first, ending with the empty
    * stream; e.g. for 1, 2, 3 the suffixes are (1, 2, 3), (2, 3), (3), ().
    */
  def tails: Stream[Stream[A]] = unfold(this) {
    case Empty => None
    case s => Some((s, s drop 1))
  } append Stream(empty)

  /**
    * True if `s` occurs as a contiguous run somewhere in this stream,
    * i.e. if some suffix from [[tails]] starts with `s`.
    *
    * @tparam B element type of the candidate subsequence
    */
  def hasSubSequence[B](s: Stream[B]): Boolean = tails exists (_ startWith s)

  /**
    * Like [[foldRight]], but returns the stream of all intermediate results,
    * one per suffix; e.g. summing 1..5 with `z = 0` gives
    * 15, 14, 12, 9, 5, 0.
    *
    * @param z result for the empty suffix
    * @param f combines a head with the lazily folded tail
    */
  def scanRight[B](z: B)(f: (A, => B) => B): Stream[B] =
    foldRight((z, Stream(z)))((a, p0) => {
      // p0 is by-name; cache it so the tail's pair is computed at most once.
      lazy val p1 = p0
      val b2 = f(a, p1._1)
      (b2, cons(b2, p1._2))
    })._2

  /**
    * First element satisfying `f`, or `None` if there is none.
    */
  @annotation.tailrec
  final def find(f: A => Boolean): Option[A] = this match {
    case Empty => None
    case Cons(h, t) =>
      // Bind the head once so a non-memoised thunk is not run a second time.
      val head = h()
      if (f(head)) Some(head) else t().find(f)
  }

}

/** The stream with no elements; typed `Stream[Nothing]`. */
case object Empty extends Stream[Nothing]

/**
  * A non-empty stream cell. Both fields are zero-argument functions, so the
  * cell itself performs no caching of the values they produce.
  *
  * @param h function producing the first element
  * @param t function producing the rest of the stream
  */
case class Cons[+A](
  h: () => A,
  t: () => Stream[A]
) extends Stream[A]

/**
  * Constructors and sample streams for [[Stream]], plus a `main` that prints
  * a series of demonstration results.
  */
object Stream {

  /**
    * Smart constructor for a non-empty stream. Both arguments are taken by
    * name and cached in `lazy val`s, so each is evaluated at most once no
    * matter how often the resulting cell's thunks are called.
    *
    * @param hd the first element (by name)
    * @param t1 the rest of the stream (by name)
    */
  def cons[A](hd: => A, t1: => Stream[A]): Stream[A] = {
    lazy val head = hd
    lazy val tail = t1
    Cons(() => head, () => tail)
  }

  /** The empty stream, typed as `Stream[A]`. */
  def empty[A]: Stream[A] = Empty

  /** Builds a stream from the given elements, in order. */
  def apply[A](as: A*): Stream[A] =
    if (as.isEmpty) Empty else cons(as.head, apply(as.tail: _*))

  /** Infinite stream of 1s; the self-reference is legal because `cons` takes its tail by name. */
  val ones: Stream[Int] = cons(1, ones)

  /** Infinite stream repeating `a`, built as a single cell whose tail is itself. */
  def constant[A](a: A): Stream[A] = {
    lazy val tail: Stream[A] = Cons(() => a, () => tail)
    tail
  }

  /** Infinite stream n, n + 1, n + 2, ... */
  def from(n: Int): Stream[Int] = cons(n, from(n + 1))

  /** Infinite stream of Fibonacci numbers starting 0, 1, 1, 2, ... */
  val fibs: Stream[Int] = {
    def go(f0: Int, f1: Int): Stream[Int] = cons(f0, go(f1, f0 + f1))

    go(0, 1)
  }

  /**
    * Builds a stream from a seed: `f` is applied to the current state and
    * either yields the next element together with the next state, or `None`
    * to end the stream.
    *
    * @param z initial state
    * @param f step function
    */
  def unfold[A, S](z: S)(f: S => Option[(A, S)]): Stream[A] = f(z) match {
    case Some((h, s)) => cons(h, unfold(s)(f))
    case None => empty
  }

  /** Same contract as [[unfold]], written with `Option.fold`. */
  def unfoldViaFold[A, S](z: S)(f: S => Option[(A, S)]): Stream[A] =
    f(z).fold(empty[A]) { case (h, s) => cons(h, unfoldViaFold(s)(f)) }

  /** Same contract as [[unfold]], written with `Option.map` and `getOrElse`. */
  def unfoldViaMap[A, S](z: S)(f: S => Option[(A, S)]): Stream[A] =
    f(z).map { case (h, s) => cons(h, unfoldViaMap(s)(f)) }.getOrElse(empty[A])

  /** Same elements as [[fibs]], produced by [[unfold]] over a pair of consecutive values. */
  val fibsViaUnfold: Stream[Int] = unfold((0, 1)) {
    case (f0, f1) => Some((f0, (f1, f0 + f1)))
  }

  /** Same contract as [[from]], expressed through [[unfold]]. */
  def fromViaUnfold(n: Int): Stream[Int] = unfold(n)(i => Some(i -> (i + 1)))

  /** Same contract as [[constant]], expressed through [[unfold]]. */
  def constantViaUnfold[A](a: A): Stream[A] = unfold(a)(_ => Some(a -> a))

  /** Same elements as [[ones]], expressed through [[unfold]]. */
  def onesViaUnfold: Stream[Int] = unfold(1)(_ => Some(1 -> 1))


  /** Prints the results of exercising the operations above. */
  def main(args: Array[String]): Unit = {

    println(Stream.ones.asInstanceOf[Cons[Int]].h())
    // No parentheses on purpose: this prints the head thunk itself, not its value.
    println(Stream.ones.asInstanceOf[Cons[Int]].h)
    println(Stream.ones.asInstanceOf[Cons[Int]].t().asInstanceOf[Cons[Int]].h())


    // Step function for a half-open integer range, with the upper bound first so it can be curried away.
    val f = (to: Int, from: Int) => if (from < to) Some((from, from + 1)) else None
    val rangeToN = (from: Int, to: Int) => unfold(from)(f.curried(to))
    val rangeNum = rangeToN(0, 10).toList
    println(rangeNum)

    println(Stream.ones.take(10))
    println(Stream.ones.take(10).toList)
    println(Stream.fibsViaUnfold.take(10).toList)
    println(Stream.fibs.take(10).toList)

    println(Stream(1, 3, 5, 7, 9))
    println(Stream(1, 3, 5, 7, 9).toList)


    println(Stream.ones.zipWith(Stream.fibs)(_ -> _).take(10))
    println(Stream.ones.zipWith(Stream.fibs)(_ -> _).take(10).toList)


    println(Stream.ones.exists((a: Int) => a == 1))

    println(Stream.from(0).exists((a: Int) => a == 11))

    println(Stream.from(0).takeWhile((a: Int) => a < 10))

    println(Stream.from(0).takeWhile((a: Int) => a < 10).toList)

    println(Stream.ones.map(_ * 5).take(6).toList)


//    println(Stream.ones.foldRight(0)(_ + _)) // stack overflow: infinite stream

//    Stream.ones.exists( (a: Int) => a == 0) // stack overflow: no element ever matches

//    Stream.ones.foldLeft(0)(_ +  _) // never completes on an infinite stream

    println(Stream.ones.take(33).foldLeft(0)(_ + _))

    println(Stream.ones.take(33).toListFast)
    println(Stream.ones.take(33).drop(29).toListFast)


    println(Stream.from(1).take(8).headOption.get)


    println(Stream.ones.take(2).append(Stream.constant(4).take(2)).toListFast)

    println(Stream.ones.take(2).append(Stream.constant(4).take(2)).startWith(Stream.ones.take(2)))
    println(Stream.ones.take(2).append(Stream.constant(4).take(2)).startWith(Stream.ones.take(3)))


    println(Stream.from(1).take(5).tails.toListRecursive.map(_.toList))


    println(Stream.from(1).take(5).toListFast)

    println(Stream.from(1).take(5).scanRight(0)(_ + _).toListFast)


    println(Stream.from(1).take(5).find(_ == 3).getOrElse(-1))
    println(Stream.from(1).take(5).find(_ == 9).getOrElse(-1))

    println(Stream.empty.toList)

    println(Stream.empty[Any].map[String](_.toString))

  }

}


上述代码的运行结果是:

1
laziness.Stream$$$Lambda$3/1108411398@62043840
1
List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
Cons(laziness.Stream$$$Lambda$3/1108411398@35fb3008,laziness.Stream$$$Lambda$4/584634336@7225790e)
List(1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
List(0, 1, 1, 2, 3, 5, 8, 13, 21, 34)
List(0, 1, 1, 2, 3, 5, 8, 13, 21, 34)
Cons(laziness.Stream$$$Lambda$3/1108411398@531be3c5,laziness.Stream$$$Lambda$4/584634336@52af6cff)
List(1, 3, 5, 7, 9)
Cons(laziness.Stream$$$Lambda$3/1108411398@12843fce,laziness.Stream$$$Lambda$4/584634336@3dd3bcd)
List((1,0), (1,1), (1,1), (1,2), (1,3), (1,5), (1,8), (1,13), (1,21), (1,34))
true
true
Cons(laziness.Stream$$$Lambda$3/1108411398@3c09711b,laziness.Stream$$$Lambda$4/584634336@5cc7c2a6)
List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
List(5, 5, 5, 5, 5, 5)
33
List(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
List(1, 1, 1, 1)
1
List(1, 1, 4, 4)
true
false
List(List(1, 2, 3, 4, 5), List(2, 3, 4, 5), List(3, 4, 5), List(4, 5), List(5), List())
List(1, 2, 3, 4, 5)
List(15, 14, 12, 9, 5, 0)
3
-1
List()
Empty
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是使用Scala实现Spark读取OSS数据并写入另一个OSS的示例代码:

```scala
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import com.aliyun.oss.OSSClient
import com.aliyun.oss.model.ObjectMetadata
import com.aliyun.oss.model.PutObjectResult

object OSSReadWriteExample {
  def main(args: Array[String]): Unit = {
    val accessKeyId = "your-access-key-id"
    val accessKeySecret = "your-access-key-secret"
    val endpoint = "your-oss-endpoint"
    val inputBucketName = "your-input-bucket-name"
    val outputBucketName = "your-output-bucket-name"
    val inputPath = "your-input-file-path"
    val outputPath = "your-output-file-path"

    // Create SparkSession
    val sparkConf = new SparkConf().setAppName("OSS Read Write Example")
    val spark = SparkSession.builder().config(sparkConf).getOrCreate()

    // Read data from OSS
    val inputData = spark.read.textFile(s"oss://${inputBucketName}.${endpoint}/${inputPath}")

    // Process data
    val processedData = inputData.map(line => line.toUpperCase())

    // Write data to OSS
    val ossClient = new OSSClient(endpoint, accessKeyId, accessKeySecret)
    val outputObjectMetadata = new ObjectMetadata()
    outputObjectMetadata.setContentLength(processedData.toString().length)
    val outputObject = new PutObjectResult()
    val outputStream = processedData.map(line => line + "\n").map(_.getBytes("UTF-8")).collect().flatten
    ossClient.putObject(outputBucketName, outputPath, new java.io.ByteArrayInputStream(outputStream), outputObjectMetadata)

    // Close OSS client and SparkSession
    ossClient.shutdown()
    spark.close()
  }
}
```

在代码中,首先设置了OSS的访问凭证、OSS的Endpoint、输入和输出的Bucket名称、输入和输出的文件路径。然后创建了一个SparkSession,读取了输入文件中的数据,对数据进行处理,最后将处理后的结果写入到输出文件中。在写入数据到OSS时,使用了阿里云Java SDK提供的OSSClient类,创建了一个OSS客户端,通过putObject方法将数据写入到指定的OSS Bucket和文件路径中。最后关闭了OSS客户端和SparkSession。

需要注意的是,使用阿里云Java SDK时需要将对应的jar包添加到项目中,例如:

```xml
<dependency>
  <groupId>com.aliyun.oss</groupId>
  <artifactId>aliyun-sdk-oss</artifactId>
  <version>3.11.0</version>
</dependency>
```

同时,需要在pom.xml中添加OSS Maven仓库的配置:

```xml
<repositories>
  <repository>
    <id>oss-aliyun</id>
    <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
  </repository>
</repositories>
```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值