An Example of Integrating Spark and Cassandra
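This post walks through reading a Cassandra table into a Spark RDD with the Hadoop CQL3 input format (CqlPagingInputFormat), filtering it interactively, and saving the result to HDFS. First, start the Spark shell with the Cassandra jars on its classpath and Mesos as the master: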

# Put the Cassandra jars on the shell's classpath (the JVM expands the /* wildcard).
export SPARK_CLASSPATH=/usr/local/cassandra/current/lib/*
# Run against the Mesos master on hadoop1.
export MASTER=mesos://hadoop1:5050
./spark-shell

import java.nio.ByteBuffer
import java.util.{ Map => JMap }
import org.apache.cassandra.hadoop.ConfigHelper
import org.apache.cassandra.hadoop.cql3.CqlPagingInputFormat
import org.apache.cassandra.utils.ByteBufferUtil
import org.apache.hadoop.conf.Configuration

// Build an RDD over a Cassandra table via the Hadoop CQL3 input format.
// Each record is a (key, value) pair of java.util.Maps from column name to raw
// ByteBuffer: the key map holds the partition/clustering columns, the value map
// the remaining columns.
def cql3RDD(host: String, port: Int)(ks: String, table: String) = {
  val conf = new Configuration(sc.hadoopConfiguration)
  ConfigHelper.setInputPartitioner(conf, "Murmur3Partitioner") // must match the cluster's partitioner
  ConfigHelper.setInputInitialAddress(conf, host)              // any reachable node in the ring
  ConfigHelper.setInputRpcPort(conf, port.toString)            // Thrift RPC port (9160 by default)
  ConfigHelper.setInputColumnFamily(conf, ks, table)           // keyspace and table to read
  sc.newAPIHadoopRDD(conf, classOf[CqlPagingInputFormat],
    classOf[JMap[String, ByteBuffer]], classOf[JMap[String, ByteBuffer]])
}
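CqlPagingInputFormat fetches each partition in pages (by default 1,000 CQL rows per page). If that is a poor fit for the table, the page size can be tuned inside cql3RDD next to the other ConfigHelper calls; a minimal sketch, assuming CqlConfigHelper from the same org.apache.cassandra.hadoop.cql3 package is available in your Cassandra version:

import org.apache.cassandra.hadoop.cql3.CqlConfigHelper

// Optional: read 5000 CQL rows per page instead of the default.
CqlConfigHelper.setInputCQLPageRowSize(conf, "5000")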

// Read keyspace webtrans_tm_tdb, table d_1, via the Thrift port.
val rdd = cql3RDD("hadoop1", 9160)("webtrans_tm_tdb", "d_1")
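Columns absent from a row come back as null in these maps, so a small null-safe decoding helper can keep predicates readable. A sketch (col is a hypothetical name, not part of any API here):

// Decode the named column as a UTF-8 string; None when the column is absent.
def col(m: JMap[String, ByteBuffer], name: String): Option[String] =
  Option(m.get(name)).map(b => ByteBufferUtil.string(b))

With it, the first filter below could be written as rdd filter { case (k, _) => col(k, "slang") == Some("en") }.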
// Keep rows whose key column "slang" is "en",
val filtered1 = rdd filter { case (k, v) => ByteBufferUtil.string(k.get("slang")) == "en" }
// whose key column "tlang" is "zh",
val filtered2 = filtered1 filter { case (k, v) => ByteBufferUtil.string(k.get("tlang")) == "zh" }
// and whose value column "scntn" contains "macau" (case-insensitively).
val filtered3 = filtered2 filter { case (k, v) => ByteBufferUtil.string(v.get("scntn")).toLowerCase.contains("macau") }
// Trigger the computation and count the matching rows.
filtered3.count
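Spark pipelines chained filters into a single pass over the data, so splitting the predicate in three costs nothing at runtime; the same logic can also be written as one filter:

val filtered = rdd filter { case (k, v) =>
  ByteBufferUtil.string(k.get("slang")) == "en" &&
  ByteBufferUtil.string(k.get("tlang")) == "zh" &&
  ByteBufferUtil.string(v.get("scntn")).toLowerCase.contains("macau")
}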
// Note: saveAsTextFile writes a directory of part files at this path, not a single file.
filtered3
  .map { case (k, v) => ByteBufferUtil.string(v.get("scntn")) } // illustrative formatting: emit the matched "scntn" text
  .saveAsTextFile("hdfs://hadoop1:54310/containsMacau.txt")