package xunw
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.spark.rdd.HadoopRDD;
import org.apache.spark.rdd.ShuffledRDD;
import org.apache.hadoop.io.{LongWritable,Text}
import org.apache.hadoop.mapred.TextInputFormat
import scala.Tuple2
import java.util._
object wc {
def main(args: Array[String]) {
val path = "/home/xunw/data/tmp.txt" // Should be some file on your system
//val logFile = "hdfs://192.168.1.24:9000/user/xunw/tmp.txt"
//val sc = new SparkContext("spark://192.168.1.24:7077", "wc",System.getenv("SPARK_HOME"), Seq("/home/xunw/mySpark.jar"))
val conf = new SparkConf().setMaster("local[4]").setAppName("hdd");
val sc = new SparkContext(conf)
// val hrdd = new HadoopRDD(sc,conf,)
//val hrdd = sc.hadoopFile[LongW](path)
val file = sc.hadoopFil
Spark 读取 Hadoop 格式的文件
最新推荐文章于 2023-08-13 11:42:27 发布
该博客展示了如何在Scala中使用Spark从Hadoop文件系统(HDFS)读取文本文件。通过创建SparkConf和SparkContext,然后使用hadoopFile方法以TextInputFormat读取文件,最终进行依赖检查和数据计数。
摘要由CSDN通过智能技术生成