// Case: create a DataFrame from an RDD[Row] paired with an explicit schema.
// Fix: the original `val sc =newSparkContext(conf)` does not compile —
// `newSparkContext` parses as a single identifier, not `new SparkContext`.
// Instead of constructing a second context, reuse the one owned by the session.
val spark = SparkSession.builder().config(conf).getOrCreate()
val sc = spark.sparkContext
sc.setLogLevel("ERROR")

// Each Row must line up positionally with the schema fields declared below.
val rowRDD = sc.parallelize(List(Row("jason", 33), Row("Tom", 50)))
val schema = StructType(List(
  StructField("Name", StringType, true), // nullable string column
  StructField("Age", IntegerType, true)  // nullable int column
))

val df = spark.createDataFrame(rowRDD, schema)
df.printSchema()
df.show()
// Case: generate a one-column DataFrame holding the integers 1 to 99.
val spark = SparkSession.builder().config(conf).getOrCreate()
val rangeDF = spark.range(1, 100).toDF("Num")
rangeDF.show() // show() prints only the first 20 rows by default
输出日志
+---+
|Num|
+---+
|  1|
|  2|
|  3|
|  4|
|  5|
|  6|
|  7|
|  8|
|  9|
| 10|
| 11|
| 12|
| 13|
| 14|
| 15|
| 16|
| 17|
| 18|
| 19|
| 20|
+---+
only showing top 20 rows
案例2. Creating DataFrames from Collection Tuple
// Case: build a DataFrame straight from a local Seq of tuples,
// then name the columns with toDF.
val spark = SparkSession.builder().config(conf).getOrCreate()
val people = Seq(("Jason", "DBA"), ("Chen", "Dev"))
val peopleDF = spark.createDataFrame(people).toDF("Name", "jobs")
peopleDF.printSchema()
peopleDF.show()
// Case: read a plain text file; each line becomes one row in a
// single string column named "value".
val spark = SparkSession.builder().config(conf).getOrCreate()
val textDF = spark.read.text("file:///c://README.txt")
textDF.show(300) // display up to 300 rows instead of the default 20
输出日志
+--------------------+
|               value|
+--------------------+
|      TCPDF - README|
|=================...|
|                    |
|I WISH TO IMPROVE...|
|PLEASE MAKE A DON...|
案例2. Creating DataFrames by Reading CSV Files
// Case: read a CSV file, using its first line as column names.
val spark = SparkSession.builder().config(conf).getOrCreate()
val csvDF = spark.read
  .option("header", true) // treat the first line as the header row
  .csv("file:///c://samle1.csv")
csvDF.show()
输出日志
+---+-----------+----+
| ID|ID_PROVINCE|NAME|
+---+-----------+----+
|  1|         91|泰安|
|  2|         91|济宁|
|  3|         91|临沂|
|  4|         77|孝感|
|  5|         77|黄冈|
|  6|         69|娄底|
|  7|         69|益阳|
|  8|         69|怀化|
|  9|         69|永州|
| 10|         69|邵阳|
| 11|         91|莱芜|
| 12|         77|鄂州|
| 13|         77|随州|
| 14|         74|安庆|
| 15|         74|蚌埠|
| 16|         74|亳州|
| 17|         74|池州|
| 18|         74|滁州|
| 19|         74|阜阳|
| 20|         74|合肥|
+---+-----------+----+
only showing top 20 rows
案例2. Creating DataFrames by Reading TSV Files
// Case: read a tab-separated file with an explicit schema.
// Fix: the original used .option("head", true) — the correct option key is
// "header". With the typo, the header line was parsed as a data row, which is
// why the output log shows an all-null first row (the header text fails
// Integer parsing against the schema below).
val spark = SparkSession.builder().config(conf).getOrCreate()
val schema = StructType(List(
  StructField("ID", IntegerType, true),
  StructField("PID", IntegerType, true),
  StructField("省份", StringType, true) // province name
))
val df = spark.read
  .option("header", true) // skip the header line
  .option("sep", "\t")    // tab-separated values
  .schema(schema)
  .csv("file:///c://sample2.tsv")
df.show()
输出日志
+----+----+----+
|  ID| PID|省份|
+----+----+----+
|null|null|null|
|   1|  91|泰安|
|   2|  91|济宁|
|   3|  91|临沂|
|   4|  77|孝感|
|   5|  77|黄冈|
|   6|  69|娄底|
|   7|  69|益阳|
|   8|  69|怀化|
|   9|  69|永州|
|  10|  69|邵阳|
|  11|  91|莱芜|
|  12|  77|鄂州|
|  13|  77|随州|
|  14|  74|安庆|
|  15|  74|蚌埠|
|  16|  74|亳州|
|  17|  74|池州|
|  18|  74|滁州|
|  19|  74|阜阳|
+----+----+----+
only showing top 20 rows
案例3. Creating DataFrames by Reading JSON Files
// Case: load a JSON file; Spark infers the schema from the data.
val spark = SparkSession.builder().config(conf).getOrCreate()
val jsonDF = spark.read.json("file:///c://notification.json")
jsonDF.printSchema()
jsonDF.show()
// Case: load a MySQL table over JDBC into a DataFrame.
// NOTE(review): credentials are hard-coded here — fine for a demo,
// but use a secrets mechanism in real code.
val spark = SparkSession.builder().config(conf).getOrCreate()
val jdbcUrl = "jdbc:mysql://localhost:3306/miracleops"
val userTable = spark.read
  .format("jdbc")
  .option("driver", "com.mysql.jdbc.Driver")
  .option("url", jdbcUrl)
  .option("dbtable", "user")
  .option("user", "root")
  .option("password", "root")
  .load()
userTable.printSchema()
userTable.select("email", "username").show()