login as: root
┌────────────────────────────────────────────────────────────────────┐
│ • MobaXterm 11.0 • │
│ (SSH client, X-server and networking tools) │
│ │
│ ➤ SSH session to root@192.168.89.105 │
│ • SSH compression : ✔ │
│ • SSH-browser : ✔ │
│ • X11-forwarding : ✘ (disabled or not supported by server) │
│ • DISPLAY :172.16.9.44:0.0 │
│ │
│ ➤ For more info, ctrl+click on help or visit our website │
└────────────────────────────────────────────────────────────────────┘
Last login: Mon Dec  2 16:33:23 2019 from 192.168.89.1
-bash: /root: Is a directory
[root@SparkOnStandalone ~]# jps
12790 Jps
[root@SparkOnStandalone ~]# start-dfs.sh
Starting namenodes on [SparkOnStandalone]
SparkOnStandalone: /root/.bashrc: line 13: /root: Is a directory
SparkOnStandalone: starting namenode, logging to /usr/hadoop-2.9.2/logs/hadoop-root-namenode-SparkOnStandalone.out
SparkOnStandalone: /root/.bashrc: line 13: /root: Is a directory
SparkOnStandalone: starting datanode, logging to /usr/hadoop-2.9.2/logs/hadoop-root-datanode-SparkOnStandalone.out
Starting secondary namenodes [SparkOnStandalone]
SparkOnStandalone: /root/.bashrc: line 13: /root: Is a directory
SparkOnStandalone: starting secondarynamenode, logging to /usr/hadoop-2.9.2/logs/hadoop-root-secondarynamenode-SparkOnStandalone.out
[root@SparkOnStandalone ~]# jps
13321 DataNode
13195 NameNode
13615 SecondaryNameNode
13807 Jps
package mby00

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// countByValueAndWindow(windowLength, slideInterval, [numTasks])
// How to test: run main directly, then send input with `nc -lk 8888` (verified ok)
object CountByValueAndWindowTest {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("countByValueAndWindow test")
    val ssc = new StreamingContext(conf, Seconds(1))
    ssc.sparkContext.setLogLevel("OFF")
    val ds = ssc.socketTextStream("SparkOnStandalone", 8888)
    ssc.checkpoint("hdfs://SparkOnStandalone:9000/checkpoint6")
    ds
      .countByValueAndWindow(Seconds(5), Seconds(5))
      .print()
    ssc.start()
    ssc.awaitTermination()
  }
}

// Output after running main directly (no input sent yet, so the windows are empty):
/**
-------------------------------------------
Time: 1575421403000 ms
-------------------------------------------
-------------------------------------------
Time: 1575421408000 ms
-------------------------------------------
-------------------------------------------
Time: 1575421413000 ms
-------------------------------------------
*/
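For reference, `countByValueAndWindow` is essentially a shorthand for pairing each element with 1 and then applying the inverse-function form of `reduceByKeyAndWindow` covered next. A minimal sketch of the roughly equivalent pipeline, reusing the `ds` and `ssc` defined above (the partition count and filter arguments are assumptions for illustration):

// Roughly equivalent to ds.countByValueAndWindow(Seconds(5), Seconds(5)):
// pair every element with 1L, add elements entering the window, subtract
// elements leaving it, and drop keys whose count has fallen back to 0.
ds.map(value => (value, 1L))
  .reduceByKeyAndWindow(
    (v1: Long, v2: Long) => v1 + v2,     // records entering the window
    (v1: Long, v2: Long) => v1 - v2,     // records leaving the window
    Seconds(5),                          // window length
    Seconds(5),                          // slide interval
    ssc.sparkContext.defaultParallelism, // assumed partition count
    (kv: (String, Long)) => kv._2 != 0L  // discard zero-count keys
  )
  .print()
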
package mby00

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// reduceByKeyAndWindow(func, invFunc, windowLength, slideInterval, [numTasks])
// The more efficient form of `reduceByKeyAndWindow`: each window is computed as
// the previous window's result + elements entering the window - elements leaving the window.
// How to test: run main directly, then send input with `nc -lk 8888` (verified ok)
object ReduceByKeyAndWindowTest {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("reduceByKeyAndWindow test")
    val ssc = new StreamingContext(conf, Seconds(1))
    ssc.sparkContext.setLogLevel("OFF")
    val ds = ssc.socketTextStream("SparkOnStandalone", 8888)
    println("111111111111111111111111")
    ssc.checkpoint("hdfs://SparkOnStandalone:9000/checkpoint7")
    ds
      .map(line => (line, 1))
      .reduceByKeyAndWindow((v1: Int, v2: Int) => v1 + v2, (v1: Int, v2: Int) => v1 - v2, Seconds(5), Seconds(2))
      .print()
    ssc.start()
    ssc.awaitTermination()
  }
}

// Output after running main directly:
/**
111111111111111111111111
-------------------------------------------
Time: 1575422934000 ms
-------------------------------------------
-------------------------------------------
Time: 1575422936000 ms
-------------------------------------------
-------------------------------------------
Time: 1575422938000 ms
-------------------------------------------
*/

// Final result (all counts are back to 0 once the test input has left the window):
/**
(mm,0)
(,0)
(mby1,0)
(m,0)
(b,0)
(mby3mby3,0)
(mby2,0)
(mby3,0)
(kkkkkkkkkkkkkkkkkkkkkkkkk,0)
(kkkkkkkkkkkkkkk,0)
*/
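The (key, 0) entries above show a side effect of the inverse-function form: once a key's count drops back to 0 it stays in the window state and keeps being emitted. `reduceByKeyAndWindow` also accepts an optional filter function for exactly this case; a minimal sketch, reusing the `ds` and `ssc` from above (the partition count is an assumed value for illustration):

// Same windowed count, but with a filter function so keys whose count has
// fallen back to 0 are dropped from the state instead of printed as (key, 0).
ds.map(line => (line, 1))
  .reduceByKeyAndWindow(
    (v1: Int, v2: Int) => v1 + v2,       // records entering the window
    (v1: Int, v2: Int) => v1 - v2,       // records leaving the window
    Seconds(5),                          // window length
    Seconds(2),                          // slide interval
    ssc.sparkContext.defaultParallelism, // assumed partition count
    (kv: (String, Int)) => kv._2 != 0    // keep only keys still present in the window
  )
  .print()
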
package mby00.quickstart

import org.apache.spark.sql.{Dataset, SparkSession}

// Spark SQL quick-start example
object SparkSqlExample1SQLTest {
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession
      .builder().appName("SparkSQLExample1 test").master("local[*]").getOrCreate()
    import sparkSession.implicits._
    val rdd = sparkSession.sparkContext.makeRDD(List(("Hello", 1), ("Scala", 1), ("Hello", 1), ("Spark", 1)))
    val dataset: Dataset[(String, Int)] = rdd.toDS()
    dataset.createOrReplaceTempView("t_word")
    sparkSession
      .sql("select * from t_word")
      .show()
    sparkSession.close()
  }
}

/**
+-----+---+
| _1| _2|
+-----+---+
|Hello| 1|
|Scala| 1|
|Hello| 1|
|Spark| 1|
+-----+---+
*/

package mby00.quickstart

import org.apache.spark.sql.{Dataset, SparkSession}

// Spark SQL quick-start example
// How to test: run main directly (verified ok)
object SparkSqlExample1SQLTest {
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession
      .builder().appName("SparkSQLExample1 test").master("local[*]").getOrCreate()
    import sparkSession.implicits._
    val rdd = sparkSession.sparkContext.makeRDD(List(("Hello", 1), ("Scala", 1), ("Hello", 1), ("Spark", 1)))
    val dataset: Dataset[(String, Int)] = rdd.toDS()
    dataset.createOrReplaceTempView("t_word")
    sparkSession
      .sql("select * from t_word")
      .show()
/**
+-----+---+
| _1| _2|
+-----+---+
|Hello| 1|
|Scala| 1|
|Hello| 1|
|Spark| 1|
+-----+---+
*/
    sparkSession
      .sql("select _1 as word, sum(_2) as num from t_word group by _1")
      .show()
/**
+-----+---+
| word|num|
+-----+---+
|Hello| 2|
|Scala| 1|
|Spark| 1|
+-----+---+
*/
    println("333333333333333333333333333333333333333")
    sparkSession
      .sql("select _1 as word, sum(_2) as num from t_word where _1 != 'Scala' group by _1 order by num asc")
      .show()
/**
+-----+---+
| word|num|
+-----+---+
|Spark| 1|
|Hello| 2|
+-----+---+
*/
    sparkSession.close()
  }
}
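If the aggregated result is needed as a typed Dataset rather than only printed, the rows can be mapped onto a case class with `as[...]`; a minimal sketch, assuming the `t_word` view registered above (`WordCount` is a hypothetical result type; note that `sum` over an Int column yields a Long):

// Hypothetical result type for the aggregation, defined at the top level of the file
case class WordCount(word: String, num: Long)

// Inside main, with the t_word view registered as above:
val wordCounts: Dataset[WordCount] = sparkSession
  .sql("select _1 as word, sum(_2) as num from t_word group by _1")
  .as[WordCount]
wordCounts.collect().foreach(println) // e.g. WordCount(Hello,2), WordCount(Scala,1), WordCount(Spark,1)
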
package mby00.quickstart

import org.apache.spark.sql.{Dataset, SparkSession}

// Spark SQL quick-start example using the Dataset API instead of SQL
// How to test: run main directly (verified ok)
object SparkSqlExample1FunctionTest {
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession
      .builder().appName("SparkSqlExample1FunctionTest").master("local[*]").getOrCreate()
    import sparkSession.implicits._
    val rdd = sparkSession.sparkContext.makeRDD(List(("Hello", 1), ("Scala", 1), ("Hello", 1), ("Spark", 1)))
    val dataset: Dataset[(String, Int)] = rdd.toDS()
    dataset
      .where("_1 != 'Scala'").groupBy("_1").sum("_2")
      .withColumnRenamed("_1", "word").withColumnRenamed("sum(_2)", "sum")
      .show()
/**
+-----+---+
| word|sum|
+-----+---+
|Hello| 2|
|Spark| 1|
+-----+---+
*/
    sparkSession.close()
  }
}
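An alternative to renaming the generated `sum(_2)` column afterwards is to name the columns up front with `alias` and an aggregate from `org.apache.spark.sql.functions`; a minimal sketch over the same `dataset` as above, which should produce the same word/sum columns:

import org.apache.spark.sql.functions.sum

// Same aggregation, naming the output columns via alias() instead of withColumnRenamed
dataset
  .where("_1 != 'Scala'")
  .groupBy($"_1".alias("word"))
  .agg(sum($"_2").alias("sum"))
  .show()
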
package mby00.datasource

import org.apache.spark.sql.SparkSession

// Create a Dataset from a Scala collection of tuples
// How to test: run main directly (verified ok)
object CreateDataSetWithTupleTest {
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession
      .builder().appName("CreateDataSetWithTupleTest").master("local[*]").getOrCreate()
    import sparkSession.implicits._
    val dataset = List((1, "zs", true, 1000), (2, "ls", false, 2000)).toDS()
    dataset
      .show()
/**
+---+---+-----+----+
| _1| _2| _3| _4|
+---+---+-----+----+
| 1| zs| true|1000|
| 2| ls|false|2000|
+---+---+-----+----+
*/
    sparkSession.close()
  }
}
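Tuple-based Datasets get the generic column names `_1`..`_4` seen above; explicit names can be assigned at creation time with `toDF`. A minimal sketch (the names "id", "name", "sex", "salary" are guesses at the field meanings, for illustration only):

// Same rows, but with explicit column names instead of _1.._4
val df = List((1, "zs", true, 1000), (2, "ls", false, 2000))
  .toDF("id", "name", "sex", "salary")
df.show()
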
package mby00.datasource

import org.apache.spark.sql.SparkSession

// Create a Dataset from a case class
// How to test: run main directly (verified ok)
object CreateDatasetWithCaseLClassTest {
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession
      .builder().appName("CreateDatasetWithCaseLClassTest").master("local[*]").getOrCreate()
    import sparkSession.implicits._
    val dataset = List(Person("zs", true, "bj"), Person("ls", false, "sh")).toDS()
    dataset
      .show()
/**
+----+-----+-------+
|name| sex|address|
+----+-----+-------+
| zs| true| bj|
| ls|false| sh|
+----+-----+-------+
*/
    sparkSession.close()
  }
}

// case class fields are vals (constants)
case class Person(name: String, sex: Boolean, address: String)
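Spark derives the Dataset schema from the case class constructor fields; a quick way to confirm what was inferred from Person (using the same `dataset` as above):

// Inspect the schema inferred from the Person case class
dataset.printSchema()
// Expected output (roughly):
// root
//  |-- name: string (nullable = true)
//  |-- sex: boolean (nullable = false)
//  |-- address: string (nullable = true)
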
package mby00.datasource

import org.apache.spark.sql.SparkSession

// Create a Dataset from JSON files
// How to test: run main directly (verified ok)
object CreateDatasetWithJSONTest {
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession
      .builder().appName("CreateDatasetWithJSON").master("local[*]").getOrCreate()
    val dataset = sparkSession
      .read
      .json("D:\\IntelliJ IDEA 2018.2.5\\IEDAMBY\\sparksql-day1\\src\\main\\resources")
      .as("user")
    dataset
      .show()
/**
+---+----+-----+
| id|name| sex|
+---+----+-----+
| 1| zs| true|
| 2| ls|false|
| 3| ww| true|
| 4| zl|false|
+---+----+-----+
*/
    sparkSession.close()
  }
}
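`read.json` expects files containing one JSON object per line, e.g. {"id":1,"name":"zs","sex":true} for the output above. If a typed Dataset is preferred over a plain DataFrame, the rows can be mapped onto a case class; a minimal sketch (the `User` class and its field types are assumptions based on the printed columns):

// Hypothetical: a case class matching the JSON columns, defined at the top level of the file
case class User(id: Long, name: String, sex: Boolean)

// Inside main, after building sparkSession:
import sparkSession.implicits._
val users = sparkSession
  .read
  .json("D:\\IntelliJ IDEA 2018.2.5\\IEDAMBY\\sparksql-day1\\src\\main\\resources")
  .as[User]
users.show()
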
package mby00.datasource

import org.apache.spark.sql.{Dataset, SparkSession}

// Create a Dataset from an RDD
// How to test: run main directly (verified ok)
object CreateDatasetWithRDDTest {
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession
      .builder().appName("CreateDatasetWithRDDTest").master("local[*]").getOrCreate()
    import sparkSession.implicits._
    val rdd = sparkSession.sparkContext.makeRDD(List(("Hello", 1), ("Hello", 1), ("Spark", 1), ("Scala", 1)))
    val dataset: Dataset[(String, Int)] = rdd.toDS()
    dataset
      .show()
/**
+-----+---+
| _1| _2|
+-----+---+
|Hello| 1|
|Hello| 1|
|Spark| 1|
|Scala| 1|
+-----+---+
*/
    sparkSession.close()
  }
}