点击edit configuration,在左侧点击该项目。在右侧VM options中输入“-Dspark.master=local”,指示本程序本地单线程运行
new.txt
001,goods0001,10,20.00
002,goods0001,10,20.00
003,goods0002,50,30.00
004,goods0001,10,30.00
005,goods0003,90,10.00
006,goods0002,10,40.00
#
@Test
def test1(): Unit = {
  // Build (or reuse) a SparkSession. The master URL is supplied externally
  // via the VM option -Dspark.master=local (see the setup note above).
  val spark: SparkSession = SparkSession.builder().appName("Spark SQL basic example")
    .config("spark.some.config.option", "some-value").getOrCreate()
  try {
    // Implicit conversions: enables the $"col" column syntax and RDD -> DataFrame.
    import spark.implicits._
    // Load the CSV file; with no schema/header, columns default to _c0.._c3.
    val df: DataFrame = spark.read.csv("c:/users/os/desktop/new.txt")
    // Print all rows.
    df.show()
    // Select column _c1, and column _c2 with 1 added.
    df.select($"_c1", $"_c2" + 1).show()
    // Keep only the rows where _c2 is greater than or equal to 50.
    // (NOTE: previous comment said the opposite — this retains, not removes, them.)
    df.filter($"_c2" >= 50).show()
    // Group rows by _c1 and count rows per group.
    df.groupBy("_c1").count().show()
  } finally {
    // Stop the session so repeated test runs do not leak Spark resources.
    spark.stop()
  }
}
df.show()
+---+---------+---+-----+
|_c0| _c1|_c2| _c3|
+---+---------+---+-----+
|001|goods0001| 10|20.00|
|002|goods0001| 10|20.00|
|003|goods0002| 50|30.00|
|004|goods0001| 10|30.00|
|005|goods0003| 90|10.00|
|006|goods0002| 10|40.00|
+---+---------+---+-----+
df.select($"_c1", $"_c2"+1).show()
+---------+---------+
| _c1|(_c2 + 1)|
+---------+---------+
|goods0001| 11.0|
|goods0001| 11.0|
|goods0002| 51.0|
|goods0001| 11.0|
|goods0003| 91.0|
|goods0002| 11.0|
+---------+---------+
df.filter($”_c2”>=50).show()
+---+---------+---+-----+
|_c0| _c1|_c2| _c3|
+---+---------+---+-----+
|003|goods0002| 50|30.00|
|005|goods0003| 90|10.00|
+---+---------+---+-----+
df.groupBy("_c1").count().show()
+---------+-----+
| _c1|count|
+---------+-----+
|goods0002| 2|
|goods0003| 1|
|goods0001| 3|
+---------+-----+