一、10.17 周一
1.1)TiDB
- TiDB 高并发写入场景最佳实践
核心思路:将自增主键 ID 打散(散列),使写入均匀分布到多个 Region,避免高并发写入时的热点问题
1.2)Spark2.1.0入门:DataFrame的创建
scala> import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.SparkSession
scala> val spark=SparkSession.builder().getOrCreate()
spark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@2bdab835
//引入隐式转换,使 RDD 可以转换为 DataFrame,并支持后续的 SQL 操作
scala> import spark.implicits._
import spark.implicits._
scala> val df = spark.read.json("file:///usr/local/spark/examples/src/main/resources/people.json")
df: org.apache.spark.sql.DataFrame = [age: bigint, name: string]
scala> df.show()
+----+-------+
| age| name|
+----+-------+
|null|Michael|
| 30| Andy|
| 19| Justin|
+----+-------+
- mac 安装 spark
brew install hadoop
brew install apache-spark
cd spark/spark-3.3.0-bin-hadoop3/bin
./spark-shell
这样就启动了 Scala 交互式 shell(spark-shell)
scala> df.select(df("name"), df("age") + 1).show()
+-------+---------+
| name|(age + 1)|
+-------+---------+
|Michael| null|
| Andy| 31|
| Justin| 20|
+-------+---------+
scala> df.filter(df("age") > 20).show()
+---+----+
|age|name|
+---+----+
| 30|Andy|
+---+----+