// Batch read: build a Spark DataFrame from a Cosmos DB collection and count its rows.
// `spark` is not defined in this snippet; presumably it is the SparkSession
// supplied by the Spark shell / notebook — TODO confirm.

// Import necessary libraries
import com.microsoft.azure.cosmosdb.spark.schema._
import com.microsoft.azure.cosmosdb.spark._
import com.microsoft.azure.cosmosdb.spark.config.Config

// Read configuration
val readConfig = Config(Map(
  "Endpoint" -> "https://doctorwho.documents.azure.com:443/",
  "Masterkey" -> "YOUR-KEY-HERE",
  "Database" -> "DepartureDelays",
  "Collection" -> "flights_pcoll",
  // Optional: custom query that restricts the read to flights whose origin is 'SEA'
  "query_custom" -> "SELECT c.date, c.delay, c.distance, c.origin, c.destination FROM c WHERE c.origin = 'SEA'"
))

// Connect via azure-cosmosdb-spark to create the Spark DataFrame
val flights = spark.read.cosmosDB(readConfig)
flights.count()
从 Spark 批量写入数据到 Cosmos DB
// Batch write: save the `flights` DataFrame into the "flights_fromsea" collection.
// `Config` and `flights` are not defined in this snippet; they must already be
// in scope when it runs.

// Write configuration
val writeConfig = Config(Map(
  "Endpoint" -> "https://doctorwho.documents.azure.com:443/",
  "Masterkey" -> "YOUR-KEY-HERE",
  "Database" -> "DepartureDelays",
  "Collection" -> "flights_fromsea",
  "Upsert" -> "true"
))

// Write to Cosmos DB from the flights DataFrame
import org.apache.spark.sql.SaveMode
flights.write.mode(SaveMode.Overwrite).cosmosDB(writeConfig)
从 Cosmos DB 流式读取
// Streaming read: open a read stream on the change feed of a Cosmos DB collection.
// `spark` is not defined in this snippet; presumably it is the SparkSession
// supplied by the Spark shell / notebook — TODO confirm.

// Import necessary libraries
import com.microsoft.azure.cosmosdb.spark.schema._
import com.microsoft.azure.cosmosdb.spark._
import com.microsoft.azure.cosmosdb.spark.config.Config

// Read configuration (change-feed options enabled)
val readConfig = Config(Map(
  "Endpoint" -> "https://doctorwho.documents.azure.com:443/",
  "Masterkey" -> "YOUR-KEY-HERE",
  "Database" -> "DepartureDelays",
  "Collection" -> "flights_pcoll",
  "ReadChangeFeed" -> "true",
  "ChangeFeedQueryName" -> "Departure-Delays",
  "ChangeFeedStartFromTheBeginning" -> "false",
  "InferStreamSchema" -> "true",
  "ChangeFeedCheckpointLocation" -> "dbfs:/Departure-Delays"
))

// Open a read stream to the Cosmos DB Change Feed via azure-cosmosdb-spark to create Spark DataFrame
// NOTE(review): CosmosDBSourceProvider is not obviously covered by the imports above;
// it presumably lives in com.microsoft.azure.cosmosdb.spark.streaming — confirm and import if needed.
val df = spark.readStream
  .format(classOf[CosmosDBSourceProvider].getName)
  .options(readConfig)
  .load()
流式写入 Cosmos DB
// Streaming write: start a streaming query that writes `df` into the
// "flights_fromsea" collection.
// `Config` and `df` are not defined in this snippet; they must already be
// in scope when it runs.

// Write configuration
val writeConfig = Config(Map(
  "Endpoint" -> "https://doctorwho.documents.azure.com:443/",
  "Masterkey" -> "YOUR-KEY-HERE",
  "Database" -> "DepartureDelays",
  "Collection" -> "flights_fromsea",
  "Upsert" -> "true",
  "WritingBatchSize" -> "500",
  "CheckpointLocation" -> "/checkpointlocation_write1"
))

// Write to Cosmos DB from the streaming DataFrame
// NOTE(review): CosmosDBSinkProvider is not imported in this snippet; it presumably
// lives in com.microsoft.azure.cosmosdb.spark.streaming — confirm and import if needed.
df
  .writeStream
  .format(classOf[CosmosDBSinkProvider].getName)
  .options(writeConfig)
  .start()