oryx2默认配置文件 (Oryx 2 default configuration file)

# Copyright (c) 2014, Cloudera, Inc. All Rights Reserved.
#
# Cloudera, Inc. licenses this file to you under the Apache License,
# Version 2.0 (the "License"). You may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for
# the specific language governing permissions and limitations under the
# License.
# Oryx 2 default configuration, in HOCON syntax (Typesafe Config).
# ${...} expressions below are HOCON substitutions resolved at load time.
oryx = {
  # Optional string identifying the entire Oryx instance. Without it, Oryx has no
  # longer-term identity with external systems like Kafka. For example, without this set,
  # on startup the layers read from only the latest available input. With this set,
  # Kafka can load the last offset read and let the layers resume reading where they left off.
  id = null
  # Configuration for the Kafka input topic
  input-topic = {
    # Comma-separated list of Kafka brokers, as host1:port1(,host2:port2,...)
    broker = "localhost:9092"
    lock = {
      # Comma-separated list of Zookeeper masters, as host1:port1(,host2:port2,...)
      # Note that if you need to specify a chroot, then weirdly, it should only appear
      # at the end: host1:port1,host2:port2/chroot, not host1:port1/chroot,host2:port2/chroot
      master = "localhost:2181"
    }
    message = {
      # Input topic
      topic = "OryxInput"
      # Key/message classes that the framework receives, respectively
      key-class = "java.lang.String"
      message-class = "java.lang.String"
      # Decoder classes used to read/write key/message classes
      key-decoder-class = "kafka.serializer.StringDecoder"
      message-decoder-class = "kafka.serializer.StringDecoder"
    }
  }
  # Configuration for the Kafka model update topic
  update-topic = {
    # Comma-separated list of Kafka brokers, as host1:port1(,host2:port2,...)
    # Can be null to disable publishing to an update topic
    broker = "localhost:9092"
    lock = {
      # Comma-separated list of Zookeeper masters, as host1:port1(,host2:port2,...)
      # Note that if you need to specify a chroot, then weirdly, it should only appear
      # at the end: host1:port1,host2:port2/chroot, not host1:port1/chroot,host2:port2/chroot
      # Can be null to disable publishing to an update topic
      master = "localhost:2181"
    }
    message = {
      # Update topic
      # Can be null to disable publishing to an update topic
      topic = "OryxUpdate"
      # Decoder/encoder classes used to read/write key/message classes
      decoder-class = "kafka.serializer.StringDecoder"
      encoder-class = "kafka.serializer.StringEncoder"
      # Max size in bytes of a message to write to the update topic. Don't change this unless
      # you know what you're doing. PMML models larger than this will be passed as a location
      # on HDFS, for example. This should match the max.message.bytes configured for the
      # update topic. This value can't be larger than about 64MB in any event.
      max-size = 16777216
    }
  }
  # Default Spark key-value pairs, shared by the batch and speed (streaming) layers
  # via the ${oryx.default-streaming-config} substitutions below
  default-streaming-config = {
    spark.io.compression.codec = "lzf"
    spark.speculation = true
    spark.logConf = true
    spark.serializer = "org.apache.spark.serializer.KryoSerializer"
    spark.ui.showConsoleProgress = false
  }
  # Batch layer configuration
  batch = {
    # Streaming framework configuration
    streaming = {
      # Spark Streaming master. If local[n], make sure n >= 2
      master = "yarn-client"
      # Interval between runs of the computation layer. Default: 6 hours
      generation-interval-sec = 21600
      # Number of executors to start. In YARN-based deployments, this is a
      # maximum, and fewer executors may be used when the process is idle if
      # dynamic-allocation is enabled
      num-executors = 4
      # Cores per executor
      executor-cores = 4
      # Memory per executor
      executor-memory = "2g"
      # Heap size for the Batch driver process.
      driver-memory = "1g"
      # Enable dynamic allocation? YARN-only and not always desirable for streaming
      dynamic-allocation = false
      # Spark config key-value pairs; inherits the shared defaults declared above
      config = ${oryx.default-streaming-config}
    }
    # An implementation of com.cloudera.oryx.api.batch.BatchLayerUpdate
    # which specifies what is done with current and historical data to update a model
    update-class = null
    storage = {
      # Directory where historical data is stored. Can be local, or on HDFS, etc.
      data-dir = "file:/tmp/Oryx/data/"
      # Directory where models are output. Can be local, or on HDFS, etc.
      model-dir = "file:/tmp/Oryx/model/"
      # Writable classes used to persist key/message, respectively
      key-writable-class = "org.apache.hadoop.io.Text"
      message-writable-class = "org.apache.hadoop.io.Text"
      # Data older than this many hours may be automatically deleted. -1 means no maximum.
      max-age-data-hours = -1
    }
    # Configuration for the Spark UI
    ui = {
      # UI port
      port = 4040
    }
  }
  # Speed layer configuration
  speed = {
    # Streaming framework configuration
    streaming = {
      # Spark Streaming master. If local[n], make sure n >= 2
      master = "yarn-client"
      # NOTE(review): the upstream file's "tuning rule of thumb" comment referenced here
      # is not present in this copy — consult the original Oryx 2 reference configuration
      # Interval between runs of the computation layer in seconds. Default: 10 seconds
      generation-interval-sec = 10
      # Number of executors to start. In YARN-based deployments, this is a
      # maximum, and fewer executors may be used when the process is idle if
      # dynamic-allocation is enabled
      num-executors = 2
      # Cores per executor
      executor-cores = 4
      # Memory per executor
      executor-memory = "1g"
      # Heap size for the Speed driver process.
      driver-memory = "512m"
      # Enable dynamic allocation? YARN-only and not always desirable for streaming
      dynamic-allocation = false
      # Spark config key-value pairs; inherits the shared defaults declared above
      config = ${oryx.default-streaming-config}
    }
    # Implementation of com.cloudera.oryx.api.speed.SpeedModelManager interface that produces
    # updates from a SpeedModel and stream of input
    model-manager-class = null
    # See doc for serving.min-model-load-fraction below
    min-model-load-fraction = 0.8
    # Configuration for the Spark UI
    ui = {
      # UI port
      port = 4040
    }
  }
  # Serving layer configuration
  serving = {
    # Memory to allocate for each serving layer instance. Must be in MB for now, ending with 'm'
    memory = "4000m"
    # Optional config when using YARN-based deployment
    yarn = {
      # Number of serving layers to run
      instances = 1
      # Cores to allocate for each serving layer instance
      cores = "4"
    }
    api = {
      # Default to use well-known HTTP port for Serving Layer
      port = 80
      # Default to use well-known HTTPS port for Serving Layer
      secure-port = 443
      # User name for connecting to the service, if required. If set, must be set with password.
      # If enabled, this will enable HTTP DIGEST authentication in the API.
      user-name = null
      # Password for connecting to the service, if required. If set, must be set with user-name.
      # If enabled, this will enable HTTP DIGEST authentication in the API.
      password = null
      # The keystore file containing the server's SSL keys. Only necessary when
      # accessing a server with temporary self-signed key, which is not trusted
      # by the Java SSL implementation.
      keystore-file = null
      # Password needed for keystore file above, if any
      keystore-password = null
      # If true, operations that set or modify data, like /ingest, are not available
      read-only = false
      # An optional prefix for the path under which the service is deployed. For
      # example, set to "/contextPath" to expose services at paths like "http://example.org/contextPath/..."
      context-path = "/"
    }
    # Where to load application JAX-RS resources (one or more comma-separated Java package names)
    application-resources = null
    # Implementation of com.cloudera.oryx.api.serving.ServingModelManager interface
    # that produces a ServingModel from stream of updates
    model-manager-class = null
    # Don't consider a model loaded and ready to use until this fraction of it is loaded.
    # Some models load incrementally (e.g. ALS). Others load all at once, in which case this
    # has no effect.
    min-model-load-fraction = 0.8
    # Test-only option; don't set this in general
    no-init-topics = false
  }
  # ML tier configuration
  ml = {
    # Model evaluation settings
    eval = {
      # Fraction of current data that is used for test, versus training
      test-fraction = 0.1
      # Increase to build more candidate models per run, and pick the best one
      candidates = 1
      # Number of models to build in parallel
      parallelism = 1
    }
  }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
CDH5.8+Oryx2.2推荐系统环境搭建 1 一、 安装准备 1 1 准备4台主机搭建集群 1 2 在/etc/hosts增加(所有主机) 1 3 禁用IPV6(所有主机) 1 4 关闭防火墙(所有主机) 1 5 禁用selinux(所有主机) 2 6 配置时钟同步(所有主机) 2 7 配置.ssh免密码登录(所有主机) 2 二、 CDH安装 3 1 安装方式选择(PATH B方式,配置本地yum源安装): 3 2 软件、安装包下载 4 3 安装Oracle JDK1.8(全部主机) 4 4 安装mysql数据库(server60159) 4 5 配置mysql数据库为InnoDB模式 4 6 创建CDH相关数据库 6 7 下载mysql-jdbc驱动并做相应配置 7 8 搭建本地yum源 7 9 安装cloudera-manager-server(server60159) 7 10 安装cloudera-manager-agent(所有主机) 7 11 将mysql-jdbc驱动拷贝到需要的目录(server60159) 8 12 初始化mysql数据库-重要(server60159) 8 13 在agent主机上修改连接server主机的主机名(所有主机) 8 14 主机参数配置-附加部分(所有主机) 8 15 启动CDH服务 9 三、 安装CDH-Parcels(组件服务) 9 1 访问安装页面 9 2 配置本地Parcels(server60159) 9 3 选择安装的组件(一些步骤没有截图) 10 4 安装过程中需要配置相关数据库 10 5 安装完成,提示成功 11 四、 安装kafka 11 1 下载软件 11 2 修改配置 11 3 启动服务 11 五、 安装Oryx 11 1 下载软件 12 2 下载配置文件oryx.conf 12 3 创建kafka主题(topic) 12 4 启动Oryx 12 5 遇到错误处理解决(可能远不止这些错误) 13 6 推送kafka数据 13 7 访问Oryx 13 六、 联系作者 14

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值