oryx2默认配置文件 (Oryx 2 default configuration file)

# Copyright (c) 2014, Cloudera, Inc. All Rights Reserved.
#
# Cloudera, Inc. licenses this file to you under the Apache License,
# Version 2.0 (the "License"). You may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for
# the specific language governing permissions and limitations under the
# License.
# Oryx 2 default configuration, in HOCON syntax (Typesafe Config).
# ${...} expressions below are HOCON substitutions resolved at load time.
oryx = {
  # Optional string identifying the entire Oryx instance. Without it, Oryx has no
  # longer-term identity with external systems like Kafka. For example, without this set,
  # on startup the layers read from only the latest available input. With this set,
  # Kafka can load the last offset read and let the layers resume reading where they left off.
  id = null
  # Configuration for the Kafka input topic
  input-topic = {
    # Comma-separated list of Kafka brokers, as host1:port1(,host2:port2,...)
    broker = "localhost:9092"
    lock = {
      # Comma-separated list of Zookeeper masters, as host1:port1(,host2:port2,...)
      # Note that if you need to specify a chroot, then weirdly, it should only appear
      # at the end: host1:port1,host2:port2/chroot, not host1:port1/chroot,host2:port2/chroot
      master = "localhost:2181"
    }
    message = {
      # Input topic
      topic = "OryxInput"
      # Key/message classes that the framework receives, respectively
      key-class = "java.lang.String"
      message-class = "java.lang.String"
      # Decoder classes used to read/write key/message classes
      key-decoder-class = "kafka.serializer.StringDecoder"
      message-decoder-class = "kafka.serializer.StringDecoder"
    }
  }
  # Configuration for the Kafka model update topic
  update-topic = {
    # Comma-separated list of Kafka brokers, as host1:port1(,host2:port2,...)
    # Can be null to disable publishing to an update topic
    broker = "localhost:9092"
    lock = {
      # Comma-separated list of Zookeeper masters, as host1:port1(,host2:port2,...)
      # Note that if you need to specify a chroot, then weirdly, it should only appear
      # at the end: host1:port1,host2:port2/chroot, not host1:port1/chroot,host2:port2/chroot
      # Can be null to disable publishing to an update topic
      master = "localhost:2181"
    }
    message = {
      # Update topic
      # Can be null to disable publishing to an update topic
      topic = "OryxUpdate"
      # Decoder/encoder classes used to read/write key/message classes
      decoder-class = "kafka.serializer.StringDecoder"
      encoder-class = "kafka.serializer.StringEncoder"
      # Max size in bytes of a message to write to the update topic. Don't change this unless
      # you know what you're doing. PMML models larger than this will be passed as a location
      # on HDFS, for example. This should match the max.message.bytes configured for the
      # update topic. This value can't be larger than about 64MB in any event.
      max-size = 16777216
    }
  }
  # Default Spark key-value pairs, shared by the batch and speed (streaming) layers
  # via the ${oryx.default-streaming-config} substitutions below
  default-streaming-config = {
    spark.io.compression.codec = "lzf"
    spark.speculation = true
    spark.logConf = true
    spark.serializer = "org.apache.spark.serializer.KryoSerializer"
    spark.ui.showConsoleProgress = false
  }
  # Batch layer configuration
  batch = {
    # Streaming framework configuration
    streaming = {
      # Spark Streaming master. If local[n], make sure n >= 2
      master = "yarn-client"
      # Interval between runs of the computation layer. Default: 6 hours
      generation-interval-sec = 21600
      # Number of executors to start. In YARN-based deployments, this is a
      # maximum, and fewer executors may be used when the process is idle if
      # dynamic-allocation is enabled
      num-executors = 4
      # Cores per executor
      executor-cores = 4
      # Memory per executor
      executor-memory = "2g"
      # Heap size for the Batch driver process.
      driver-memory = "1g"
      # Enable dynamic allocation? YARN-only and not always desirable for streaming
      dynamic-allocation = false
      # Spark config key-value pairs; inherits the shared defaults declared above
      config = ${oryx.default-streaming-config}
    }
    # An implementation of com.cloudera.oryx.api.batch.BatchLayerUpdate
    # which specifies what is done with current and historical data to update a model
    update-class = null
    storage = {
      # Directory where historical data is stored. Can be local, or on HDFS, etc.
      data-dir = "file:/tmp/Oryx/data/"
      # Directory where models are output. Can be local, or on HDFS, etc.
      model-dir = "file:/tmp/Oryx/model/"
      # Writable classes used to persist key/message, respectively
      key-writable-class = "org.apache.hadoop.io.Text"
      message-writable-class = "org.apache.hadoop.io.Text"
      # Data older than this many hours may be automatically deleted. -1 means no maximum.
      max-age-data-hours = -1
    }
    # Configuration for the Spark UI
    ui = {
      # UI port
      port = 4040
    }
  }
  # Speed layer configuration
  speed = {
    # Streaming framework configuration
    streaming = {
      # Spark Streaming master. If local[n], make sure n >= 2
      master = "yarn-client"
      # NOTE(review): the upstream file's "tuning rule of thumb" comment referenced here
      # is not present in this copy — consult the original Oryx 2 reference configuration
      # Interval between runs of the computation layer in seconds. Default: 10 seconds
      generation-interval-sec = 10
      # Number of executors to start. In YARN-based deployments, this is a
      # maximum, and fewer executors may be used when the process is idle if
      # dynamic-allocation is enabled
      num-executors = 2
      # Cores per executor
      executor-cores = 4
      # Memory per executor
      executor-memory = "1g"
      # Heap size for the Speed driver process.
      driver-memory = "512m"
      # Enable dynamic allocation? YARN-only and not always desirable for streaming
      dynamic-allocation = false
      # Spark config key-value pairs; inherits the shared defaults declared above
      config = ${oryx.default-streaming-config}
    }
    # Implementation of com.cloudera.oryx.api.speed.SpeedModelManager interface that produces
    # updates from a SpeedModel and stream of input
    model-manager-class = null
    # See doc for serving.min-model-load-fraction below
    min-model-load-fraction = 0.8
    # Configuration for the Spark UI
    ui = {
      # UI port
      port = 4040
    }
  }
  # Serving layer configuration
  serving = {
    # Memory to allocate for each serving layer instance. Must be in MB for now, ending with 'm'
    memory = "4000m"
    # Optional config when using YARN-based deployment
    yarn = {
      # Number of serving layers to run
      instances = 1
      # Cores to allocate for each serving layer instance
      cores = "4"
    }
    api = {
      # Default to use well-known HTTP port for Serving Layer
      port = 80
      # Default to use well-known HTTPS port for Serving Layer
      secure-port = 443
      # User name for connecting to the service, if required. If set, must be set with password.
      # If enabled, this will enable HTTP DIGEST authentication in the API.
      user-name = null
      # Password for connecting to the service, if required. If set, must be set with user-name.
      # If enabled, this will enable HTTP DIGEST authentication in the API.
      password = null
      # The keystore file containing the server's SSL keys. Only necessary when
      # accessing a server with temporary self-signed key, which is not trusted
      # by the Java SSL implementation.
      keystore-file = null
      # Password needed for keystore file above, if any
      keystore-password = null
      # If true, operations that set or modify data, like /ingest, are not available
      read-only = false
      # An optional prefix for the path under which the service is deployed. For
      # example, set to "/contextPath" to expose services at paths like "http://example.org/contextPath/..."
      context-path = "/"
    }
    # Where to load application JAX-RS resources (one or more comma-separated Java package names)
    application-resources = null
    # Implementation of com.cloudera.oryx.api.serving.ServingModelManager interface
    # that produces a ServingModel from stream of updates
    model-manager-class = null
    # Don't consider a model loaded and ready to use until this fraction of it is loaded.
    # Some models load incrementally (e.g. ALS). Others load all at once, in which case this
    # has no effect.
    min-model-load-fraction = 0.8
    # Test-only option; don't set this in general
    no-init-topics = false
  }
  # ML tier configuration
  ml = {
    # Model evaluation settings
    eval = {
      # Fraction of current data that is used for test, versus training
      test-fraction = 0.1
      # Increase to build more candidate models per run, and pick the best one
      candidates = 1
      # Number of models to build in parallel
      parallelism = 1
    }
  }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
CDH5.8+Oryx2.2推荐系统环境搭建 1 一、 安装准备 1 1 准备4台主机搭建集群 1 2 在/etc/hosts增加(所有主机) 1 3 禁用IPV6(所有主机) 1 4 关闭防火墙(所有主机) 1 5 禁用selinux(所有主机) 2 6 配置时钟同步(所有主机) 2 7 配置.ssh免密码登录(所有主机) 2 二、 CDH安装 3 1 安装方式选择(PATH B方式,配置本地yum源安装): 3 2 软件、安装包下载 4 3 安装Oracle JDK1.8(全部主机) 4 4 安装mysql数据库(server60159) 4 5 配置mysql数据库为InnoDB模式 4 6 创建CDH相关数据库 6 7 下载mysql-jdbc驱动并做相应配置 7 8 搭建本地yum源 7 9 安装cloudera-manager-server(server60159) 7 10 安装cloudera-manager-agent(所有主机) 7 11 将mysql-jdbc驱动拷贝到需要的目录(server60159) 8 12 初始化mysql数据库-重要(server60159) 8 13 在agent主机上修改连接server主机的主机名(所有主机) 8 14 主机参数配置-附加部分(所有主机) 8 15 启动CDH服务 9 三、 安装CDH-Parcels(组件服务) 9 1 访问安装页面 9 2 配置本地Parcels(server60159) 9 3 选择安装的组件(一些步骤没有截图) 10 4 安装过程中需要配置相关数据库 10 5 安装完成,提示成功 11 四、 安装kafka 11 1 下载软件 11 2 修改配置 11 3 启动服务 11 五、 安装Oryx 11 1 下载软件 12 2 下载配置文件oryx.conf 12 3 创建kafka主题(topic) 12 4 启动Oryx 12 5 遇到错误处理解决(可能远不止这些错误) 13 6 推送kafka数据 13 7 访问Oryx 13 六、 联系作者 14

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值