Hue 集成 hdfs / yarn / hive / rdbms / oozie

0) Prerequisite
## start hadoop with yarn
## for hive, start remote metastore and hiveserver2
## for oozie, start EmbeddedOozieServer
$ jps

7409 NameNode
7494 DataNode
7600 SecondaryNameNode
7738 ResourceManager
7824 NodeManager
7876 JobHistoryServer
8047 RunJar
8102 RunJar
9963 EmbeddedOozieServer

note: the two RunJar processes are the remote Hive metastore and HiveServer2

1) integration
## modify desktop/conf/pseudo-distributed.ini
$ cd ~/work/hue/desktop/conf
$ diff -u pseudo-distributed.ini.tmpl pseudo-distributed.ini

## integrate hdfs and yarn
## [hadoop] -> [[hdfs_clusters]] -> [[[default]]]

@@ -1175,7 +1174,7 @@
 
     [[[default]]]
       # Enter the filesystem uri
-      fs_defaultfs=hdfs://localhost:8020
+      fs_defaultfs=hdfs://localhost:9000
 
       # NameNode logical name.
       ## logical_name=
@@ -1183,7 +1182,7 @@
       # Use WebHdfs/HttpFs as the communication mechanism.
       # Domain should be the NameNode or HttpFs host.
       # Default port is 14000 for HttpFs.
-      ## webhdfs_url=http://localhost:50070/webhdfs/v1
+      webhdfs_url=http://localhost:50070/webhdfs/v1
 
       # Change this if your HDFS cluster is Kerberos-secured
       ## security_enabled=false
@@ -1193,7 +1192,7 @@
       ## ssl_cert_ca_verify=True
 
       # Directory of the Hadoop configuration
-      ## hadoop_conf_dir=$HADOOP_CONF_DIR when set or '/etc/hadoop/conf'
+      hadoop_conf_dir=/Users/sun_xo/work/hadoop/etc/hadoop
 
       # Whether Hue should list this HDFS cluster. For historical reason there is no way to disable HDFS.
       ## is_enabled=true

## [hadoop] -> [[yarn_clusters]] -> [[[default]]]

@@ -1204,10 +1203,10 @@
 
     [[[default]]]
       # Enter the host on which you are running the ResourceManager
-      ## resourcemanager_host=localhost
+      resourcemanager_host=localhost
 
       # The port where the ResourceManager IPC listens on
-      ## resourcemanager_port=8032
+      resourcemanager_port=8032
 
       # Whether to submit jobs to this cluster
       submit_to=True
@@ -1219,13 +1218,13 @@
       ## security_enabled=false
 
       # URL of the ResourceManager API
-      ## resourcemanager_api_url=http://localhost:8088
+      resourcemanager_api_url=http://localhost:8088
 
       # URL of the ProxyServer API
-      ## proxy_api_url=http://localhost:8088
+      proxy_api_url=http://localhost:8088
 
       # URL of the HistoryServer API
-      ## history_server_api_url=http://localhost:19888
+      history_server_api_url=http://localhost:19888
 
       # URL of the Spark History Server
       ## spark_history_server_url=http://localhost:18088

## [filebrowser]

@@ -1545,7 +1544,7 @@
 
 [filebrowser]
   # Location on local filesystem where the uploaded archives are temporary stored.
-  ## archive_upload_tempdir=/tmp
+  archive_upload_tempdir=/tmp
 
   # Show Download Button for HDFS file browser.
   ## show_download_button=true

## integrate hive
## [notebook] -> [[interpreters]] -> [[[hive]]]

@@ -957,16 +957,15 @@
     # Define the name and how to connect and execute the language.
     # https://docs.gethue.com/administrator/configuration/editor/
     ...
-    # [[[hive]]]
-    #   name=Hive
-    #   interface=hiveserver2
     ...
+    [[[hive]]]
+      name=Hive
+      interface=hiveserver2
 
     # [[[hplsql]]]
     #   name=Hplsql

## [beeswax]

@@ -1262,10 +1261,10 @@
 
   # Host where HiveServer2 is running.
   # If Kerberos security is enabled, use fully-qualified domain name (FQDN).
-  ## hive_server_host=localhost
+  hive_server_host=localhost
 
   # Port where HiveServer2 Thrift server runs on.
-  ## hive_server_port=10000
+  hive_server_port=10000
 
   # Http thrift port for HiveServer2.
   ## hive_server_http_port=10001
@@ -1309,16 +1308,16 @@
 
   # Host where Hive Metastore Server (HMS) is running.
   # If Kerberos security is enabled, the fully-qualified domain name (FQDN) is required.
-  ## hive_metastore_host=localhost
+  hive_metastore_host=localhost
 
   # Configure the port the Hive Metastore Server runs on.
-  ## hive_metastore_port=9083
+  hive_metastore_port=9083
 
   # Hive configuration directory, where hive-site.xml is located
-  ## hive_conf_dir=/etc/hive/conf
+  hive_conf_dir=/Users/sun_xo/work/hive-2.3.9/conf
 
   # Timeout in seconds for thrift calls to Hive service
-  ## server_conn_timeout=120
+  server_conn_timeout=120
 
   # Choose whether to use the old GetLog() thrift call from before Hive 0.14 to retrieve the logs.
   # If false, use the FetchResults() thrift call from Hive 1.0 or more instead.
@@ -1354,7 +1353,7 @@
 
   # Thrift version to use when communicating with HiveServer2.
   # Version 11 comes with Hive 3.0. If issues, try 7.
-  ## thrift_version=11
+  thrift_version=7
 
   # A comma-separated list of white-listed Hive configuration properties that users are authorized to set.
   ## config_whitelist=hive.map.aggr,hive.exec.compress.output,hive.exec.parallel,hive.execution.engine,mapreduce.job.queuename

## integrate mysql and postgresql
## [notebook] -> [[interpreters]] -> [[[mysql]]], [[[postgresql]]]

@@ -957,16 +957,15 @@
     # Define the name and how to connect and execute the language.
     # https://docs.gethue.com/administrator/configuration/editor/
 
-    # [[[mysql]]]
-    #   name = MySQL
-    #   interface=sqlalchemy
-    #   ## https://docs.sqlalchemy.org/en/latest/dialects/mysql.html
-    #   options='{"url": "mysql://root:secret@database:3306/hue"}'
-    #   ## options='{"url": "mysql://${USER}:${PASSWORD}@localhost:3306/hue"}'
     ...    
+    [[[mysql]]]
+      name = MySQL
+      interface=sqlalchemy
+      ## https://docs.sqlalchemy.org/en/latest/dialects/mysql.html
+      options='{"url": "mysql://manga:manga@localhost:3306/manga"}'
     ...
 
     # [[[hplsql]]]
     #   name=Hplsql
@@ -980,10 +979,10 @@
     #   name=Impala
     #   interface=hiveserver2
 
-    # [[[postgresql]]]
-    #   name = postgresql
-    #   interface=sqlalchemy
-    #   options='{"url": "postgresql://hue:hue@host:5432/hue"}'
+    [[[postgresql]]]
+      name = postgresql
+      interface=sqlalchemy
+      options='{"url": "postgresql://hue_u:huepassword@localhost:5432/hue_d"}'
 
     # [[[druid]]]
     #   name = Druid

## integrate oozie
## [oozie]

@@ -1511,17 +1510,17 @@
 
 [oozie]
   # Location on local FS where the examples are stored.
-  ## local_data_dir=..../examples
+  local_data_dir=/Users/sun_xo/work/oozie-5.2.1/oozie/apps
 
   # Location on local FS where the data for the examples is stored.
-  ## sample_data_dir=...thirdparty/sample_data
+  sample_data_dir=/Users/sun_xo/work/oozie-5.2.1/oozie/data
 
   # Location on HDFS where the oozie examples and workflows are stored.
   # Parameters are $TIME and $USER, e.g. /user/$USER/hue/workspaces/workflow-$TIME
-  ## remote_data_dir=/user/hue/oozie/workspaces
+  remote_data_dir=/user/sun_xo/oozie/apps
 
   # Maximum of Oozie workflows or coodinators to retrieve in one API call.
-  ## oozie_jobs_count=100
+  oozie_jobs_count=100
 
   # Use Cron format for defining the frequency of a Coordinator instead of the old frequency number/unit.
   ## enable_cron_scheduling=true

## [liboozie]

@@ -1807,13 +1806,13 @@
 [liboozie]
   # The URL where the Oozie service runs on. This is required in order for
   # users to submit jobs. Empty value disables the config check.
-  ## oozie_url=http://localhost:11000/oozie
+  oozie_url=http://localhost:11000/oozie
 
   # Requires FQDN in oozie_url if enabled
   ## security_enabled=false
 
   # Location on HDFS where the workflows/coordinator are deployed when submitted.
-  ## remote_deployement_dir=/user/hue/oozie/deployments
+  remote_deployement_dir=/user/sun_xo/oozie/apps

## note: after restarting Hue, verify the Oozie integration as follows
## Scheduler -> Workflow -> "Oozie Editor" -> select and click a workflow

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值