0) Prerequisites
## start hadoop with yarn
## for hive, start remote metastore and hiveserver2
## for oozie, start EmbeddedOozieServer
$ jps
7409 NameNode
7494 DataNode
7600 SecondaryNameNode
7738 ResourceManager
7824 NodeManager
7876 JobHistoryServer
8047 RunJar
8102 RunJar
9963 EmbeddedOozieServer
note: the 2 RunJar processes are the remote metastore and hiveserver2
1) integration
## modify desktop/conf/pseudo-distributed.ini
$ cd ~/work/hue/desktop/conf
$ diff -u pseudo-distributed.ini.tmpl pseudo-distributed.ini
## integrate hdfs and yarn
## [hadoop] -> [[hdfs_clusters]] -> [[[default]]]
@@ -1175,7 +1174,7 @@
[[[default]]]
# Enter the filesystem uri
- fs_defaultfs=hdfs://localhost:8020
+ fs_defaultfs=hdfs://localhost:9000
# NameNode logical name.
## logical_name=
@@ -1183,7 +1182,7 @@
# Use WebHdfs/HttpFs as the communication mechanism.
# Domain should be the NameNode or HttpFs host.
# Default port is 14000 for HttpFs.
- ## webhdfs_url=http://localhost:50070/webhdfs/v1
+ webhdfs_url=http://localhost:50070/webhdfs/v1
# Change this if your HDFS cluster is Kerberos-secured
## security_enabled=false
@@ -1193,7 +1192,7 @@
## ssl_cert_ca_verify=True
# Directory of the Hadoop configuration
- ## hadoop_conf_dir=$HADOOP_CONF_DIR when set or '/etc/hadoop/conf'
+ hadoop_conf_dir=/Users/sun_xo/work/hadoop/etc/hadoop
# Whether Hue should list this HDFS cluster. For historical reason there is no way to disable HDFS.
## is_enabled=true
## [hadoop] -> [[yarn_clusters]] -> [[[default]]]
@@ -1204,10 +1203,10 @@
[[[default]]]
# Enter the host on which you are running the ResourceManager
- ## resourcemanager_host=localhost
+ resourcemanager_host=localhost
# The port where the ResourceManager IPC listens on
- ## resourcemanager_port=8032
+ resourcemanager_port=8032
# Whether to submit jobs to this cluster
submit_to=True
@@ -1219,13 +1218,13 @@
## security_enabled=false
# URL of the ResourceManager API
- ## resourcemanager_api_url=http://localhost:8088
+ resourcemanager_api_url=http://localhost:8088
# URL of the ProxyServer API
- ## proxy_api_url=http://localhost:8088
+ proxy_api_url=http://localhost:8088
# URL of the HistoryServer API
- ## history_server_api_url=http://localhost:19888
+ history_server_api_url=http://localhost:19888
# URL of the Spark History Server
## spark_history_server_url=http://localhost:18088
## [filebrowser]
@@ -1545,7 +1544,7 @@
[filebrowser]
# Location on local filesystem where the uploaded archives are temporary stored.
- ## archive_upload_tempdir=/tmp
+ archive_upload_tempdir=/tmp
# Show Download Button for HDFS file browser.
## show_download_button=true
## integrate hive
## [notebook] -> [[interpreters]] -> [[[hive]]]
@@ -957,16 +957,15 @@
# Define the name and how to connect and execute the language.
# https://docs.gethue.com/administrator/configuration/editor/
...
- # [[[hive]]]
- # name=Hive
- # interface=hiveserver2
...
+ [[[hive]]]
+ name=Hive
+ interface=hiveserver2
# [[[hplsql]]]
# name=Hplsql
## [beeswax]
@@ -1262,10 +1261,10 @@
# Host where HiveServer2 is running.
# If Kerberos security is enabled, use fully-qualified domain name (FQDN).
- ## hive_server_host=localhost
+ hive_server_host=localhost
# Port where HiveServer2 Thrift server runs on.
- ## hive_server_port=10000
+ hive_server_port=10000
# Http thrift port for HiveServer2.
## hive_server_http_port=10001
@@ -1309,16 +1308,16 @@
# Host where Hive Metastore Server (HMS) is running.
# If Kerberos security is enabled, the fully-qualified domain name (FQDN) is required.
- ## hive_metastore_host=localhost
+ hive_metastore_host=localhost
# Configure the port the Hive Metastore Server runs on.
- ## hive_metastore_port=9083
+ hive_metastore_port=9083
# Hive configuration directory, where hive-site.xml is located
- ## hive_conf_dir=/etc/hive/conf
+ hive_conf_dir=/Users/sun_xo/work/hive-2.3.9/conf
# Timeout in seconds for thrift calls to Hive service
- ## server_conn_timeout=120
+ server_conn_timeout=120
# Choose whether to use the old GetLog() thrift call from before Hive 0.14 to retrieve the logs.
# If false, use the FetchResults() thrift call from Hive 1.0 or more instead.
@@ -1354,7 +1353,7 @@
# Thrift version to use when communicating with HiveServer2.
# Version 11 comes with Hive 3.0. If issues, try 7.
- ## thrift_version=11
+ thrift_version=7
# A comma-separated list of white-listed Hive configuration properties that users are authorized to set.
## config_whitelist=hive.map.aggr,hive.exec.compress.output,hive.exec.parallel,hive.execution.engine,mapreduce.job.queuename
## integrate mysql and postgresql
## [notebook] -> [[interpreters]] -> [[[mysql]]], [[[postgresql]]]
@@ -957,16 +957,15 @@
# Define the name and how to connect and execute the language.
# https://docs.gethue.com/administrator/configuration/editor/
- # [[[mysql]]]
- # name = MySQL
- # interface=sqlalchemy
- # ## https://docs.sqlalchemy.org/en/latest/dialects/mysql.html
- # options='{"url": "mysql://root:secret@database:3306/hue"}'
- # ## options='{"url": "mysql://${USER}:${PASSWORD}@localhost:3306/hue"}'
...
+ [[[mysql]]]
+ name = MySQL
+ interface=sqlalchemy
+ ## https://docs.sqlalchemy.org/en/latest/dialects/mysql.html
+ options='{"url": "mysql://manga:manga@localhost:3306/manga"}'
...
# [[[hplsql]]]
# name=Hplsql
@@ -980,10 +979,10 @@
# name=Impala
# interface=hiveserver2
- # [[[postgresql]]]
- # name = postgresql
- # interface=sqlalchemy
- # options='{"url": "postgresql://hue:hue@host:5432/hue"}'
+ [[[postgresql]]]
+ name = postgresql
+ interface=sqlalchemy
+ options='{"url": "postgresql://hue_u:huepassword@localhost:5432/hue_d"}'
# [[[druid]]]
# name = Druid
## integrate oozie
## [oozie]
@@ -1511,17 +1510,17 @@
[oozie]
# Location on local FS where the examples are stored.
- ## local_data_dir=..../examples
+ local_data_dir=/Users/sun_xo/work/oozie-5.2.1/oozie/apps
# Location on local FS where the data for the examples is stored.
- ## sample_data_dir=...thirdparty/sample_data
+ sample_data_dir=/Users/sun_xo/work/oozie-5.2.1/oozie/data
# Location on HDFS where the oozie examples and workflows are stored.
# Parameters are $TIME and $USER, e.g. /user/$USER/hue/workspaces/workflow-$TIME
- ## remote_data_dir=/user/hue/oozie/workspaces
+ remote_data_dir=/user/sun_xo/oozie/apps
# Maximum of Oozie workflows or coodinators to retrieve in one API call.
- ## oozie_jobs_count=100
+ oozie_jobs_count=100
# Use Cron format for defining the frequency of a Coordinator instead of the old frequency number/unit.
## enable_cron_scheduling=true
## [liboozie]
@@ -1807,13 +1806,13 @@
[liboozie]
# The URL where the Oozie service runs on. This is required in order for
# users to submit jobs. Empty value disables the config check.
- ## oozie_url=http://localhost:11000/oozie
+ oozie_url=http://localhost:11000/oozie
# Requires FQDN in oozie_url if enabled
## security_enabled=false
# Location on HDFS where the workflows/coordinator are deployed when submitted.
- ## remote_deployement_dir=/user/hue/oozie/deployments
+ remote_deployement_dir=/user/sun_xo/oozie/apps
## note: after restarting hue, verify the oozie integration as follows
## Scheduler -> Workflow -> "Oozie Editor" -> select and click a workflow