前面介绍过如何通过 https://github.com/alexjbush/livy_zeppelin_cdh_csd_parcels 编译安装 zeppelin。zeppelin 安装完成后,配置用户角色、新建 spark 任务并运行,一切看似都很完美。直到某天修改了一个配置并重启 zeppelin,登录后发现以前的所有配置都丢失了,只好重新配置一遍;如此反复,让人很崩溃。排查后发现配置只会在重启之后丢失,因此推测是重启命令执行的某项操作导致了配置文件丢失。于是从 /opt/cloudera/csd 目录下把 zeppelin 的 csd jar 包下载下来,打开后查看启动脚本,在 control.py 文件中发现了问题:每次启动时都会重新上传新的配置文件,覆盖以前的配置。原代码及修改后的 python 代码(增加了判断:文件已存在就不再执行相应命令)如下:
def start():
    """Build the merged interpreter config, publish it, and run Zeppelin.

    Reads the Livy server properties and the Livy interpreter templates from
    the directories returned by ``base_conf()``, merges the generated Livy
    interpreter definitions into ``interpreter.json``, and uploads that file
    to ``ZEPPELIN_CONF_FS_DIR`` with ``hdfs dfs -put -f`` on every call.
    It then launches the Zeppelin daemon and waits on the daemon's PID.

    Environment variables read: ZEPPELIN_CONF_FS_DIR, ZEPPELIN_NOTEBOOK_DIR,
    ZEPPELIN_SHIRO_ENABLED, ZEPPELIN_HOME, ZEPPELIN_CONF_DIR, ZEPPELIN_PID_DIR.
    """

    def checked_read(path):
        # Validate first, then read, so every input file is handled in the
        # same check-then-load order.
        error_if_missing(path)
        return read_file(path)

    log("Attempting to start")

    # Base config
    base_dir, zeppelin_dir = base_conf()

    # Start-specific config: Livy server properties and interpreter templates.
    # NOTE(review): %LIVYSERVICENAMELOWER% looks like a template token that is
    # substituted before this script runs -- confirm; it is kept verbatim here.
    livy_dir = "%s/%LIVYSERVICENAMELOWER%-conf" % base_dir
    error_if_missing(livy_dir)
    server_props = checked_read("%s/server.properties" % livy_dir)
    template_json = checked_read("%s/interpreter.livy.json" % zeppelin_dir)
    template_props = checked_read("%s/livy.properties" % zeppelin_dir)

    # One candidate interpreter per Spark flavour; the generator may return
    # None, in which case that interpreter is left out.
    candidates = (
        generate_livy_conf_struct("spark", "2CYCWRZPP", "livy", server_props,
                                  template_json, template_props),
        generate_livy_conf_struct("spark2", "2CYCWDZPP", "livy2", server_props,
                                  template_json, template_props),
    )
    livy_interpreters = [conf for conf in candidates if conf is not None]

    # Merge the Livy interpreters into interpreter.json and rewrite it in place.
    interpreter_path = "%s/interpreter.json" % zeppelin_dir
    merged = merge_livy_confs_into_interpreter(checked_read(interpreter_path),
                                               livy_interpreters)
    with open(interpreter_path, 'w+') as out:
        json.dump(merged, out)

    # Publish the config. "-put -f" runs unconditionally, so whatever copy is
    # already stored in ZEPPELIN_CONF_FS_DIR is replaced on every start.
    conf_fs_dir = os.environ["ZEPPELIN_CONF_FS_DIR"]
    run_command("hdfs dfs -mkdir -p %s" % conf_fs_dir)
    run_command("hdfs dfs -put -f %s %s" % (interpreter_path, conf_fs_dir))
    run_command("hdfs dfs -mkdir -p %s" % os.environ["ZEPPELIN_NOTEBOOK_DIR"])

    shiro_path = "%s/shiro.ini" % zeppelin_dir
    # NOTE(review): this re-checks interpreter.json rather than shiro.ini --
    # possibly a copy-paste slip; confirm before changing.
    error_if_missing(interpreter_path)
    if os.environ["ZEPPELIN_SHIRO_ENABLED"] == "false":
        # Rename shiro.ini to shiro.ini.template when Shiro is disabled.
        shutil.move(shiro_path, "%s.template" % shiro_path)

    # Run zeppelin, this seems to leak threads, look for a way to clean up properly
    daemon_cmd = "%s/bin/zeppelin-daemon.sh --config %s start" % (
        os.environ["ZEPPELIN_HOME"], os.environ["ZEPPELIN_CONF_DIR"])
    run_command(daemon_cmd)

    # Wait for PID to stop
    run_command("tail --pid=$( cat %s/zeppelin-zeppelin-*.pid) -f /dev/null"
                % os.environ["ZEPPELIN_PID_DIR"])
def _fs_path_exists(path):
    """Return True when ``hdfs dfs -test -e`` reports that *path* exists.

    The check goes through the Hadoop filesystem shell, so *path* is resolved
    the same way as in the ``hdfs dfs -put`` / ``-mkdir`` commands below,
    whatever URI scheme it carries. Any non-zero exit status (including a
    failure to reach the filesystem) is treated as "does not exist".
    """
    # Local import: keeps this block self-contained without touching the
    # file's import section.
    import subprocess

    return subprocess.call(["hdfs", "dfs", "-test", "-e", path]) == 0


def start():
    """Build the merged interpreter config, publish it once, and run Zeppelin.

    Reads the Livy server properties and the Livy interpreter templates from
    the directories returned by ``base_conf()``, merges the generated Livy
    interpreter definitions into the local ``interpreter.json``, and uploads
    that file to ``ZEPPELIN_CONF_FS_DIR`` only when no ``interpreter.json`` is
    stored there yet, so settings saved by a previous run are not overwritten
    on restart. It then launches the Zeppelin daemon and waits on its PID.

    Environment variables read: ZEPPELIN_CONF_FS_DIR, ZEPPELIN_NOTEBOOK_DIR,
    ZEPPELIN_SHIRO_ENABLED, ZEPPELIN_HOME, ZEPPELIN_CONF_DIR, ZEPPELIN_PID_DIR.
    """
    log("Attempting to start")

    # Base config
    (base_conf_dir, zeppelin_conf_dir) = base_conf()

    # Start specific conf
    livy_conf_dir = "%s/livy-conf" % base_conf_dir
    error_if_missing(livy_conf_dir)
    livy_conf_file = "%s/server.properties" % livy_conf_dir
    error_if_missing(livy_conf_file)
    server_props_string = read_file(livy_conf_file)

    livy_interpreter_json_filename = "%s/interpreter.livy.json" % zeppelin_conf_dir
    error_if_missing(livy_interpreter_json_filename)
    livy_interpreter_json_string = read_file(livy_interpreter_json_filename)

    livy_properties_filename = "%s/livy.properties" % zeppelin_conf_dir
    error_if_missing(livy_properties_filename)
    livy_properties_string = read_file(livy_properties_filename)

    # One candidate interpreter per Spark flavour; the generator may return
    # None, in which case that interpreter is left out.
    livy_conf_struct = generate_livy_conf_struct(
        "spark", "2CYCWRZPP", "livy", server_props_string,
        livy_interpreter_json_string, livy_properties_string)
    livy2_conf_struct = generate_livy_conf_struct(
        "spark2", "2CYCWDZPP", "livy2", server_props_string,
        livy_interpreter_json_string, livy_properties_string)
    livy_interpreters = [
        struct for struct in (livy_conf_struct, livy2_conf_struct)
        if struct is not None
    ]

    # Merge the Livy interpreters into interpreter.json and rewrite it in place.
    interpreter_json_filename = "%s/interpreter.json" % zeppelin_conf_dir
    error_if_missing(interpreter_json_filename)
    interpreter_string = read_file(interpreter_json_filename)
    interpreter_struct = merge_livy_confs_into_interpreter(interpreter_string, livy_interpreters)
    completed_filename = interpreter_json_filename
    with open(completed_filename, 'w+') as f:
        json.dump(interpreter_struct, f)

    conf_fs_dir = os.environ["ZEPPELIN_CONF_FS_DIR"]
    # "-mkdir -p" succeeds when the directory already exists, so it needs no guard.
    run_command("hdfs dfs -mkdir -p %s" % conf_fs_dir)
    # Upload only on first start. The existence check must go through the
    # Hadoop shell: slicing off a fixed 7-character scheme and probing the
    # local disk only works for file:// URIs.
    if not _fs_path_exists("%s/interpreter.json" % conf_fs_dir):
        run_command("hdfs dfs -put -f %s %s" % (completed_filename, conf_fs_dir))
    run_command("hdfs dfs -mkdir -p %s" % os.environ["ZEPPELIN_NOTEBOOK_DIR"])

    shiro_filename = "%s/shiro.ini" % zeppelin_conf_dir
    # Check the file handled below (the previous code re-checked
    # interpreter.json here instead).
    error_if_missing(shiro_filename)
    if os.environ["ZEPPELIN_SHIRO_ENABLED"] == "false":
        # Rename shiro.ini to shiro.ini.template when Shiro is disabled.
        shutil.move(shiro_filename, "%s.template" % shiro_filename)

    # Run zeppelin, this seems to leak threads, look for a way to clean up properly
    run_command("%s/bin/zeppelin-daemon.sh --config %s start"
                % (os.environ["ZEPPELIN_HOME"], os.environ["ZEPPELIN_CONF_DIR"]))

    # Wait for PID to stop
    run_command("tail --pid=$( cat %s/zeppelin-zeppelin-*.pid) -f /dev/null"
                % os.environ["ZEPPELIN_PID_DIR"])
    return
修改源代码后替换文件,重启 cm,再重启 zeppelin,配置文件没有被覆盖,问题修复。