一、superset下载安装
#下载链接:
https://github.com/apache/superset
#进入到superset目录下直接执行如下命令
python setup.py install
#创建管理员账号
superset fab create-admin
#初始化数据库
superset db upgrade
#加载例子
superset load_examples
#初始化
superset init
#启动superset 指定端口
superset run -h 0.0.0.0 -p 8088 --with-threads --debugger
#后台启动
nohup superset run -h 0.0.0.0 -p 8088 --with-threads --debugger &
二、Python 虚拟环境安装
参考官网:https://superset.apache.org/docs/installation/installing-superset-from-scratch
#下载Linux依赖
yum install gcc gcc-c++ libffi-devel python-devel python-pip python-wheel openssl-devel cyrus-sasl-devel openldap-devel
#Python 虚拟环境
pip install virtualenv
#创建superset环境(最好进入到指定目录后再创建)
python3 -m venv superset
#进入虚拟环境
. superset/bin/activate
#安装superset
pip install apache-superset
pip install apache-superset -i https://pypi.tuna.tsinghua.edu.cn/simple/
#如果下载过慢 可以使用国内镜像
(1)阿里云 http://mirrors.aliyun.com/pypi/simple/
(2)豆瓣http://pypi.douban.com/simple/
(3)清华大学 https://pypi.tuna.tsinghua.edu.cn/simple/
(4)中国科学技术大学 http://pypi.mirrors.ustc.edu.cn/simple/
(5)华中科技大学http://pypi.hustunique.com/
#设置FLASK_APP
export FLASK_APP=superset
#初始化db
superset db upgrade
#创建admin用户
superset fab create-admin
#加载例子
superset load_examples
#superset初始化
superset init
#测试启动
superset run -p 8088 --with-threads --reload --debugger
#退出虚拟环境
deactivate
#以下是个人比较喜欢的方式
##需要先安装gunicorn
pip install gunicorn -i https://pypi.douban.com/simple/
#1、进入虚拟环境
. superset/bin/activate
#2、后台启动
gunicorn --workers 10 --timeout 120 --bind 0.0.0.0:8088 "superset.app:create_app()" --daemon
#3、退出虚拟环境
deactivate
#4、停止superset
ps -ef | awk '/superset/ && !/awk/{print $2}' | xargs kill -9
三、汉化
3.1 编辑config.py文件,将 BABEL_DEFAULT_LOCALE 设置为 "zh"
3.2 下载messages.po 并替换到如下目录下
链接:https://github.com/apache/superset/blob/master/superset/translations/zh/LC_MESSAGES/messages.po
目录: lib/python3.6/site-packages/superset/translations/zh/LC_MESSAGES
3.3 文件编译
cd lib/python3.6/site-packages/superset
pybabel compile -d translations
3.4 重启superset
四、扩展
添加database连接
presto
presto://ip:port/hive
trino
trino://ip:port/hive
clickhouse(通过trino连接)
trino://ip:port/clickhouse
clickhouse
clickhouse://default:密码@ip:port
#修改缓存格式
#配置superset的config.py
CACHE_DEFAULT_TIMEOUT = 60 * 60 * 8
#设置缓存数据类型
CACHE_CONFIG = {'CACHE_TYPE': 'filesystem'}
TABLE_NAMES_CACHE_CONFIG = {'CACHE_TYPE': 'filesystem'}
#配置flask_caching的 __init__.py
有些服务使用的flask_cache,用anaconda安装的superset自带flask_caching
#配置缓存时间
config.setdefault("CACHE_DEFAULT_TIMEOUT", 3600*8)
config.setdefault("CACHE_IGNORE_ERRORS", False)
#配置缓存的文件数
config.setdefault("CACHE_THRESHOLD", 500)
config.setdefault("CACHE_KEY_PREFIX", "flask_cache_")
config.setdefault("CACHE_MEMCACHED_SERVERS", None)
#配置缓存的文件地址
config.setdefault("CACHE_DIR", '/data/superset/cache/')
config.setdefault("CACHE_OPTIONS", None)
config.setdefault("CACHE_ARGS", [])
#配置缓存的数据类型
config.setdefault("CACHE_TYPE", "filesystem")
config.setdefault("CACHE_NO_NULL_WARNING", False)
五、遇到的问题
# pip版本过低 需要升级pip版本
pip install --upgrade pip
#ModuleNotFoundError: No module named 'dataclasses'
pip install dataclasses
#ModuleNotFoundError: No module named 'Cython'
pip install Cython -i https://pypi.douban.com/simple
#SyntaxError: future feature annotations is not defined
python3.6版本问题 升级Python版本
#error: cachelib 0.4.1 is installed but cachelib>=0.9.0 is required by {'flask-caching'}
pip install cachelib==0.9.0
或者直接修改superset根目录下setup.py文件 (其他插件也可以这样操作)
"cachelib>=0.9.0"
#Error: Could not locate a Flask application. You did not provide the "FLASK_APP" environment variable, and a "wsgi.py" or "app.py" module was not found in the current directory.
export FLASK_APP=superset
#ImportError: cannot import name 'RowProxy' from 'sqlalchemy.engine'
pip install sqlalchemy==1.3.23 -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
#因为clickhouse-sqlalchemy==0.2.x的版本需要sqlalchemy>=1.4,会与上面的sqlalchemy==1.3.23产生冲突,所以需要安装clickhouse-sqlalchemy==0.1.x版本
pip install clickhouse-sqlalchemy==0.1.10
#module 'sqlalchemy_clickhouse.base' has no attribute 'name'
pip install sqlalchemy-clickhouse
python3 -m pip install flask-sqlalchemy
#AttributeError: module 'sqlparse.keywords' has no attribute 'FLAGS'
pip3 install sqlparse=='0.4.3' -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
#ModuleNotFoundError: No module named 'marshmallow_enum'
pip3 install marshmallow_enum -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
#NameError: name '_mysql' is not defined
pip install pymysql
#superset中mysql的链接方式换成下面这种
mysql+pymysql://root:root@127.0.0.1/test
#如果上面不起作用 可以降低版本试试
pip install mysqlclient==1.4.6 -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
#ModuleNotFoundError: No module named 'werkzeug.wrappers.etag'
pip3 uninstall Flask
pip3 uninstall Werkzeug
pip3 install Flask==2.0.2
pip3 install Werkzeug==2.0.2
# 报错:Public key for mysql-community-common-8.0.30-1.el7.x86_64.rpm is not installed
rpm --import https://repo.mysql.com/RPM-GPG-KEY-mysql-2022
#注意:2022是当前年份
#No PIL installation found
pip install pillow
#Flask-Caching: CACHE_TYPE is set to null, caching is effectively disabled
THUMBNAIL_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "null"}
修改为
THUMBNAIL_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "null", "CACHE_NO_NULL_WARNING": True}
CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "null"}
TABLE_NAMES_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "null"}
修改为
CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "simple"}
TABLE_NAMES_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "simple"}
#配置presto驱动后页面配置数据库找不到presto源
#报错如下:superset ERROR: Could not load database driver: presto
pip install 'pyhive'
#如果不能解决问题再执行下面的
pip install 'pyhive[hive]'
pip install 'pyhive[presto]'
pip install 'pyhive[trino]'
#配置trino源
pip install sqlalchemy-trino
#阿里云源
pip install sqlalchemy-trino -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
#错误: (builtins.NoneType) None
[SQL: error 401: b'Basic authentication or X-Trino-User must be sent']
(Background on this error at: https://sqlalche.me/e/14/dbapi)
vim superset/lib/python3.9/site-packages/pyhive/presto.py
headers = {
'X-Presto-Catalog': self._catalog,
'X-Presto-Schema': self._schema,
'X-Presto-Source': self._source,
'X-Presto-User': self._username,
'X-Trino-Catalog': self._catalog,
'X-Trino-Schema': self._schema,
'X-Trino-Source': self._source,
'X-Trino-User': self._username,
}
if self._session_props:
headers['X-Presto-Session'] = headers['X-Trino-Session'] = ','.join(
'{}={}'.format(propname, propval)
for propname, propval in self._session_props.items()
)
如果上面修改后还是报错superset.exceptions.SupersetErrorsException: [SupersetError(message="(builtins.NoneType) None\n[SQL: error 401: b'Basic authentication or X-Trino-User must be sent']\n(Background on this error at: https://sqlalche.me/e/14/dbapi)", error_type=<SupersetErrorType.GENERIC_DB_ENGINE_ERROR: 'GENERIC_DB_ENGINE_ERROR'>, level=<ErrorLevel.ERROR: 'error'>, extra={'engine_name': 'Trino', 'issue_codes': [{'code': 1002, 'message': 'Issue 1002 - 数据库返回意外错误。'}]})]
添加数据源链接需要改下:
trino://用户名@ip:端口/hive
#连presto,clickhouse 会遇到连不上问题,需要执行以下命令:
pip install infi.clickhouse_orm==1.0.4
pip install clickhouse-sqlalchemy
#其他源配置参考
https://superset.apache.org/docs/databases/installing-database-drivers/
解决方式:
在superset安装目录下创建superset_config.py文件
# Superset specific config
# SS 相关的配置
# 行数限制 10000 行
ROW_LIMIT = 10000
# 网站服务器端口,默认 8088;如果该端口被Hadoop占用,请改为其他端口,比如18088
SUPERSET_WEBSERVER_PORT = 8088
# Flask App Builder configuration
# Your App secret key will be used for securely signing the session cookie
# and encrypting sensitive information on the database
# Make sure you are changing this key for your deployment with a strong key.
# You can generate a strong key using `openssl rand -base64 42`
# Flask 应用构建器配置
# 应用密钥用来保护会话 cookie 的安全签名
# 并且用来加密数据库中的敏感信息
# 请确保在你的部署环境选择一个强密钥
# 可以使用命令 openssl rand -base64 42 来生成一个强密钥
SECRET_KEY = "wcFgR++bD5A5tbD3uS7+iRs0J2uW+X5Pd7NaKqt+/OoJCU3cpVL5RdPK"
# The SQLAlchemy connection string to your database backend
# This connection defines the path to the database that stores your
# superset metadata (slices, connections, tables, dashboards, ...).
# Note that the connection information to connect to the datasources
# you want to explore are managed directly in the web UI
# SQLAlchemy 数据库连接信息
# 这个连接信息定义了 SS 元数据库的路径(切片、连接、表、数据面板等等)
# 注意:需要探索的数据源连接及数据库连接直接通过网页界面进行管理
#SQLALCHEMY_DATABASE_URI = 'sqlite:////path/to/superset.db'
# Flask-WTF flag for CSRF
# 是否启用 CSRF(跨站请求伪造)防护
WTF_CSRF_ENABLED = True
# Add endpoints that need to be exempt from CSRF protection
# CSRF 白名单
WTF_CSRF_EXEMPT_LIST = []
# A CSRF token that expires in 3 years
# CSRF 令牌过期时间 3 年
WTF_CSRF_TIME_LIMIT = 60 * 60 * 24 * 365 * 3
# Set this API key to enable Mapbox visualizations
# 接口密钥用来启用 Mapbox 可视化
在/etc/profile 文件中新增下面一行,并执行source /etc/profile
export SUPERSET_CONFIG_PATH=/data/superset/superset_config.py
问题:
sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) near "extra": syntax error
解决方式:
vim superset/lib/python3.9/site-packages/superset/migrations/versions/2022-07-11_11-26_ffa79af61a56_rename_report_schedule_extra_to_extra_.py
def upgrade():
op.alter_column(
"report_schedule",
"extra",
new_column_name="extra_json",
# existing info is required for MySQL
existing_type=Text,
existing_nullable=True,
)
改成
def upgrade():
with op.batch_alter_table('report_schedule') as bop:
bop.alter_column(
'extra',
new_column_name='extra_json',
# existing info is required for MySQL
existing_type=Text,
existing_nullable=True,
)
如果上面还不能解决问题 直接换成mysql数据源,修改superset_config.py文件
SQLALCHEMY_DATABASE_URI = 'mysql+pymysql://user:password@ip/superset'
修改时区:
DRUID_TZ = tz.tzutc() 改为 DRUID_TZ = tz.gettz('Asia/Shanghai')
superset查询的数据点CSV下载时页面出现下图:
查报错日志 :UnicodeEncodeError: 'latin-1' codec can't encode characters in position 208-213: ordinal not in range(256)
解决方法如下: