文章目录
一、准备
re_data官方文档
完成dbt的安装及测试
可参考文章:DBT的安装及测试(基础)
配置文件的修改:
1、项目profile_test目录下的dbt_project.yml
profile_test这个名字需要和 /root/.dbt/profiles.yml 文件配置的项目名一致
name: 'profile_test'
version: '1.0.0'
config-version: 2
profile: 'profile_test'
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
docs-paths: ["docs"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]
target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"
on-run-end:
- "{{ re_data.save_test_history(results) }}"
- "{{ re_data.save_run_history(results) }}"
#调节整体项目告警等级
#tests:
# +severity: warn
vars:
# 公共变量
#dt: '{{ (modules.datetime.date.today() + modules.datetime.timedelta(hours=-8)).strftime("%Y-%m-%d") }}'
dt: '{{ (modules.datetime.date.today()).strftime("%Y-%m-%d") }}'
monitor_table: 'monitor.monitor_source_table_update_mark_info'
# (optional) if not passed, stats for last day will be computed
re_data:time_window_start: '{{ (run_started_at - modules.datetime.timedelta(1)).strftime("%Y-%m-%d 00:00:00") }}'
re_data:time_window_end: '{{ run_started_at.strftime("%Y-%m-%d 00:00:00") }}'
re_data:anomaly_detector:
name: modified_z_score
threshold: 3 # 阈值
re_data:schemas:
- ods
- dwd
- dwm
- dws
- ads
- monitor
re_data:metrics_base:
table:
- row_count
- freshness
column:
numeric:
- min
- max
- avg
- stddev
- variance
- nulls_count
- nulls_percent
text:
- min_length
- max_length
- avg_length
- nulls_count
- missing_count
- nulls_percent
- missing_percent
# (optional) tells how much history you want to consider when looking for anomalies
re_data:anomaly_detection_look_back_days: 30
# In this example config, we tell dbt to build all models in the example/ directory
# as tables. These settings can be overridden in the individual model files
# using the `{{ config(...) }}` macro.
models:
+persist_docs:
relation: true
columns: true
re_data:
enabled: true
+schema: monitor
internal:
+schema: monitor
profile_test:
# Config indicated by + and applies to all files under models/example/
ods:
schema: ods
dwd:
schema: dwd
dwm:
schema: dwm
dws:
schema: dws
ads:
schema: ads
monitor:
schema: monitor
seeds:
profile_test:
+schema: monitor
2、 /root/.dbt/profiles.yml
设置环境变量方便环境切换,或者直接固定
profile_test:
outputs:
dev:
type: redshift
threads: 1
host: "{{ env_var('REDSHIFT_HOST') }}"
cluster_id: "{{ env_var('REDSHIFT_CLUSTER_ID') }}"
port: 5439
user: "{{ env_var('REDSHIFT_USER') }}"
pass: "{{ env_var('REDSHIFT_PASSWD') }}"
dbname: "{{ env_var('REDSHIFT_DBNAME') }}"
schema: "{{ env_var('REDSHIFT_SCHEMA') }}"
prod:
type: redshift
threads: 1
host: "{{ env_var('REDSHIFT_HOST') }}"
cluster_id: "{{ env_var('REDSHIFT_CLUSTER_ID') }}"
port: 5439
user: "{{ env_var('REDSHIFT_USER') }}"
pass: "{{ env_var('REDSHIFT_PASSWD') }}"
dbname: "{{ env_var('REDSHIFT_DBNAME') }}"
schema: "{{ env_var('REDSHIFT_SCHEMA') }}"
target: dev
# 禁止发送指标收集信息
config:
send_anonymous_usage_stats: False
3、packages.yml
此处re_data 使用本地路径安装,对re_data做了二次开发
packages:
- package: dbt-labs/dbt_utils
version: [">=0.8.0", "<0.9.0"]
- local: ../re_data
二、编译
Python环境3.7及以上
1、源码下载:
https://github.com/re-data/re-data
编译需重写以下全部方法
参数添加
@notify.command()
方法重写
@add_options(dbt_flags)
@anonymous_tracking
2、报警方法添加:
可以新建python文件,添加各种报警方法
添加到 notifications 目录下
例如:钉钉报警
修改slack.py 文件
添加如下方法:
def dingTalk(webhook_url, message, timeout=10):
    """Send a plain-text message to a DingTalk webhook.

    Note: the enclosing module must import ``json`` and ``requests``.

    Args:
        webhook_url: URL the JSON payload is POSTed to.
        message: Text placed in the ``text.content`` field of the payload.
        timeout: Seconds to wait for the HTTP request. Without a timeout
            ``requests.post`` can block indefinitely on a stalled
            connection and hang the whole notification run.

    Returns:
        The ``requests.Response`` object produced by the POST.
    """
    headers = {
        "Content-Type": "application/json"
    }
    data = {
        "msgtype": "text",
        "text": {
            "content": message
        }
    }
    json_data = json.dumps(data)
    return requests.post(
        url=webhook_url,
        data=json_data,
        headers=headers,
        timeout=timeout,
    )
修改re_data以下文件
详细代码如下
import click
import subprocess
import json
from datetime import date, timedelta
import shutil
import os
from re_data.templating import render
from re_data.include import OVERVIEW_INDEX_FILE_PATH
from http.server import SimpleHTTPRequestHandler
import webbrowser
from socketserver import TCPServer
from yachalk import chalk
import yaml
from re_data.notifications.slack import slack_notify,dingTalk
from re_data.utils import format_alerts_to_table, parse_dbt_vars
from dbt.config.project import Project
from re_data.tracking import anonymous_tracking
from re_data.notifications.push_monitor import alter_run
def add_options(options):
    """Build a decorator that applies every decorator in ``options``.

    The decorators are applied in reverse order, so the first entry of
    ``options`` ends up outermost, exactly as if the entries had been
    stacked above the function in list order.

    Args:
        options: Sequence of one-argument decorators.

    Returns:
        A decorator that wraps its argument with all of ``options``.
    """
    def _add_options(func):
        decorated = func
        for apply_option in reversed(options):
            decorated = apply_option(decorated)
        return decorated

    return _add_options
def add_dbt_flags(command_list, flags):
    """Append dbt command-line flags to ``command_list`` in place.

    Every truthy entry of ``flags`` except ``dbt_vars`` is appended as a
    ``--flag-name value`` pair, with underscores in the key turned into
    dashes. The resulting command is then printed, space-joined.

    Args:
        command_list: List of command tokens; mutated in place.
        flags: Mapping of option name to value.
    """
    for name, val in flags.items():
        # --dbt-vars is not a valid dbt flag, so it is never forwarded;
        # unset (falsy) options are skipped as well.
        if not val or name == 'dbt_vars':
            continue
        flag = '--' + name.replace('_', '-')
        command_list.extend((flag, val))
    print(' '.join(command_list))
def get_target_paths(kwargs, re_data_target_dir=None):
    """Resolve the dbt target directory and the re_data output directory.

    Args:
        kwargs: Mapping of CLI options; only ``project_dir`` is read. When
            it is missing or falsy the current working directory is used
            as the project root.
        re_data_target_dir: Optional directory for re_data output. When
            not given, ``<dbt target path>/re_data`` is used.

    Returns:
        Tuple ``(dbt_target_path, re_data_target_path)`` of absolute paths.
    """
    project_dir = kwargs.get('project_dir')
    project_root = os.path.abspath(project_dir) if project_dir else os.getcwd()
    partial = Project.partial_load(project_root)

    # `target-path` is optional in dbt_project.yml; fall back to dbt's
    # default directory name instead of raising KeyError when it is absent.
    target_path = partial.project_dict.get('target-path') or 'target'
    # NOTE(review): a relative target-path is resolved against the current
    # working directory, not project_root - confirm this is intended when
    # --project-dir points elsewhere.
    dbt_target_path = os.path.abspath(target_path)

    if re_data_target_dir:
        re_data_target_path = os.path.abspath(re_data_target_dir)
    else:
        re_data_target_path = os.path.join(dbt_target_path, 're_data')
    return dbt_target_path, re_data_target_path
# Reusable click option declarations; they are collected in `dbt_flags`
# below so they can be attached to a command together via `add_options`.

# --profile: which profile to load.
dbt_profile_option = click.option(
'--profile',
type=click.STRING,
help="""
Which profile to load. Overrides setting in dbt_project.yml
"""
)
# --target: which target of the selected profile to load.
dbt_target_option = click.option(
'--target',
type=click.STRING,
help="""
Which target to load for the given profile.
"""
)
# --profiles-dir: directory searched for profiles.yml.
dbt_profiles_dir_option = click.option(
'--profiles-dir',
type=click.STRING,
help="""
Which directory to look in for the profiles.yml file.
Default = ~/.dbt
"""
)
# --project-dir: directory searched for dbt_project.yml.
dbt_project_dir_option = click.option(
'--project-dir',
type=click.STRING,
help="""
Which directory to look in for the dbt_project.yml
file. Default is the current working directory and its
parents
"""
)
# --dbt-vars: YAML string of project variables. `add_dbt_flags` skips this
# key because it is not a valid dbt flag.
# NOTE(review): the example in the help text ends with an unbalanced single
# quote - looks like the opening quote was lost; confirm and fix the string.
dbt_vars_option = click.option(
'--dbt-vars',
type=click.STRING,
help="""
Supply variables to the project. This argument
overrides variables defined in your dbt_project.yml
file. This argument should be a YAML string, eg.
{my_var: my_val}'
"""
)
# All shared option decorators, in the order they are applied.
dbt_flags = [
dbt_profile_option,
dbt_target_option,
dbt_project_dir_option,
dbt_profiles_dir_option,
dbt_vars_option
]
@click.group(help="re_data CLI")
def main():
    """Click command group that the re_data subcommands are registered on."""
@main.command()
@click.argument(
    'project_name'
)
@anonymous_tracking
def init(project_name):
    """Create a template dbt project and run `dbt deps` inside it."""
    print(f"Creating {project_name} template project")

    # Copy the bundled template next to this module into the new project dir.
    dir_path = os.path.dirname(os.path.realpath(__file__))
    shutil.copytree(os.path.join(dir_path, 'dbt_template'), project_name)

    with open(f"{project_name}/dbt_project.yml", "w") as f:
        f.write(render.render_dbt_project(project_name))

    # Run `dbt deps` in the new project without a shell: interpolating
    # project_name into a `cd ... && dbt deps` string breaks on names with
    # spaces and lets shell metacharacters in the name be executed.
    try:
        response = subprocess.run(['dbt', 'deps'], cwd=project_name).returncode
    except OSError:
        # dbt is missing or not executable: report FAILURE, as a non-zero
        # shell exit status would have.
        response = 1

    if not response:
        info = chalk.green("SUCCESS")
    else:
        info = chalk.red("FAILURE")

    print(f"Creating {project_name} template project", info)

    if not response:
        print("Setup profile & re_data:schemas var in dbt_project.yml", "INFO")
@main.command()