-- Find tables whose materialized-view refresh did NOT succeed today:
-- take every task run finished today, extract the MV name from the task
-- definition, and keep names that never appear among today's SUCCESS runs.
-- Fix: the original projected SUBSTR(..., -19) but compared
-- SUBSTR(..., -18) everywhere else; the lengths are now consistent so the
-- displayed name matches the name used in the NOT IN comparison.
-- NOTE(review): NOT IN returns no rows if the subquery ever yields NULL —
-- confirm definition/INSTR can never produce a NULL name here.
SELECT DISTINCT SUBSTR(a.definition, 18, INSTR(a.definition, 'SELECT') - 18)
FROM information_schema.task_runs a
LEFT JOIN information_schema.tasks b ON a.TASK_NAME = b.TASK_NAME
WHERE FINISH_TIME >= CURDATE()  -- restrict to runs finished today
  AND SUBSTR(a.definition, 18, INSTR(a.definition, 'SELECT') - 18) NOT IN (
    -- MV names that had at least one successful refresh today
    SELECT DISTINCT SUBSTR(a.definition, 18, INSTR(a.definition, 'SELECT') - 18)
    FROM information_schema.task_runs a
    LEFT JOIN information_schema.tasks b ON a.TASK_NAME = b.TASK_NAME
    WHERE a.STATE = 'SUCCESS'
      AND FINISH_TIME >= CURDATE())
ORDER BY SUBSTR(a.definition, 18, INSTR(a.definition, 'SELECT') - 18);
-- Tables whose materialized-view refresh FAILED today
-- (task runs in state FAILED with a finish time on or after midnight).
SELECT DISTINCT
    SUBSTR(tr.definition, 18, INSTR(tr.definition, 'SELECT') - 18)
FROM information_schema.task_runs tr
LEFT JOIN information_schema.tasks t
    ON tr.TASK_NAME = t.TASK_NAME
WHERE tr.STATE = 'FAILED'
  AND tr.FINISH_TIME >= CURDATE();
-- List the table name and column names of the materialized views in
-- schema UDBZ whose names start with 'TB_'.
-- Fix: the original inline comment was written as "--view ..." with no
-- space after "--"; MySQL-dialect parsers only treat "-- " (dash-dash plus
-- whitespace) as a line comment, so the space is required.
SELECT DISTINCT a.TABLE_NAME, a.COLUMN_NAME
FROM information_schema.COLUMNS a
JOIN INFORMATION_SCHEMA.TABLES b ON a.TABLE_NAME = b.TABLE_NAME
WHERE a.TABLE_SCHEMA = 'UDBZ'
  AND b.TABLE_TYPE = 'VIEW'  -- 'VIEW' is the TABLE_TYPE reported for views
  AND SUBSTR(a.TABLE_NAME, 1, 3) = 'TB_';
-- Compare the SR materialized-view columns against the standard-layer
-- mapping (SR_UDBZ_MAPPING): for each mapped table, build a ready-to-run
-- "SELECT <cols> FROM <table>" listing the mapped columns that are
-- MISSING from the actual UDBZ views.
-- Fixes: added the required space after "--" in the inline comment
-- (MySQL-dialect line comments need "-- " with whitespace) and terminated
-- the statement with a semicolon so the following notes don't run into it.
-- NOTE(review): a tuple NOT IN yields no rows if the subquery returns a
-- NULL in either column — confirm COLUMN_NAME can never be NULL here.
SELECT m.TABLE_NAME,
       CONCAT_WS(' ', 'SELECT', GROUP_CONCAT(m.column_name), 'FROM', m.TABLE_NAME)
FROM SR_UDBZ_MAPPING m
WHERE (TABLE_NAME, m.COLUMN_NAME)
  NOT IN (
    -- actual (table, column) pairs of the UDBZ views, with the '_COPY'
    -- suffix stripped so names line up with the mapping table
    SELECT DISTINCT REPLACE(a.TABLE_NAME, '_COPY', ''), a.COLUMN_NAME
    FROM information_schema.COLUMNS a
    JOIN INFORMATION_SCHEMA.TABLES b ON a.TABLE_NAME = b.TABLE_NAME
    WHERE a.TABLE_SCHEMA = 'UDBZ'
      AND b.TABLE_TYPE = 'VIEW'  -- 'VIEW' is the TABLE_TYPE reported for views
      AND SUBSTR(a.TABLE_NAME, 1, 3) = 'TB_'
  )
GROUP BY m.TABLE_NAME;
-- 停止seatunnel集群
./bin/stop-seatunnel-cluster.sh
-- 启动seatunnel集群
./bin/seatunnel-cluster.sh -d
-- 修改timeout超时
vi config/seatunnel.yaml
-- 启动任务
./bin/seatunnel.sh -c job/MS_GHMX.config
--查看任务采集进度 在bin目录下执行
seatunnel.sh --list
--修改sr的字段类型
ALTER TABLE table_name MODIFY COLUMN column_name column_type;
--sr的alter命令
ALTER MATERIALIZED VIEW [db_name.]<mv_name>
{ RENAME [db_name.]<new_mv_name>
| REFRESH [ ASYNC | ASYNC [START <time>] EVERY(<interval>) |MANUAL]
| ACTIVE | INACTIVE
| SWAP WITH [db_name.]<mv2_name>
| SET ( "<key>" = "<value>"[,...]) }
--修改物化视图刷新频率
ALTER MATERIALIZED VIEW mv_name REFRESH ASYNC EVERY( interval 3 MINUTE)
-- Oracle: rank the DATE columns of GY_KSDM by how many distinct values
-- each holds (NUM_DISTINCT comes from optimizer statistics, so results
-- reflect the last ANALYZE/gather-stats run).
SELECT
    COLUMN_NAME,
    table_name,
    NUM_DISTINCT
FROM ALL_TAB_COLUMNS
WHERE TABLE_NAME = 'GY_KSDM'
  AND DATA_TYPE = 'DATE'
ORDER BY NUM_DISTINCT DESC;
SHOW PROC '/current_queries' --实时监控其资源消耗情况。
SHOW PROC '/backends' --查看节点状态
SHOW PROFILELIST --获取QueryId
select current_version() --查询SR当前版本
ANALYZE PROFILE FROM '<QueryId>' [, <plan_node_id>, ...]
-- Oracle: for MED_ANESTHESIA_EVENT, find the date/timestamp column with
-- the most distinct values (the table's "busiest" date column), using
-- NUM_DISTINCT from optimizer statistics.
-- Fixes: added the missing spaces in "SELECT *FROM" / ")WHERE", gave the
-- inline view an alias, and terminated the statement with a semicolon.
SELECT *
FROM (
    SELECT table_name,
           COLUMN_name,
           -- rank each table's date columns by distinct-value count
           ROW_NUMBER() OVER (PARTITION BY table_name
                              ORDER BY NUM_DISTINCT DESC) px
    FROM DBA_TAB_COLUMNS
    WHERE TABLE_NAME = 'MED_ANESTHESIA_EVENT'
      AND DATA_TYPE IN ('DATE', 'TIMESTAMP')
) ranked
WHERE px = 1;  -- keep only the top-ranked column per table
-- StarRocks: create a primary-key table with range partitions generated
-- in bulk via START/END/EVERY.
-- Fix: the partition column datekey is DATE, but the original used
-- numeric boundaries (START ("100") END ("1000000") EVERY (10000)),
-- which StarRocks accepts only for integer partition columns. A DATE
-- column needs date literals and an INTERVAL step, as below.
-- NOTE(review): the date range and 1-day step are illustrative — adjust
-- to the real retention window before running.
CREATE TABLE site_access (
    datekey DATE,
    site_id INT,
    city_code SMALLINT,
    user_name VARCHAR(32),
    pv BIGINT DEFAULT '0'
)
ENGINE = olap
PRIMARY KEY(datekey, site_id, city_code, user_name)
PARTITION BY RANGE (datekey) (
    START ("2021-01-01") END ("2022-01-01") EVERY (INTERVAL 1 DAY))
DISTRIBUTED BY HASH(site_id)
PROPERTIES (
    "replication_num" = "3"
);
ALTER materialized view TB_HIS_MZ_FEE_DETAIL_COPY_CS2 SET("session.replication_num"="1"); -- 修改物化视图的副本数
--取消物化视图的刷新
CANCEL REFRESH MATERIALIZED VIEW [database_name.]materialized_view_name
-- 手动刷新物化视图
REFRESH MATERIALIZED VIEW lo_mv1;
--修改表的副本数
ALTER TABLE UDBZ.WHST_DDB SET ("default.replication_num" = "3");
--删除表分区
ALTER TABLE site_access
DROP PARTITION p1;
-- 上传文件到另一台服务器
scp apache-seatunnel-2.3.3.tgz root@192.168.241.245:/opt
--在根目录下查找文件名
find / -name datax.tar.gz
--在根目录下查找文件夹
find / -type d -name jar
-- 部署datax流程
1.下载并上传至服务器
下载链接:http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
2.解压缩
[shuidi@hadoop102 software]$ tar -zxvf datax.tar.gz -C /opt/
3.自检
[shuidi@hadoop102 ~]$ python /opt/datax/bin/datax.py /opt/datax/job/job.json
如出现以下内容,则为成功
/*
……
2021-10-12 21:51:12.335 [job-0] INFO JobContainer -
任务启动时刻 : 2021-10-12 21:51:02
任务结束时刻 : 2021-10-12 21:51:12
任务总计耗时 : 10s
任务平均流量 : 253.91KB/s
记录写入速度 : 10000rec/s
读出记录总数 : 100000
读写失败总数 : 0 */
-- 查看僵尸进程
ps aux | grep Z
--设置会话查询参数
/*+ SET_VAR(new_planner_optimize_timeout = 60000) */
-- 设置全局查询超时时间
SET GLOBAL new_planner_optimize_timeout = 60000;
-- 查看系统中所有变量。
SHOW VARIABLES;
-- 查看符合匹配规则的变量。
SHOW VARIABLES LIKE '%new_planner_optimize_timeout%';
--sr新增字段
ALTER TABLE tablename ADD COLUMN column_name column_type [KEY | agg_type] [NULL | NOT NULL] [DEFAULT "default_value"] [AFTER column_name|FIRST] [PROPERTIES ("key"="value", ...)];
--删除字段
ALTER TABLE tablename DROP COLUMN column_name;
-- sr 修改字段
ALTER TABLE tablename MODIFY COLUMN column_name column_type [KEY | agg_type] [NULL | NOT NULL] [DEFAULT "default_value"] [COMMENT "注释"] [AFTER column_name|FIRST] [PROPERTIES ("key"="value", ...)];
-- ALTER TABLE YB_ZYJS MODIFY COLUMN LAST_EDITED_TIME datetime NOT NULL DEFAULT CURRENT_TIMESTAMP;
-- 设置时间默认值 DEFAULT CURRENT_TIMESTAMP
--查看端口号的情况
netstat -tuln
--flink的数据库配置存储信息
/opt/flink-1.16.1/jar/config.properties
--flink的默认端口是8081
http://192.168.71.41:8081/#/job/running 妇幼
--kafka默认端口9092
--sqlserver中换行符=CHAR(13)
SHOW PROC '/frontends' --查询FE是否挂掉
SHOW PROC '/backends' --查询BE是否挂掉
SHOW PROC '/compute_nodes' --查询CN是否挂掉(可选)
-- 查看系统中所有变量。
SHOW VARIABLES;
-- 查看符合匹配规则的变量。
SHOW VARIABLES LIKE '%time_zone%';
show global variables like "%query_mem_limit%";
enable_materialized_view_rewrite
/*query_mem_limit --be内存开销
sql_mode --适应sql方言*/ set sql_mode = 'PIPES_AS_CONCAT';
--查看磁盘io
iostat -dkxt 5
--设置是否需要查看查询的 profile
set enable_profile=true
--多列 Global runtime filter 开关
set enable_multicolumn_global_runtime_filter = true