【最佳实践】物化视图常用运维方法
【摘要】 1. 常用元信息
2. 查看视图reloptions --推荐turbo+opt格式,刷新性能最优
3. 查看分区映射关系
4. 查看刷新耗时
5. 提取拓扑关系 --可视化分析拓扑关系
6. 在线删除物化视图
7. 使用不稳定函数 --如何强制使用to_date等不稳定函数
8. 配置级联刷新 --级联刷新节省资源,数据新鲜度一致
9. 级联修改定义 --辅助级联修改
1. 常用元信息
pg_matview --物化视图主表属性信息
pg_matview_partition --物化视图分区属性信息
pg_matview_depend --物化视图与基表的依赖信息
pv_matview_detail --综合显示属性,依赖和刷新信息
2. 查看视图reloptions
SELECT relname, reloption FROM pg_class WHERE relkind='m'; --推荐turbo+opt格式
3. 查看分区映射关系
SELECT
mapping as "base partition -> matview partition"
FROM pv_matview_detail
WHERE matview = 'test_base_mv1';
4. 查看刷新耗时
主表耗时
SELECT
distinct matview,
refresh_start_time,
refresh_finish_time - refresh_start_time as duration,
now() - refresh_finish_time as alive_time
FROM pv_matview_detail;
各分区耗时
SELECT mv_name,
CASE WHEN main_start is not null and main_start >= part_start THEN main_start WHEN part_start is not null THEN part_start ELSE null END as refresh_start_time,
CASE WHEN main_start is not null and main_start >= part_start THEN main_duration WHEN part_start is not null THEN part_duration ELSE null END as duration,
now() - refresh_start_time + duration as alive_time,
CASE WHEN main_start is not null and main_start >= part_start THEN 'full refresh' WHEN part_start is not null THEN 'partition refresh' ELSE null END as type
FROM (
SELECT mv_name, date_trunc('second', main_start) as main_start, main_duration,
date_trunc('second', (select refresh_start_time FROM pg_matview_partition WHERE matpartid = partid)) as part_start,
(SELECT refresh_finish_time - refresh_start_time FROM pg_matview_partition WHERE matpartid = partid) as part_duration
FROM (
SELECT mvid::regclass::text as mv_name, refresh_time as main_start, refresh_finish_time - refresh_time as main_duration,
(SELECT matpartid FROM pg_matview_partition m, pg_partition p WHERE m.matpartid = p.oid and p.parentid = mvid order by 1 desc limit 1) as partid
FROM pg_matview
)
);
近期历史耗时
SELECT
m.mvid::regclass view_name,
m.interval,
(SELECT MAX(actual_end_time-actual_start_time) FROM pg_task_history WHERE regexp_like(what, 'REFRESH MATERIALIZED VIEW .*\.'||m.mvid::regclass::text||'[ ;]')) max_refresh_time,
(SELECT what FROM pg_task_history WHERE regexp_like(what, 'REFRESH MATERIALIZED VIEW .*\.'||m.mvid::regclass::text||'[ ;]') limit 1) job
FROM pg_matview m;
5. 提取拓扑关系
将以下内容直接贴到开源画图工具,例如:mermaid中即可。
输出依赖关系和刷新间隔
SELECT
matview || (SELECT CASE WHEN interval IS NULL THEN '' ELSE '[' || matview || '<br>' || interval || ']' END FROM pg_matview WHERE mvid::regclass::text=matview) || ' --> ' ||
baserel || (SELECT CASE WHEN interval IS NULL THEN '' ELSE '[' || baserel || '<br>' || interval || ']' END FROM pg_matview WHERE mvid::regclass::text=baserel)
FROM pv_matview_detail WHERE mapping is NULL;
效果如下:
输出依赖关系,刷新间隔,最近耗时和级联情况
SELECT
matview || '[' || matview || (SELECT CASE WHEN interval IS NULL THEN '' ELSE '<br>interval:' || interval END FROM pg_matview WHERE mvid::regclass::text=matview) ||
(SELECT CASE WHEN MAX(actual_end_time-actual_start_time) IS NULL THEN '' ELSE '<br>cost:' || MAX(actual_end_time-actual_start_time) END FROM scheduler.pg_task_history WHERE regexp_like(what, 'REFRESH MATERIALIZED VIEW .*\.'||matview||'[ ;]')) ||
(SELECT CASE WHEN regexp_substr(what, 'CASCADE[^;]*') IS NULL THEN '' ELSE ','||regexp_substr(what, 'CASCADE[^;]*') END FROM scheduler.pg_task_history WHERE regexp_like(what, 'REFRESH MATERIALIZED VIEW .*\.'||matview||'[ ;]') LIMIT 1)
|| '] --> ' ||
baserel || '[' || baserel || (SELECT CASE WHEN interval IS NULL THEN '' ELSE '<br>interval:' || interval END FROM pg_matview WHERE mvid::regclass::text=baserel) ||
(SELECT CASE WHEN MAX(actual_end_time-actual_start_time) IS NULL THEN '' ELSE '<br>cost:' || MAX(actual_end_time-actual_start_time) END FROM scheduler.pg_task_history WHERE regexp_like(what, 'REFRESH MATERIALIZED VIEW .*\.'||baserel||'[ ;]')) ||
(SELECT CASE WHEN regexp_substr(what, 'CASCADE[^;]*') IS NULL THEN '' ELSE ','||regexp_substr(what, 'CASCADE[^;]*') END FROM scheduler.pg_task_history WHERE regexp_like(what, 'REFRESH MATERIALIZED VIEW .*\.'||baserel||'[ ;]') LIMIT 1)
|| ']'
FROM pv_matview_detail WHERE mapping is NULL
6. 在线删除物化视图
正在执行的刷新任务,会堵塞删除。因此,需要先停止刷新任务再删除。
--1、停止刷新任务(仅替换SQL中汉字)
UPDATE scheduler.pg_task SET is_broken = true
WHERE category_id = 'MatViewKernelTask' AND
(what like 'REFRESH MATERIALIZED VIEW 视图模式.视图名 %%' or what = 'REFRESH MATERIALIZED VIEW 视图模式.视图名;');
--2、删除物化视图
DROP MATERIALIZED VIEW 视图模式.视图名;
7. 使用不稳定函数
不稳定函数会导致物化视图结果不稳定。
如果需要强制使用,可以设置mv_support_function_type属性。
查询函数的稳定属性
SELECT
proname,
CASE WHEN provolatile='s' THEN 'stable' WHEN provolatile='v' THEN 'volatile' ELSE 'unknown' END AS set_mv_support_function_type_to
FROM pg_proc
WHERE proname='to_date';
创建强制使用不稳定函数的物化视图
CREATE MATERIALIZED VIEW dws_vip_month_detail
REFRESH RESTRICT START WITH(trunc(sysdate)) EVERY (interval '5 minute')
ENABLE QUERY REWRITE
WITH (orientation=column, enable_hstore_opt=ON, mv_support_function_type=stable)
DISTRIBUTE BY HASH(out_trade_id) AS
SELECT
a.month_end_date AS month_end_date,
sum(a.sale_vip) AS sale_vip,
now () AS etl_time
FROM dws_vip_day_detail a
WHERE a.month_end_date >= CAST (date_sub (current_date (), INTERVAL '2' day) AS DATE);
使用了now(), current_date()不稳定函数,配置’mv_support_function_type=stable’可强制使用。
8. 配置级联刷新
嵌套场景最好配置成级联刷新,防止下层刷新堵塞上层刷新,也能减少定时任务,降低资源消耗。
a. 如果不配置级联刷新,那上层的间隔要大于下层的间隔,防止上层刷新时读到下层面的旧数据。
b. 开启查杀参数ddl_select_concurrent_mode=‘insert_overwrite’; 全量刷新时强制查杀物化视图上面的查询,防止查询堵塞刷新。
CREATE TABLE test_base_tab1(
i_int int,
i_bigint bigint,
i_numeric numeric(7,2),
i_bool BOOLEAN,
i_varchar varchar,
i_text text,
i_data date,
i_timestamp timestamp,
i_text1 text
) WITH(orientation=column, enable_hstore_opt=ON) DISTRIBUTE BY HASH(i_int);
--创建第一层视图,CASCADE ALL指定级联刷新,START WITH EVERY指定定时刷新
CREATE MATERIALIZED VIEW test_base_mv1_1
BUILD IMMEDIATE
REFRESH CASCADE ALL COMPLETE START WITH ('2024-11-03 16:00:00+00'::TIMESTAMPTZ) EVERY (interval '00:01:00')
ENABLE QUERY REWRITE AS
SELECT count(*) AS C1, i_timestamp FROM test_base_tab1 GROUP BY i_timestamp;
--创建第二层视图,ON DEMAND指定手动刷新
CREATE MATERIALIZED VIEW test_base_mv1_2
BUILD IMMEDIATE
REFRESH COMPLETE ON DEMAND
ENABLE QUERY REWRITE AS
SELECT sum(C1) AS C1, i_timestamp FROM test_base_mv1_1 GROUP BY i_timestamp;
--创建第三层视图,ON DEMAND指定手动刷新
CREATE MATERIALIZED VIEW test_base_mv1_3
BUILD IMMEDIATE
REFRESH COMPLETE ON DEMAND
ENABLE QUERY REWRITE AS
SELECT sum(c1) AS C1, i_timestamp FROM test_base_mv1_2 GROUP BY i_timestamp;
--这样第一层视图定时刷新时,会级联的把上层物化视图全部刷新。
9. 级联修改定义
物化视图之间的依赖关系,会导致无法直接修改底层物化视图的定义。
需要先删除上层物化视图,才能修改底层物化视图。
现在提供一个方便级联修改的方法,省去了“寻找依赖关系”和“挨个获取定义”的麻烦。
自底向上,级联获取视图定义的函数
--cascade get matview define
CREATE OR REPLACE FUNCTION public.pg_get_matview_define_cascade(in schema_name TEXT, in view_name TEXT, in level int DEFAULT 1)
RETURNS VOID
AS $$
DECLARE
is_matview bool;
view_rec record;
view_define text;
parent_views text;
parent_schema text;
parent_vname text;
BEGIN
--1. output currnet matview define
SELECT relkind = 'm' INTO is_matview FROM pg_class WHERE OID = (schema_name || '.' || view_name)::regclass;
IF is_matview THEN
dbms_output.put_line('--level ' || level || ', get define of object: ' || schema_name || '.' || view_name);
SELECT pg_get_viewdef(schema_name || '.' || view_name::regclass) INTO view_define;
dbms_output.put_line(view_define);
ELSE
level = level - 1;
END IF;
--2. get parent matview
parent_views := 'SELECT matviewid FROM pg_matview_depend WHERE matpartid IS NULL AND relid=''' || schema_name || '.' || view_name || '''::regclass';
FOR view_rec IN EXECUTE(parent_views) LOOP
SELECT nspname, relname INTO parent_schema, parent_vname FROM pg_class c, pg_namespace n where c.relnamespace = n.oid and c.oid = view_rec.matviewid;
public.pg_get_matview_define_cascade(parent_schema, parent_vname, level + 1);
END LOOP;
END; $$
LANGUAGE 'plpgsql';
级联停止刷新任务的函数
--cascade stop matview refresh task
CREATE OR REPLACE FUNCTION public.pg_matview_stop_refresh_task_cascade(in schema_name TEXT, in view_name TEXT, in level int DEFAULT 1)
RETURNS VOID
AS $$
DECLARE
is_matview bool;
view_rec record;
parent_views text;
parent_schema text;
parent_vname text;
BEGIN
--1. stop currnet matview refresh task
SELECT relkind = 'm' INTO is_matview FROM pg_class WHERE OID = (schema_name || '.' || view_name)::regclass;
IF is_matview THEN
UPDATE scheduler.pg_task SET is_broken = true WHERE category_id = 'MatViewKernelTask' AND
(what like 'REFRESH MATERIALIZED VIEW ' || schema_name || '.' || view_name || ' %%' or
what = 'REFRESH MATERIALIZED VIEW ' || schema_name || '.' || view_name || ';');
dbms_output.put_line('--level ' || level || ', stop refresh task of matview: ' || schema_name || '.' || view_name);
ELSE
level = level - 1;
END IF;
--2. cascade get parent matview
parent_views := 'SELECT matviewid FROM pg_matview_depend WHERE matpartid IS NULL AND relid=''' || schema_name || '.' || view_name || '''::regclass';
FOR view_rec IN EXECUTE(parent_views) LOOP
SELECT nspname, relname INTO parent_schema, parent_vname FROM pg_class c, pg_namespace n where c.relnamespace = n.oid and c.oid = view_rec.matviewid;
public.pg_matview_stop_refresh_task_cascade(parent_schema, parent_vname, level + 1);
END LOOP;
END; $$
LANGUAGE 'plpgsql';
级联修改定义方法
--1. 级联批量获取定义
select pg_get_matview_define_cascade('public', 'test_base_mv1_1');
--2. 将批量定义按业务要求修改好
--3. 批量停止刷新任务,把“将要启动还尚未启动”的刷新任务停掉,防止刷新任务堵塞删除。
select pg_matview_stop_refresh_task_cascade('public', 'test_base_mv1_1');
--4. 级联删除物化视图
drop materialized view public.test_base_mv1_1 cascade;
--5. 批量创建修改好的物化视图
【声明】本内容来自华为云开发者社区博主,不代表华为云及华为云开发者社区的观点和立场。转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息,否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)