GaussDB(DWS)运维 -- 锁等待监控
【问题描述】现网经常遇到因为锁等待导致的客户端感知的执行慢的问题,但是DWS没有对锁信息的历史做记录,导致时候问题分析时无法确认根因,给问题分析带来困惑;或者没法追踪&分析导致锁等待的业务语句。
【解决方案】基于版本(8.1.3及以上版本)当前具有的锁等待监控能力,做以下优化,并且把新的视图定义封装成视图
- 优化显式信息,信息显示更聚焦&直观
- 只输出申请锁时长大于30s的,以减小输出记录的条数,便于分析
- 脚本部署需要使用超户,否则定时任务调度的时候会因为权限问题导致部分字段显示异常,导致监控信息分析结果异常
在要监控的database中创建锁等待监控&分析视图,定义如下
CREATE OR REPLACE VIEW public.dfm_pgxc_locks_wait AS
SELECT * FROM pgxc_parallel_query('all', 'WITH lock_mode_conflicts(lock_mode_1, lock_mode_2) AS (
VALUES
(''AccessShareLock'', ''AccessExclusiveLock''),
(''RowShareLock'', ''ExclusiveLock''),
(''RowShareLock'', ''AccessExclusiveLock''),
(''RowExclusiveLock'', ''ShareLock''),
(''RowExclusiveLock'', ''ShareRowExclusiveLock''),
(''RowExclusiveLock'', ''ExclusiveLock''),
(''RowExclusiveLock'', ''AccessExclusiveLock''),
(''ShareUpdateExclusiveLock'', ''ShareUpdateExclusiveLock''),
(''ShareUpdateExclusiveLock'', ''ShareLock''),
(''ShareUpdateExclusiveLock'', ''ShareRowExclusiveLock''),
(''ShareUpdateExclusiveLock'', ''ExclusiveLock''),
(''ShareUpdateExclusiveLock'', ''AccessExclusiveLock''),
(''ShareLock'', ''RowExclusiveLock''),
(''ShareLock'', ''ShareUpdateExclusiveLock''),
(''ShareLock'', ''ShareRowExclusiveLock''),
(''ShareLock'', ''ExclusiveLock''),
(''ShareLock'', ''AccessExclusiveLock''),
(''ShareRowExclusiveLock'', ''RowExclusiveLock''),
(''ShareRowExclusiveLock'', ''ShareUpdateExclusiveLock''),
(''ShareRowExclusiveLock'', ''ShareLock''),
(''ShareRowExclusiveLock'', ''ShareRowExclusiveLock''),
(''ShareRowExclusiveLock'', ''ExclusiveLock''),
(''ShareRowExclusiveLock'', ''AccessExclusiveLock''),
(''ExclusiveLock'', ''RowShareLock''),
(''ExclusiveLock'', ''RowExclusiveLock''),
(''ExclusiveLock'', ''ShareUpdateExclusiveLock''),
(''ExclusiveLock'', ''ShareLock''),
(''ExclusiveLock'', ''ShareRowExclusiveLock''),
(''ExclusiveLock'', ''ExclusiveLock''),
(''ExclusiveLock'', ''AccessExclusiveLock''),
(''AccessExclusiveLock'', ''AccessShareLock''),
(''AccessExclusiveLock'', ''RowShareLock''),
(''AccessExclusiveLock'', ''RowExclusiveLock''),
(''AccessExclusiveLock'', ''ShareUpdateExclusiveLock''),
(''AccessExclusiveLock'', ''ShareLock''),
(''AccessExclusiveLock'', ''ShareRowExclusiveLock''),
(''AccessExclusiveLock'', ''ExclusiveLock''),
(''AccessExclusiveLock'', ''AccessExclusiveLock'')
),
db_info AS(
SELECT oid AS dboid, datname FROM pg_database WHERE datname = current_database()
),
pg_locks_cte AS (
SELECT l.*, d.datname,
(l.locktype || ''#'' || d.dboid || ''#'' || l.relation || ''#'' || l.page || ''#'' || l.tuple || ''#'' || l.transactionid || ''#'' || classid || ''#'' || l.objid) AS lockFlag
FROM pg_catalog.pg_locks l
INNER JOIN pg_stat_get_activity_with_conninfo(NULL) a ON a.pid = l.pid
INNER JOIN db_info d ON d.dboid = a.datid
WHERE l.locktype IN (''relation'', ''page'', ''tuple'', ''partition'', ''transactionid'')
AND now() - xact_start > INTERVAL ''10s'' /*事务启动时间大于10s, 可按需修改*/
),
lock_info AS(
SELECT /*+ set global(join_collapse_limit 4) set global (enable_bitmapscan off) set global(enable_nestloop off) */
lc.lockFlag,
lc.locktype,
pg_catalog.pgxc_node_str() nodename,
lc.datname,
r.rolname AS usename,
n.nspname,
c.relname,
p.relname partname,
lc.page,
lc.tuple,
lc.transactionid,
lc.mode,
lc.granted,
a.client_addr,
a.application_name,
a.pid,
CASE WHEN rx.gxid::text <> 0::text THEN rx.gxid ELSE rx.next_xid END gxid,
a.xact_start,
a.query_start,
a.state,
a.query_id,
a.query
FROM pg_locks_cte lc
LEFT JOIN pg_catalog.pg_class c ON (lc.relation = c.oid OR lc.classid = c.oid)
LEFT JOIN pg_catalog.pg_namespace n ON (c.relnamespace = n.oid)
LEFT JOIN pg_catalog.pg_partition p ON (lc.classid = p.parentid AND lc.objid = p.oid)
INNER JOIN pg_catalog.pg_get_running_xacts() rx ON (lc.pid = rx.pid)
INNER JOIN pg_catalog.pg_stat_get_activity_with_conninfo(NULL) a ON (lc.pid = a.pid)
INNER JOIN pg_catalog.pg_roles r ON r.oid = a.usesysid
)
SELECT
t1.nodename,
t1.locktype,
t1.datname,
t1.nspname,
t1.relname,
t1.partname,
(now()::timestamp-t1.xact_start::timestamp) AS waitime,
t1.gxid AS acquire_gxid,
t1.query_id AS acquire_queryid,
t1.pid AS acquire_lock_pid,
t2.gxid AS hold_gxid,
t2.query_id AS hold_queryid,
t2.pid AS hold_lock_pid,
(
''usename : '' || t1.usename || E''\n'' ||
''mode : '' || t1.mode || E''\n'' ||
''client_addr : '' || t1.client_addr || E''\n'' ||
''application_name : '' || t1.application_name || E''\n'' ||
''gxid : '' || t1.gxid || E''\n'' ||
''xact_start : '' || t1.xact_start || E''\n'' ||
''query_start : '' || t1.query_start || E''\n'' ||
''state : '' || t1.state || E''\n'' ||
''query_id : '' || t1.query_id || E''\n'' ||
''query : '' || t1.query || E''\n'' ||
''------------------------------------------------------''
) AS acquire_lock_event,
(
''usename : '' || t2.usename || E''\n'' ||
''mode : '' || t2.mode || E''\n'' ||
''client_addr : '' || t2.client_addr || E''\n'' ||
''application_name : '' || t2.application_name || E''\n'' ||
''gxid : '' || t2.gxid || E''\n'' ||
''xact_start : '' || t2.xact_start || E''\n'' ||
''query_start : '' || t2.query_start || E''\n'' ||
''state : '' || t2.state || E''\n'' ||
''query_id : '' || t2.query_id || E''\n'' ||
''query : '' || t2.query || E''\n'' ||
''------------------------------------------------------''
) AS hold_lock_event
FROM lock_info t1
JOIN lock_info t2 ON (t1.lockFlag = t2.lockFlag AND t1.granted = false AND t2.granted = true)
JOIN lock_mode_conflicts lc ON (t1.mode = lc.lock_mode_1 AND t2.mode = lc.lock_mode_2)
;') AS(
nodename name ,
locktype text ,
datname name ,
nspname name ,
relname name ,
partname name ,
waitime interval ,
acquire_gxid xid ,
acquire_queryid bigint ,
acquire_lock_pid bigint ,
hold_gxid xid ,
hold_queryid bigint,
hold_lock_pid bigint ,
acquire_lock_event text ,
hold_lock_event text
)
;
然后创建转储锁等待信息的表
--
----创建行存分区表转储数据,按天分区,保留最近一个月的数据
--
CREATE TABLE public.dfm_pgxc_locks_wait_history(
point timestamptz,
nodename name,
locktype text,
datname name,
nspname name,
relname name,
partname name,
waitime interval,
acquire_gxid xid ,
acquire_queryid bigint ,
acquire_lock_pid bigint ,
hold_gxid xid ,
hold_queryid bigint,
hold_lock_pid bigint ,
acquire_lock_event text,
hold_lock_event text
)
WITH (orientation=row, compression=no, period='1 day', TTL= '1 month')
DISTRIBUTE BY ROUNDROBIN
PARTITION BY RANGE(point);
CREATE INDEX idx_tab_sch_part ON public.dfm_pgxc_locks_wait_history(relname, nspname, partname) LOCAL;
创建定时任务,每隔20s采样一次,并转储到表 public.dfm_pgxc_locks_wait_history中
SELECT dbms_job.submit('INSERT INTO public.dfm_pgxc_locks_wait_history
SELECT now(), * FROM public.dfm_pgxc_locks_wait', sysdate, 'interval ''20s''');
给查询用户授权
-- 按需授权
-- 建议把视图和转储表的读权限赋给各个业务用户,以方便各个业务用户发现和分析问题
GRANT SELECT ON TABLE public.dfm_pgxc_locks_wait TO PUBLIC;
GRANT SELECT ON TABLE public.dfm_pgxc_locks_wait_history TO PUBLIC;
-- 注意:在云上,因为默认不支持把对象权限赋给所有用户(PUBLIC)。当遇到上述赋权语句失败时
-- 可以使用如下语句,先获取当前库内所有的普通权限的用户名
WITH T AS(
SELECT
string_agg(rolname, ', ') || ';' AS rolenames
FROM pg_roles
WHERE oid > 16384 AND rolsuper = false AND rolsystemadmin = false
)
SELECT 'GRANT SELECT ON TABLE public.dfm_pgxc_locks_wait TO '|| rolenames FROM t
UNION ALL
SELECT 'GRANT SELECT ON TABLE public.dfm_pgxc_locks_wait_history TO '|| rolenames FROM t
;
--然后把上述查询语句的结果作为SQL语句进行执行,如下
GRANT SELECT ON TABLE public.dfm_pgxc_locks_wait TO bfd_admin, bidi_gs_edw_poc, dbstudio_ro, fmd_admin;
GRANT SELECT ON TABLE public.dfm_pgxc_locks_wait_history TO bfd_admin, bidi_gs_edw_poc, dbstudio_ro, fmd_admin;
【使用说明】
DWS常见的影响业务的锁操作有表级锁、分区锁和事务锁三种。
- 表级锁和分区锁都是常规锁,分为8个级别,不同的操作动作会对表或者分区加对应级别的锁,不同级别的锁可能可以并发,也能互斥(如上小节描述);DWS通过锁的实现表不同操作的并发或者互斥。常见的SQL命令对应的表级锁,以及表级锁之间的冲突矩阵(X表示两个锁互斥)如下
锁级别对应的SQL动作 | ||||||||
SELECT | SELECT FOR UPDATE和SELECT FOR SHARE | UPDATE,DELETE、INSERT、MERGE | VACUUM(不带FULL选项),ANALYZE | CREATE INDEX会在表上加SHARE锁 | ALTER、DROP、TRUNCATE、REINDEX、CLUSTER、VACUUM FULL | |||
请求的锁模式/当前锁模式 | ACCESS SHARE | ROW SHARE | ROW EXCLUSIVE | SHARE UPDATE EXCLUSIVE | SHARE | SHARE ROW EXCLUSIVE | EXCLUSIVE | ACCESS EXCLUSIVE |
ACCESS SHARE | - | - | - | - | - | - | - | X |
ROW SHARE | - | - | - | - | - | - | X | X |
ROW EXCLUSIVE | - | - | - | - | X | X | X | X |
SHARE UPDATE EXCLUSIVE | - | - | - | X | X | X | X | X |
SHARE | - | - | X | X | - | X | X | X |
SHARE ROW EXCLUSIVE | - | - | X | X | X | X | X | X |
EXCLUSIVE | - | X | X | X | X | X | X | X |
ACCESS EXCLUSIVE | X | X | X | X | X | X | X | X |
- 事务锁是DWS用来实现并发更新的主要手段。严格来说DWS并不支持绝对的并发更新同一个元组(业界内数据库几乎都是类似的机制),而是通过事务锁的方式产生一个等待动作,等到上一个更新动作提交或者回滚之后,当前事务才会继续更新动作。比如事务T2在更新一条记录tuple1的时候发现事务T1已经更新了tuple1,但是事务T1还没有提交,那么T2会阻塞在当前状态,直到事务T1提交(或者回滚)之后,并且把tuple1变成了tuple2,那么这时候事物T2会把tuple2更新为tuple3
【使用场景】
1) 查询集群当前锁等待信息的实时状态
SELECT * FROM public.dfm_pgxc_locks_wait;
1.1)事务锁信息样例如下,显著的标记为locktype字段为值为transactionid
这表示在
- 实例dn_6005_6006上的库postgres里面探测到事务锁(transactionid)等待
- 导致锁等待的事务的事务id是1071581,持锁线程pid为139765079682960,申请锁线程的pid为139765079706272,申请锁线程的事务已经运行1min 12s
- 分析acquire_lock_event和hold_lock_event,可以发现是因为表t1并发更新同一个元组(或者CU)导致的,事务1071581的更新动作已经发生(hold lock),事务1071593在等待事务1071581提交
只有当事务1071581提交之后,事务1071593的UPDATE语句才能继续执行
这种场景需要针对区别分析,一般来说有以下场景
表对象存储属性 | 根因 | 处理策略 |
行存表或者hstore表 | 并发更新(UPDATE/DELETE/UPSERT/MERGE操作都可以视为更新)同一条记录 | 把并发更新动作修改为串行 |
普通列存表 |
并发更新(UPDATE/DELETE/UPSERT/MERGE操作都可以视为更新)同一条记录 | 把并发更新动作修改为串行 |
并发更新(UPDATE/DELETE/UPSERT/MERGE操作都可以视为更新)同一个CU | 把表修改为行存表或者hstore表 |
1.2) 表级锁输出信息样例如下,显著的标记为locktype字段为值为relation,类似的分区锁显著的标记为locktype字段为值为partition
这表示
- 实例cn_5001上的库postgres中探测到表public.t1上的表级锁(relation)导致的等待
- 持锁线程pid为140538704311848,申请锁线程的pid为140538704412992,申请锁线程的事务已经运行1min 15s
- 分析acquire_lock_event和hold_lock_event,可以发现是因为表t1的update和truncate并发,申请锁线程140538704412992的因为truncate动作正在申请AccessExclusiveLock,持锁线程140538704311848因为update动作持有表t1的RowExclusiveLock;二者锁动作互斥,导致truncate动作产生等待。
这种场景需要修改作业的调度逻辑或者依赖关系,把产生长时间等待的作业修改为串行,避免某些做业务长时间等待
2) 查询集群历史锁等待信息
2.1)查询某个时间段的所有锁等待信息
WITH t AS(
SELECT *,
row_number() OVER (PARTITION BY nodename, nspname, relname, partname, acquire_lock_pid, hold_lock_pid, acquire_lock_event, hold_lock_event ORDER BY point DESC) AS rn
FROM public.dfm_pgxc_locks_wait_history
WHERE point BETWEEN '2022-12-23 08:30:30' -- 开始时间
AND '2022-12-23 08:50:30' -- 结束时间
)
SELECT /*+ set global(query_dop 2)*/ * -- 加速查询
FROM t
WHERE rn = 1 -- 结果去重,只返回等待时间最长的记录
ORDER BY waitime DESC -- 按照等待时间从长到短排序
;
2.2)查询某个表在特定时间段的锁等待信息
假如业务发现2022-12-23 08:33:30发起的语句ALTER TABLE dwbaop.dwb_aop_revenue_amount_dtl_f truncate /* UR00215951-T-1223083330 */ PARTITION p_202208 执行时间超过600s。使用如下语句查看此语句的锁等待信息
WITH t AS(
SELECT *,
row_number() OVER (PARTITION BY nodename, nspname, relname, partname, acquire_lock_pid, hold_lock_pid, acquire_lock_event, hold_lock_event ORDER BY point DESC) AS rn
FROM public.dfm_pgxc_locks_wait_history
WHERE nspname = 'dwtrdi' -- schema名称
AND relname = 'dwi_ap_invoice_line_25' -- 表名
AND point BETWEEN '2023-04-20 09:20:00' AND '2023-04-20 09:40:00' -- 起始时间段
)
SELECT /*+ set global(query_dop 2)*/ * FROM t WHERE rn = 1;
或者如下SQL(因为语句事务锁导致的等待信息中nspname和relname字段为空,所以需要使用acquire_lock_event进行字段模糊匹配来寻找)
WITH t AS(
SELECT *,
row_number() OVER (PARTITION BY nodename, nspname, relname, partname, acquire_lock_pid, hold_lock_pid, acquire_lock_event, hold_lock_event ORDER BY point DESC) AS rn
FROM public.dfm_pgxc_locks_wait_history
WHERE acquire_lock_event LIKE '%DELETE FROM dwtrdi.dwi_ap_invoice_line_25%'
AND point BETWEEN '2023-04-20 09:20:00' AND '2023-04-20 09:40:00' -- 起始时间段
)
SELECT /*+ set global(query_dop 2)*/ * FROM t WHERE rn = 1;
- 点赞
- 收藏
- 关注作者
评论(0)