Gaussdb 200 分析历史top SQL

举报
哇!飞机 发表于 2022/12/14 17:48:59 2022/12/14
【摘要】 什么?等复现!天啊! 你们领导电话是多少,我要打给他。。。立刻,马上!如那最怕空气突然的安静,运维人员最怕没有部署监控。所以写了一个过程记录SQL语句的运行记录,免得客户再次发难...所有的分析都是基于 pgxc_wlm_session_statistics 和 pgxc_stat_activity , pgxc_thread_wait_status 视图。1)创建SQL运行记录的基表act...

什么?等复现!天啊! 你们领导电话是多少,我要打给他。。。立刻,马上!

如那最怕空气突然的安静,运维人员最怕没有部署监控。所以写了一个过程记录SQL语句的运行记录,免得客户再次发难...

所有的分析都是基于 pgxc_wlm_session_statistics 和 pgxc_stat_activity , pgxc_thread_wait_status 视图。

1)创建SQL运行记录的基表

act_session_history 表记录的信息是,运行过程中所消耗的cpu/内存/iops等的最大的那个值,并非是实时信息;实时信息记录在 sess_stat_hist 里面。而sess_wait_event_hist 记录的是会话的等待事件。

这三张表,通过定时任务,每1分钟调度1次 collect_sess_stat_proc() 函数,往表里面插入数据,也就是每次只获取1分钟后该会话那一刻的信息。假如 一条SQL运行了 5分54秒,由于过程只采集那些运行超过 3分钟的session,所以 3/4/5分钟那刻的信息会被记录,SQL执行时长也被记录为运行了 5 分钟,具体的 54 秒是没有被记录的,长时间运行的SQL不差这1分钟。

-- drop TABLE monica.act_session_history;
-- drop table monica.sess_wait_event_hist;
-- drop table monica.sess_stat_hist;

CREATE TABLE monica.act_session_history
(
query_start         timestamp with time zone
,dura                interval                
,finish              integer                 
,client_hostname     text                    
,coorname            text                    
,datname             text                    
,pid                 bigint                  
,usename             text                    
,app_name            text                    
,waiting             boolean                 
,enqueue             text                    
,state               text                    
,block_time          bigint                  
,estimate_total_time bigint                  
,max_dn_time         bigint                  
,max_peak_memory     integer                 
,memory_skew_percent integer                 
,spill_info          text                    
,max_spill_size      integer                 
,average_spill_size  integer                 
,max_cpu_time        bigint                  
,total_cpu_time      bigint                  
,max_peak_iops       integer                 
,average_peak_iops   integer                 
,warning             text                    
,query_id            bigint                  
,query               text                    
,mask_id text default '0'                 
)
DISTRIBUTE BY HASH( query_id )
PARTITION BY RANGE (query_start)
( 
PARTITION p1 VALUES LESS THAN ('2023-01-01 00:00:00'),
PARTITION p2 VALUES LESS THAN ('2023-04-02 00:00:00'),
PARTITION p3 VALUES LESS THAN ('2023-09-02 00:00:00'),
PARTITION p4 VALUES LESS THAN ('2023-12-01 00:00:00'),
PARTITION p5 VALUES LESS THAN ('2024-03-02 00:00:00'),
PARTITION p6 VALUES LESS THAN ('2024-07-02 00:00:00'),
PARTITION p7 VALUES LESS THAN ('2024-12-02 00:00:00'),
PARTITION p8 VALUES LESS THAN ('2025-04-02 00:00:00'),
PARTITION p9 VALUES LESS THAN ('2025-10-02 00:00:00'),
PARTITION p_max VALUES LESS THAN (MAXVALUE)
)
;


create table monica.sess_wait_event_hist
(
query_start timestamp with time zone,
dura text,                           
waiting boolean,                     
enqueue text,                        
state text,                          
query_id bigint,                     
node_name text,                      
thread_name text,                    
lwtid integer,                       
ptid integer,                        
wait_status text,                    
wait_event text 
)
DISTRIBUTE BY HASH( query_id )
PARTITION BY RANGE (query_start)
( 
PARTITION p1 VALUES LESS THAN ('2023-01-01 00:00:00'),
PARTITION p2 VALUES LESS THAN ('2023-04-02 00:00:00'),
PARTITION p3 VALUES LESS THAN ('2023-09-02 00:00:00'),
PARTITION p4 VALUES LESS THAN ('2023-12-01 00:00:00'),
PARTITION p5 VALUES LESS THAN ('2024-03-02 00:00:00'),
PARTITION p6 VALUES LESS THAN ('2024-07-02 00:00:00'),
PARTITION p7 VALUES LESS THAN ('2024-12-02 00:00:00'),
PARTITION p8 VALUES LESS THAN ('2025-04-02 00:00:00'),
PARTITION p9 VALUES LESS THAN ('2025-10-02 00:00:00'),
PARTITION p_max VALUES LESS THAN (MAXVALUE)
)
;

create table monica.sess_stat_hist (
nodename              text                     
,query_band            text                     
,pid                   bigint                   
,block_time            bigint                   
,start_time            timestamp with time zone 
,duration              bigint                   
,estimate_total_time   bigint                   
,estimate_left_time    bigint                   
,enqueue               text                     
,resource_pool         name                     
,control_group         text                     
,estimate_memory       integer                  
,min_peak_memory       integer                  
,max_peak_memory       integer                  
,average_peak_memory   integer                  
,memory_skew_percent   integer                  
,spill_info            text                     
,min_spill_size        integer                  
,max_spill_size        integer                  
,average_spill_size    integer                  
,spill_skew_percent    integer                  
,min_dn_time           bigint                   
,max_dn_time           bigint                   
,average_dn_time       bigint                   
,dntime_skew_percent   integer                  
,min_cpu_time          bigint                   
,max_cpu_time          bigint                   
,total_cpu_time        bigint                   
,cpu_skew_percent      integer                  
,min_peak_iops         integer                  
,max_peak_iops         integer                  
,average_peak_iops     integer                  
,iops_skew_percent     integer                  
,warning               text                     
,queryid               bigint                   
,node_group            text                     
)
DISTRIBUTE BY HASH( queryid )
PARTITION BY RANGE (start_time)
( 
PARTITION p1 VALUES LESS THAN ('2023-01-01 00:00:00'),
PARTITION p2 VALUES LESS THAN ('2023-04-02 00:00:00'),
PARTITION p3 VALUES LESS THAN ('2023-09-02 00:00:00'),
PARTITION p4 VALUES LESS THAN ('2023-12-01 00:00:00'),
PARTITION p5 VALUES LESS THAN ('2024-03-02 00:00:00'),
PARTITION p6 VALUES LESS THAN ('2024-07-02 00:00:00'),
PARTITION p7 VALUES LESS THAN ('2024-12-02 00:00:00'),
PARTITION p8 VALUES LESS THAN ('2025-04-02 00:00:00'),
PARTITION p9 VALUES LESS THAN ('2025-10-02 00:00:00'),
PARTITION p_max VALUES LESS THAN (MAXVALUE)
)
;

2)创建定时任务调度的函数

create or replace function collect_sess_stat_proc() returns void
as $$
BEGIN
create temp table temp_session_hist DISTRIBUTE BY HASH( query_id ) as (
select 
query_start         
,dura
,0 as finish
,client_hostname     
,coorname            
,datname             
,q1.pid              
,usename             
,app_name            
,waiting             
,enqueue             
,state               
,block_time          
,estimate_total_time
,max_dn_time
,max_peak_memory    
,memory_skew_percent
,spill_info
,max_spill_size
,average_spill_size
,max_cpu_time
,total_cpu_time
,max_peak_iops
,average_peak_iops
,warning
,query_id
,query
from (
	select 
	client_hostname                                   ,
	coorname                                          ,
	datname                                           ,
	pid                                               ,
	usename                                           ,
	application_name app_name						  ,
	query_start										  ,
	now()-query_start as dura 						  ,
	waiting                                           ,
	enqueue                                           ,
	state                                             ,
	query_id                                          , query
	from pgxc_stat_activity
	where 
	xact_start is not null and state not in( 'idle','idle in transaction') 
	and not query like E'SAVEPOINT%'
	and now()-query_start > interval '2 minute'
	and query_id != 0
) q1 
left join
(
	select 
	username                                ,
	pid										,
	nodename                                ,
	queryid                                 ,
	block_time                              ,
	start_time                              ,
	estimate_total_time						,
	max_dn_time			     				,
	max_peak_memory                         ,
	memory_skew_percent                     ,
	spill_info								,
	max_spill_size							,
	average_spill_size						,
	max_cpu_time							,
	total_cpu_time						    ,
	max_peak_iops							,
	average_peak_iops						,
	warning
from pgxc_wlm_session_statistics
) q2
	on q1.pid = q2.pid and q1.query_id = q2.queryid and q1.query_start=q2.start_time
	order by q1.query_start
)
;

create temp table temp_sess_wait_event DISTRIBUTE BY HASH( query_id ) as 
(
select query_start, to_char(sysdate-query_start, 'dd HH24:mi:ss') as dura,waiting,enqueue,state,a.query_id,node_name,thread_name,lwtid,ptid,wait_status,wait_event 
from pgxc_stat_activity a, pgxc_thread_wait_status b 
where state='active' and a.query_id = b.query_id and a.query_id <> 0 
and (a.query_id,a.query_start) in (select query_id, query_start from temp_session_hist)
and  wait_status not in ('synchronize quit','wait cmd','none','wait stream task','wait node')
order by query_start, query_id, node_name
);

create temp table temp_sess_stat DISTRIBUTE BY HASH( queryid ) as 
(
select 
nodename            
,query_band         
,pid                
,block_time         
,start_time         
,duration           
,estimate_total_time
,estimate_left_time 
,enqueue            
,resource_pool      
,control_group      
,estimate_memory    
,min_peak_memory    
,max_peak_memory    
,average_peak_memory
,memory_skew_percent
,spill_info         
,min_spill_size     
,max_spill_size     
,average_spill_size 
,spill_skew_percent 
,min_dn_time        
,max_dn_time        
,average_dn_time    
,dntime_skew_percent
,min_cpu_time       
,max_cpu_time       
,total_cpu_time     
,cpu_skew_percent   
,min_peak_iops      
,max_peak_iops      
,average_peak_iops  
,iops_skew_percent  
,warning            
,queryid            
,node_group         
from pgxc_wlm_session_statistics a
where (a.queryid, a.start_time, a.pid) in (select query_id, query_start, pid from temp_session_hist)
);


merge into monica.act_session_history as sr 
using temp_session_hist as st 
on (st.query_start=sr.query_start and st.query_id=sr.query_id)
WHEN MATCHED THEN  
UPDATE SET sr.dura = st.dura, sr.state=st.state, sr.enqueue=st.enqueue, sr.block_time=st.block_time, 
sr.max_dn_time = (CASE WHEN st.max_dn_time > sr.max_dn_time then st.max_dn_time else sr.max_dn_time end),
sr.max_peak_memory = (CASE WHEN st.max_peak_memory > sr.max_peak_memory then st.max_peak_memory else sr.max_peak_memory end),
sr.memory_skew_percent = (CASE WHEN st.memory_skew_percent > sr.memory_skew_percent then st.memory_skew_percent else sr.memory_skew_percent end),
sr.max_spill_size = (CASE WHEN st.max_spill_size > sr.max_spill_size then st.max_spill_size else sr.max_spill_size end),
sr.average_spill_size = (CASE WHEN st.average_spill_size > sr.average_spill_size then st.average_spill_size else sr.average_spill_size end),
sr.max_cpu_time = (CASE WHEN st.max_cpu_time > sr.max_cpu_time then st.max_cpu_time else sr.max_cpu_time end),
sr.total_cpu_time = (CASE WHEN st.total_cpu_time > sr.total_cpu_time then st.total_cpu_time else sr.total_cpu_time end),
sr.max_peak_iops = (CASE WHEN st.max_peak_iops > sr.max_peak_iops then st.max_peak_iops else sr.max_peak_iops end),
sr.average_peak_iops = (CASE WHEN st.average_peak_iops > sr.average_peak_iops then st.average_peak_iops else sr.average_peak_iops end),
sr.spill_info = (case when sr.spill_info ='All' THEN 'All' WHEN st.spill_info > sr.spill_info then st.spill_info else sr.spill_info end),
sr.warning = st.warning
WHEN NOT MATCHED THEN 
INSERT VALUES (
st.query_start         
,st.dura
,st.finish
,st.client_hostname     
,st.coorname            
,st.datname             
,st.pid              
,st.usename             
,st.app_name            
,st.waiting             
,st.enqueue             
,st.state               
,st.block_time          
,st.estimate_total_time
,st.max_dn_time
,st.max_peak_memory    
,st.memory_skew_percent
,st.spill_info
,st.max_spill_size
,st.average_spill_size
,st.max_cpu_time
,st.total_cpu_time
,st.max_peak_iops
,st.average_peak_iops
,st.warning
,st.query_id
,st.query
);

insert into monica.sess_wait_event_hist select * from temp_sess_wait_event;
insert into sess_stat_hist select * from temp_sess_stat;
update monica.act_session_history t set mask_id= md5(monica.f_remove_constants(t.query))	where mask_id = '0' and finish=1  ;
update monica.act_session_history set finish=1 where now() > (query_start + dura + interval '1 minute');

END;
$$ LANGUAGE plpgsql;

3)编写一个实现了uniqueSQL的过程

由于没有ORACLE的绑定变量,所以针对于每个具体的变量(1,2,3,'a','b'...),每个SQL的query_id都是不一样的,所以写一个让其一致,便于后面的信息统计。

set current_schema="monica";

create or replace function f_remove_constants( p_query in text ) 
return text
as
l_query text;
l_char  varchar(10);
l_in_quotes boolean default FALSE;
begin
for i in 1 .. length( p_query )
loop
l_char := substr(p_query,i,1);
if ( l_char = '''' and l_in_quotes )
then
l_in_quotes := FALSE;
elsif ( l_char = '''' and NOT l_in_quotes )
then
l_in_quotes := TRUE;
l_query := l_query || '''#';
end if;
if ( NOT l_in_quotes ) then
l_query := l_query || l_char;
end if;
end loop;
l_query := translate( l_query, '0123456789', '@@@@@@@@@@' );
for i in 0 .. 8 loop
l_query := replace( l_query, lpad('@',10-i,'@'), '@' );
l_query := replace( l_query, lpad(' ',10-i,' '), ' ' );
end loop;
return upper(l_query);
end;
/

4)调起定时任务

call dbms_job.isubmit(1001,'call monica.collect_sess_stat_proc(); ', sysdate, 'interval ''1 minute''');

5)编写一个获取信息的shell脚本

mask_id 相同的,代表是SQL一样只是变量不一致的的SQL文本

#!/bin/bash

#v1.0

function colorit(){
    color=31
    case $1 in
        red)
        shift   # 截掉$1后,后面所有的参数都可以使用"$@"来表示
        color=31
        ;;
        green)
        shift
        color=32
        ;;
        yellow)
        shift
        color=33
        ;;
        blue)
        shift
        color=34
        ;;
        magenta)
        shift
        color=35
        ;;
        cyan)
        shift
        color=36
        ;;
        white)
        shift
        color=37
        ;;
        *)
        echo "Usage: colorit red|green|yellow|blue|magenta|cyan|white text_msg"
        return 0
        ;;
    esac
    echo -e "\e[${color}m$@\e[0m"
}


source ${BIGDATA_HOME}/mppdb/.mppdbgs_profile
_dbname=postgres

case "$1" in 

active)
echo "$0 active [omm] [postgres]"
_user="${2:-omm}"
_dbname=${3:-postgres}
gsql -p 25308 -d $_dbname -r -c "select 
client_hostname                                   ,
datname                                           ,
coorname                                          ,
pid                                               ,
usename                                           ,
application_name   app_name                               ,
--date_trunc('second',xact_start)  xact_start       ,
date_trunc('second',query_start) query_start      ,
--now()-query_start dura                            ,
left(to_char(now()-query_start, 'dd HH24:MI:SS'),11) dura                            ,
waiting    w                                       ,
enqueue    eq                                       ,
state                                             ,
query_id                                          ,
left(query,30)
from pgxc_stat_activity
where 
usename != '${_user}'
and (xact_start is not null or state != 'idle')
order by dura 
;"
;;


ash)
_dbname=${3:-postgres}
_nday=${2:-3}
gsql -p 25308 -d $_dbname -r -c "
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish  as Y            
,client_hostname as Cli_host    
,coorname as cn           
--,datname             
,pid                 
,left(usename,15) as usename
--,app_name            
,waiting as w            
,enqueue as e            
,state
,trunc(st.block_time/1000/60,2) as blkti         
,trunc(st.estimate_total_time/1000/60,2) as estim         
,trunc(st.max_dn_time/1000/60,2) as dntx               
,max_peak_memory  as mmx   
,memory_skew_percent as mmskw
,spill_info  as spinfo        
,max_spill_size  as spix    
,average_spill_size as spiav 
,trunc(st.max_cpu_time/1000/60,2) as cputx  
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx      
,average_peak_iops as iopav  
--,warning             
,query_id            
--,query               
,left(mask_id,11) as queryid
from scott.act_session_history st 
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
--and mask_id='87dfb26891ac316f11e3eb50e95170e7'
order by 1
;
"
;;


wait)
_dbname=${3:-postgres}
gsql -p 25308 -d $_dbname -r -c "
select query_start,dura,waiting,enqueue,state,a.query_id,thread_name,wait_status,wait_event,count(*) as cnt ,max(query_start::date + dura::interval) as currt_time
from scott.sess_wait_event_hist a
where query_id=$2
group by query_start,dura,waiting,enqueue,state,a.query_id,thread_name,wait_status,wait_event
order by query_start,dura, wait_status
;
select mask_id, spill_info, warning
from scott.act_session_history where query_id=$2;
select
--,left(query, 50) as sqltext 
query
from scott.act_session_history where query_id=$2;
"
;;

hist)
_dbname=${4:-postgres}
_nday=${3:-3}
gsql -p 25308 -d $_dbname -r -c "
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish  as Y            
,client_hostname as Cli_host    
,coorname as cn           
--,datname             
,pid                 
,left(usename,15) as usename
--,app_name            
,waiting as w            
,enqueue as e            
--,state
,trunc(st.block_time/1000/60,2) as blkti         
,trunc(st.estimate_total_time/1000/60,2) as estim         
,trunc(st.max_dn_time/1000/60,2) as dntx               
,max_peak_memory  as mmx   
,memory_skew_percent as mmskw
,spill_info  as spinfo        
,max_spill_size  as spix    
,average_spill_size as spiav 
,trunc(st.max_cpu_time/1000/60,2) as cputx  
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx      
,average_peak_iops as iopav  
--,warning             
,query_id            
,left(query, 11) as sqltext
from scott.act_session_history st 
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
and mask_id='$2'
order by 1
;
"
;;


awr)
_dbname=${3:-postgres}
_nday=${2:-3}
colorit red "TOP 20 执行次数"
gsql -p 25308 -d $_dbname -r -c "
select 
mask_id,max(query_id) as query_id, usename, min(left(to_char(dura, 'dd HH24:MI:SS'),11)) as min_dura,max(left(to_char(dura, 'dd HH24:MI:SS'),11)) max_dura, count(*) as cnt 
from scott.act_session_history st 
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
group by mask_id,  usename having count(*)>=1
order by cnt desc, min_dura 
limit 20
;
"

colorit red "TOP 20 Execute time"
gsql -p 25308 -d $_dbname -r -c "
select 
query_start , dura , y , cli_host , cn , usename , w , e , blkti , estim , dntx , mmx , mmskw , spinfo , spix , spiav , cputx , allcput , iopx , iopav , query_id , mask_id
from (
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish  as Y            
,client_hostname as Cli_host    
,coorname as cn           
--,datname             
--,pid                 
,left(usename,15) as usename
--,app_name            
,waiting as w            
,enqueue as e            
--,state
,trunc(st.block_time/1000/60,2) as blkti         
,trunc(st.estimate_total_time/1000/60,2) as estim     
,trunc(st.max_dn_time/1000/60,2) as dntx               
,max_peak_memory  as mmx   
,memory_skew_percent as mmskw
,spill_info  as spinfo        
,max_spill_size  as spix    
,average_spill_size as spiav 
,trunc(st.max_cpu_time/1000/60,2) as cputx  
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx      
,average_peak_iops as iopav  
--,warning             
,query_id            
--,query  
,mask_id
--,left(mask_id,11) as queryid
, Row_number() over(PARTITION by mask_id order by dura desc) as _rank
from scott.act_session_history st 
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
  and dura is not null
) where _rank=1
order by dura desc
limit 20
;
"

colorit red "TOP 20 Memory"
gsql -p 25308 -d $_dbname -r -c "
select 
query_start , dura , y , cli_host , cn , usename , w , e , blkti , estim , dntx , mmx , mmskw , spinfo , spix , spiav , cputx , allcput , iopx , iopav , query_id , mask_id
from (
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish  as Y            
,client_hostname as Cli_host    
,coorname as cn           
--,datname             
--,pid                 
,left(usename,15) as usename
--,app_name            
,waiting as w            
,enqueue as e            
--,state
,trunc(st.block_time/1000/60,2) as blkti         
,trunc(st.estimate_total_time/1000/60,2) as estim     
,trunc(st.max_dn_time/1000/60,2) as dntx               
,max_peak_memory  as mmx   
,memory_skew_percent as mmskw
,spill_info  as spinfo        
,max_spill_size  as spix    
,average_spill_size as spiav 
,trunc(st.max_cpu_time/1000/60,2) as cputx  
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx      
,average_peak_iops as iopav  
--,warning             
,query_id            
--,query  
,mask_id
--,left(mask_id,11) as queryid
, Row_number() over(PARTITION by mask_id order by max_peak_memory desc) as _rank
from scott.act_session_history st 
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
  and max_peak_memory is not null
) where _rank=1
order by mmx desc 
limit 20
;
"

colorit red "TOP 20 IOPS"
gsql -p 25308 -d $_dbname -r -c "
select 
query_start , dura , y , cli_host , cn , usename , w , e , blkti , estim , dntx , mmx , mmskw , spinfo , spix , spiav , cputx , allcput , iopx , iopav , query_id , mask_id
from (
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish  as Y            
,client_hostname as Cli_host    
,coorname as cn           
--,datname             
--,pid                 
,left(usename,15) as usename
--,app_name            
,waiting as w            
,enqueue as e            
--,state
,trunc(st.block_time/1000/60,2) as blkti         
,trunc(st.estimate_total_time/1000/60,2) as estim     
,trunc(st.max_dn_time/1000/60,2) as dntx               
,max_peak_memory  as mmx   
,memory_skew_percent as mmskw
,spill_info  as spinfo        
,max_spill_size  as spix    
,average_spill_size as spiav 
,trunc(st.max_cpu_time/1000/60,2) as cputx  
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx      
,average_peak_iops as iopav  
--,warning             
,query_id            
--,query  
,mask_id
--,left(mask_id,11) as queryid
, Row_number() over(PARTITION by mask_id order by max_peak_iops desc) as _rank
from scott.act_session_history st 
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
  and max_peak_iops is not null
) where _rank=1
order by iopx desc
limit 20
;
"
colorit red "TOP 20 DiskWrite"
gsql -p 25308 -d $_dbname -r -c "
select 
query_start , dura , y , cli_host , cn , usename , w , e , blkti , estim , dntx , mmx , mmskw , spinfo , spix , spiav , cputx , allcput , iopx , iopav , query_id , mask_id
from (
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish  as Y            
,client_hostname as Cli_host    
,coorname as cn           
,left(usename,15) as usename
,waiting as w            
,enqueue as e            
,trunc(st.block_time/1000/60,2) as blkti         
,trunc(st.estimate_total_time/1000/60,2) as estim     
,trunc(st.max_dn_time/1000/60,2) as dntx               
,max_peak_memory  as mmx   
,memory_skew_percent as mmskw
,spill_info  as spinfo        
,max_spill_size  as spix    
,average_spill_size as spiav 
,trunc(st.max_cpu_time/1000/60,2) as cputx  
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx      
,average_peak_iops as iopav  
,query_id            
,mask_id
, Row_number() over(PARTITION by mask_id order by max_spill_size desc) as _rank
from scott.act_session_history st 
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
  and max_spill_size is not null
) where _rank=1
order by spix desc
limit 20
;
"

colorit red "TOP 20 CPU"
gsql -p 25308 -d $_dbname -r -c "
select 
query_start , dura , y , cli_host , cn , usename , w , e , blkti , estim , dntx , mmx , mmskw , spinfo , spix , spiav , cputx , allcput , iopx , iopav , query_id , mask_id
from (
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish  as Y            
,client_hostname as Cli_host    
,coorname as cn           
,left(usename,15) as usename
,waiting as w            
,enqueue as e            
,trunc(st.block_time/1000/60,2) as blkti         
,trunc(st.estimate_total_time/1000/60,2) as estim     
,trunc(st.max_dn_time/1000/60,2) as dntx               
,max_peak_memory  as mmx   
,memory_skew_percent as mmskw
,spill_info  as spinfo        
,max_spill_size  as spix    
,average_spill_size as spiav 
,trunc(st.max_cpu_time/1000/60,2) as cputx  
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx      
,average_peak_iops as iopav  
,query_id            
,mask_id
, Row_number() over(PARTITION by mask_id order by total_cpu_time desc) as _rank
from scott.act_session_history st 
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
  and total_cpu_time is not null
) where _rank=1
order by allcput desc
limit 20
;
"

;;


para)
echo "$0 para paraname"
gsql -p 25308 -d $_dbname -r -c "
select name,context,vartype,left(setting,70) setting
--,left(category,30) category
,left(short_desc,80) short_desc from pg_settings where name like '%$2%'
;
"
;;



*)
echo "001) dba hist"
echo "002) dba active"
echo "003) dba wait"
echo "004) dba awr"
echo "005) dba para"

;;
esac

6)使用案例

# 默认查看3 天
dba ash 

# 查看 n 天内的SQL运行情况,按启动时间排序
dba ash n

# 查看某条SQL的执行情况
dba wait query_id

# 查看相同的SQL的历史执行情况
# mask_id 是将SQL处理后生成的md5值,可以等同于query_id,不同变量的SQL,其mask_id一样
dba hist mask_id

# 查看n天内,iops/内存/cpu/数据落盘/执行次数最多/执行时长最高的SQL统计
dba awr n


查看当天的运行时长超过n分钟的SQL记录:

统计 3 天的高消耗 SQL: 比如找到了 3 天内,query_id=366198944724461271 执行了 305 次。

查找具体的SQL运行信息:query_id=366198944724461271

根据等待事件,可以推测出该表数据倾斜严重,大部分时间都在两个dn上进行等待。

查找该类SQL的历史执行情况:可以看到历史上,该SQL的执行所消耗的资源,时长是否稳定。根据mask_id查找。

总结)

1,act_session_history 是通过merge来实现插入与更新,会导致表膨胀,需要做vacuum full或者直接truncate就完事。

2,运行小于 3 分钟的SQL 也会有可能导致系统性能问题,该过程并没有将其统计收集。但是就这样吧。

3,将就使用吧。

【版权声明】本文为华为云社区用户原创内容,转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息, 否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: cloudbbs@huaweicloud.com
  • 点赞
  • 收藏
  • 关注作者

评论(0

0/1000
抱歉,系统识别当前为高风险访问,暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称,即可参与社区互动!

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。