Gaussdb 200 分析历史top SQL
什么?等复现!天啊! 你们领导电话是多少,我要打给他。。。立刻,马上!
如那最怕空气突然的安静,运维人员最怕没有部署监控。所以写了一个过程记录SQL语句的运行记录,免得客户再次发难...
所有的分析都是基于 pgxc_wlm_session_statistics 和 pgxc_stat_activity , pgxc_thread_wait_status 视图。
1)创建SQL运行记录的基表
act_session_history 表记录的信息是,运行过程中所消耗的cpu/内存/iops等的最大的那个值,并非是实时信息;实时信息记录在 sess_stat_hist 里面。而sess_wait_event_hist 记录的是会话的等待事件。
这三张表,通过定时任务,每1分钟调度1次 collect_sess_stat_proc() 函数,往表里面插入数据,也就是每次只获取1分钟后该会话那一刻的信息。假如 一条SQL运行了 5分54秒,由于过程只采集那些运行超过 3分钟的session,所以 3/4/5分钟那刻的信息会被记录,SQL执行时长也被记录为运行了 5 分钟,具体的 54 秒是没有被记录的,长时间运行的SQL不差这1分钟。
-- drop TABLE monica.act_session_history;
-- drop table monica.sess_wait_event_hist;
-- drop table monica.sess_stat_hist;
CREATE TABLE monica.act_session_history
(
query_start timestamp with time zone
,dura interval
,finish integer
,client_hostname text
,coorname text
,datname text
,pid bigint
,usename text
,app_name text
,waiting boolean
,enqueue text
,state text
,block_time bigint
,estimate_total_time bigint
,max_dn_time bigint
,max_peak_memory integer
,memory_skew_percent integer
,spill_info text
,max_spill_size integer
,average_spill_size integer
,max_cpu_time bigint
,total_cpu_time bigint
,max_peak_iops integer
,average_peak_iops integer
,warning text
,query_id bigint
,query text
,mask_id text default '0'
)
DISTRIBUTE BY HASH( query_id )
PARTITION BY RANGE (query_start)
(
PARTITION p1 VALUES LESS THAN ('2023-01-01 00:00:00'),
PARTITION p2 VALUES LESS THAN ('2023-04-02 00:00:00'),
PARTITION p3 VALUES LESS THAN ('2023-09-02 00:00:00'),
PARTITION p4 VALUES LESS THAN ('2023-12-01 00:00:00'),
PARTITION p5 VALUES LESS THAN ('2024-03-02 00:00:00'),
PARTITION p6 VALUES LESS THAN ('2024-07-02 00:00:00'),
PARTITION p7 VALUES LESS THAN ('2024-12-02 00:00:00'),
PARTITION p8 VALUES LESS THAN ('2025-04-02 00:00:00'),
PARTITION p9 VALUES LESS THAN ('2025-10-02 00:00:00'),
PARTITION p_max VALUES LESS THAN (MAXVALUE)
)
;
create table monica.sess_wait_event_hist
(
query_start timestamp with time zone,
dura text,
waiting boolean,
enqueue text,
state text,
query_id bigint,
node_name text,
thread_name text,
lwtid integer,
ptid integer,
wait_status text,
wait_event text
)
DISTRIBUTE BY HASH( query_id )
PARTITION BY RANGE (query_start)
(
PARTITION p1 VALUES LESS THAN ('2023-01-01 00:00:00'),
PARTITION p2 VALUES LESS THAN ('2023-04-02 00:00:00'),
PARTITION p3 VALUES LESS THAN ('2023-09-02 00:00:00'),
PARTITION p4 VALUES LESS THAN ('2023-12-01 00:00:00'),
PARTITION p5 VALUES LESS THAN ('2024-03-02 00:00:00'),
PARTITION p6 VALUES LESS THAN ('2024-07-02 00:00:00'),
PARTITION p7 VALUES LESS THAN ('2024-12-02 00:00:00'),
PARTITION p8 VALUES LESS THAN ('2025-04-02 00:00:00'),
PARTITION p9 VALUES LESS THAN ('2025-10-02 00:00:00'),
PARTITION p_max VALUES LESS THAN (MAXVALUE)
)
;
create table monica.sess_stat_hist (
nodename text
,query_band text
,pid bigint
,block_time bigint
,start_time timestamp with time zone
,duration bigint
,estimate_total_time bigint
,estimate_left_time bigint
,enqueue text
,resource_pool name
,control_group text
,estimate_memory integer
,min_peak_memory integer
,max_peak_memory integer
,average_peak_memory integer
,memory_skew_percent integer
,spill_info text
,min_spill_size integer
,max_spill_size integer
,average_spill_size integer
,spill_skew_percent integer
,min_dn_time bigint
,max_dn_time bigint
,average_dn_time bigint
,dntime_skew_percent integer
,min_cpu_time bigint
,max_cpu_time bigint
,total_cpu_time bigint
,cpu_skew_percent integer
,min_peak_iops integer
,max_peak_iops integer
,average_peak_iops integer
,iops_skew_percent integer
,warning text
,queryid bigint
,node_group text
)
DISTRIBUTE BY HASH( queryid )
PARTITION BY RANGE (start_time)
(
PARTITION p1 VALUES LESS THAN ('2023-01-01 00:00:00'),
PARTITION p2 VALUES LESS THAN ('2023-04-02 00:00:00'),
PARTITION p3 VALUES LESS THAN ('2023-09-02 00:00:00'),
PARTITION p4 VALUES LESS THAN ('2023-12-01 00:00:00'),
PARTITION p5 VALUES LESS THAN ('2024-03-02 00:00:00'),
PARTITION p6 VALUES LESS THAN ('2024-07-02 00:00:00'),
PARTITION p7 VALUES LESS THAN ('2024-12-02 00:00:00'),
PARTITION p8 VALUES LESS THAN ('2025-04-02 00:00:00'),
PARTITION p9 VALUES LESS THAN ('2025-10-02 00:00:00'),
PARTITION p_max VALUES LESS THAN (MAXVALUE)
)
;
2)创建定时任务调度的函数
create or replace function collect_sess_stat_proc() returns void
as $$
BEGIN
create temp table temp_session_hist DISTRIBUTE BY HASH( query_id ) as (
select
query_start
,dura
,0 as finish
,client_hostname
,coorname
,datname
,q1.pid
,usename
,app_name
,waiting
,enqueue
,state
,block_time
,estimate_total_time
,max_dn_time
,max_peak_memory
,memory_skew_percent
,spill_info
,max_spill_size
,average_spill_size
,max_cpu_time
,total_cpu_time
,max_peak_iops
,average_peak_iops
,warning
,query_id
,query
from (
select
client_hostname ,
coorname ,
datname ,
pid ,
usename ,
application_name app_name ,
query_start ,
now()-query_start as dura ,
waiting ,
enqueue ,
state ,
query_id , query
from pgxc_stat_activity
where
xact_start is not null and state not in( 'idle','idle in transaction')
and not query like E'SAVEPOINT%'
and now()-query_start > interval '2 minute'
and query_id != 0
) q1
left join
(
select
username ,
pid ,
nodename ,
queryid ,
block_time ,
start_time ,
estimate_total_time ,
max_dn_time ,
max_peak_memory ,
memory_skew_percent ,
spill_info ,
max_spill_size ,
average_spill_size ,
max_cpu_time ,
total_cpu_time ,
max_peak_iops ,
average_peak_iops ,
warning
from pgxc_wlm_session_statistics
) q2
on q1.pid = q2.pid and q1.query_id = q2.queryid and q1.query_start=q2.start_time
order by q1.query_start
)
;
create temp table temp_sess_wait_event DISTRIBUTE BY HASH( query_id ) as
(
select query_start, to_char(sysdate-query_start, 'dd HH24:mi:ss') as dura,waiting,enqueue,state,a.query_id,node_name,thread_name,lwtid,ptid,wait_status,wait_event
from pgxc_stat_activity a, pgxc_thread_wait_status b
where state='active' and a.query_id = b.query_id and a.query_id <> 0
and (a.query_id,a.query_start) in (select query_id, query_start from temp_session_hist)
and wait_status not in ('synchronize quit','wait cmd','none','wait stream task','wait node')
order by query_start, query_id, node_name
);
create temp table temp_sess_stat DISTRIBUTE BY HASH( queryid ) as
(
select
nodename
,query_band
,pid
,block_time
,start_time
,duration
,estimate_total_time
,estimate_left_time
,enqueue
,resource_pool
,control_group
,estimate_memory
,min_peak_memory
,max_peak_memory
,average_peak_memory
,memory_skew_percent
,spill_info
,min_spill_size
,max_spill_size
,average_spill_size
,spill_skew_percent
,min_dn_time
,max_dn_time
,average_dn_time
,dntime_skew_percent
,min_cpu_time
,max_cpu_time
,total_cpu_time
,cpu_skew_percent
,min_peak_iops
,max_peak_iops
,average_peak_iops
,iops_skew_percent
,warning
,queryid
,node_group
from pgxc_wlm_session_statistics a
where (a.queryid, a.start_time, a.pid) in (select query_id, query_start, pid from temp_session_hist)
);
merge into monica.act_session_history as sr
using temp_session_hist as st
on (st.query_start=sr.query_start and st.query_id=sr.query_id)
WHEN MATCHED THEN
UPDATE SET sr.dura = st.dura, sr.state=st.state, sr.enqueue=st.enqueue, sr.block_time=st.block_time,
sr.max_dn_time = (CASE WHEN st.max_dn_time > sr.max_dn_time then st.max_dn_time else sr.max_dn_time end),
sr.max_peak_memory = (CASE WHEN st.max_peak_memory > sr.max_peak_memory then st.max_peak_memory else sr.max_peak_memory end),
sr.memory_skew_percent = (CASE WHEN st.memory_skew_percent > sr.memory_skew_percent then st.memory_skew_percent else sr.memory_skew_percent end),
sr.max_spill_size = (CASE WHEN st.max_spill_size > sr.max_spill_size then st.max_spill_size else sr.max_spill_size end),
sr.average_spill_size = (CASE WHEN st.average_spill_size > sr.average_spill_size then st.average_spill_size else sr.average_spill_size end),
sr.max_cpu_time = (CASE WHEN st.max_cpu_time > sr.max_cpu_time then st.max_cpu_time else sr.max_cpu_time end),
sr.total_cpu_time = (CASE WHEN st.total_cpu_time > sr.total_cpu_time then st.total_cpu_time else sr.total_cpu_time end),
sr.max_peak_iops = (CASE WHEN st.max_peak_iops > sr.max_peak_iops then st.max_peak_iops else sr.max_peak_iops end),
sr.average_peak_iops = (CASE WHEN st.average_peak_iops > sr.average_peak_iops then st.average_peak_iops else sr.average_peak_iops end),
sr.spill_info = (case when sr.spill_info ='All' THEN 'All' WHEN st.spill_info > sr.spill_info then st.spill_info else sr.spill_info end),
sr.warning = st.warning
WHEN NOT MATCHED THEN
INSERT VALUES (
st.query_start
,st.dura
,st.finish
,st.client_hostname
,st.coorname
,st.datname
,st.pid
,st.usename
,st.app_name
,st.waiting
,st.enqueue
,st.state
,st.block_time
,st.estimate_total_time
,st.max_dn_time
,st.max_peak_memory
,st.memory_skew_percent
,st.spill_info
,st.max_spill_size
,st.average_spill_size
,st.max_cpu_time
,st.total_cpu_time
,st.max_peak_iops
,st.average_peak_iops
,st.warning
,st.query_id
,st.query
);
insert into monica.sess_wait_event_hist select * from temp_sess_wait_event;
insert into sess_stat_hist select * from temp_sess_stat;
update monica.act_session_history t set mask_id= md5(monica.f_remove_constants(t.query)) where mask_id = '0' and finish=1 ;
update monica.act_session_history set finish=1 where now() > (query_start + dura + interval '1 minute');
END;
$$ LANGUAGE plpgsql;
3)编写一个实现了uniqueSQL的过程
由于没有ORACLE的绑定变量,所以针对于每个具体的变量(1,2,3,'a','b'...),每个SQL的query_id都是不一样的,所以写一个让其一致,便于后面的信息统计。
set current_schema="monica";
create or replace function f_remove_constants( p_query in text )
return text
as
l_query text;
l_char varchar(10);
l_in_quotes boolean default FALSE;
begin
for i in 1 .. length( p_query )
loop
l_char := substr(p_query,i,1);
if ( l_char = '''' and l_in_quotes )
then
l_in_quotes := FALSE;
elsif ( l_char = '''' and NOT l_in_quotes )
then
l_in_quotes := TRUE;
l_query := l_query || '''#';
end if;
if ( NOT l_in_quotes ) then
l_query := l_query || l_char;
end if;
end loop;
l_query := translate( l_query, '0123456789', '@@@@@@@@@@' );
for i in 0 .. 8 loop
l_query := replace( l_query, lpad('@',10-i,'@'), '@' );
l_query := replace( l_query, lpad(' ',10-i,' '), ' ' );
end loop;
return upper(l_query);
end;
/
4)调起定时任务
call dbms_job.isubmit(1001,'call monica.collect_sess_stat_proc(); ', sysdate, 'interval ''1 minute''');
5)编写一个获取信息的shell脚本
mask_id 相同的,代表是SQL一样只是变量不一致的的SQL文本
#!/bin/bash
#v1.0
function colorit(){
color=31
case $1 in
red)
shift # 截掉$1后,后面所有的参数都可以使用"$@"来表示
color=31
;;
green)
shift
color=32
;;
yellow)
shift
color=33
;;
blue)
shift
color=34
;;
magenta)
shift
color=35
;;
cyan)
shift
color=36
;;
white)
shift
color=37
;;
*)
echo "Usage: colorit red|green|yellow|blue|magenta|cyan|white text_msg"
return 0
;;
esac
echo -e "\e[${color}m$@\e[0m"
}
source ${BIGDATA_HOME}/mppdb/.mppdbgs_profile
_dbname=postgres
case "$1" in
active)
echo "$0 active [omm] [postgres]"
_user="${2:-omm}"
_dbname=${3:-postgres}
gsql -p 25308 -d $_dbname -r -c "select
client_hostname ,
datname ,
coorname ,
pid ,
usename ,
application_name app_name ,
--date_trunc('second',xact_start) xact_start ,
date_trunc('second',query_start) query_start ,
--now()-query_start dura ,
left(to_char(now()-query_start, 'dd HH24:MI:SS'),11) dura ,
waiting w ,
enqueue eq ,
state ,
query_id ,
left(query,30)
from pgxc_stat_activity
where
usename != '${_user}'
and (xact_start is not null or state != 'idle')
order by dura
;"
;;
ash)
_dbname=${3:-postgres}
_nday=${2:-3}
gsql -p 25308 -d $_dbname -r -c "
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish as Y
,client_hostname as Cli_host
,coorname as cn
--,datname
,pid
,left(usename,15) as usename
--,app_name
,waiting as w
,enqueue as e
,state
,trunc(st.block_time/1000/60,2) as blkti
,trunc(st.estimate_total_time/1000/60,2) as estim
,trunc(st.max_dn_time/1000/60,2) as dntx
,max_peak_memory as mmx
,memory_skew_percent as mmskw
,spill_info as spinfo
,max_spill_size as spix
,average_spill_size as spiav
,trunc(st.max_cpu_time/1000/60,2) as cputx
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx
,average_peak_iops as iopav
--,warning
,query_id
--,query
,left(mask_id,11) as queryid
from scott.act_session_history st
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
--and mask_id='87dfb26891ac316f11e3eb50e95170e7'
order by 1
;
"
;;
wait)
_dbname=${3:-postgres}
gsql -p 25308 -d $_dbname -r -c "
select query_start,dura,waiting,enqueue,state,a.query_id,thread_name,wait_status,wait_event,count(*) as cnt ,max(query_start::date + dura::interval) as currt_time
from scott.sess_wait_event_hist a
where query_id=$2
group by query_start,dura,waiting,enqueue,state,a.query_id,thread_name,wait_status,wait_event
order by query_start,dura, wait_status
;
select mask_id, spill_info, warning
from scott.act_session_history where query_id=$2;
select
--,left(query, 50) as sqltext
query
from scott.act_session_history where query_id=$2;
"
;;
hist)
_dbname=${4:-postgres}
_nday=${3:-3}
gsql -p 25308 -d $_dbname -r -c "
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish as Y
,client_hostname as Cli_host
,coorname as cn
--,datname
,pid
,left(usename,15) as usename
--,app_name
,waiting as w
,enqueue as e
--,state
,trunc(st.block_time/1000/60,2) as blkti
,trunc(st.estimate_total_time/1000/60,2) as estim
,trunc(st.max_dn_time/1000/60,2) as dntx
,max_peak_memory as mmx
,memory_skew_percent as mmskw
,spill_info as spinfo
,max_spill_size as spix
,average_spill_size as spiav
,trunc(st.max_cpu_time/1000/60,2) as cputx
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx
,average_peak_iops as iopav
--,warning
,query_id
,left(query, 11) as sqltext
from scott.act_session_history st
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
and mask_id='$2'
order by 1
;
"
;;
awr)
_dbname=${3:-postgres}
_nday=${2:-3}
colorit red "TOP 20 执行次数"
gsql -p 25308 -d $_dbname -r -c "
select
mask_id,max(query_id) as query_id, usename, min(left(to_char(dura, 'dd HH24:MI:SS'),11)) as min_dura,max(left(to_char(dura, 'dd HH24:MI:SS'),11)) max_dura, count(*) as cnt
from scott.act_session_history st
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
group by mask_id, usename having count(*)>=1
order by cnt desc, min_dura
limit 20
;
"
colorit red "TOP 20 Execute time"
gsql -p 25308 -d $_dbname -r -c "
select
query_start , dura , y , cli_host , cn , usename , w , e , blkti , estim , dntx , mmx , mmskw , spinfo , spix , spiav , cputx , allcput , iopx , iopav , query_id , mask_id
from (
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish as Y
,client_hostname as Cli_host
,coorname as cn
--,datname
--,pid
,left(usename,15) as usename
--,app_name
,waiting as w
,enqueue as e
--,state
,trunc(st.block_time/1000/60,2) as blkti
,trunc(st.estimate_total_time/1000/60,2) as estim
,trunc(st.max_dn_time/1000/60,2) as dntx
,max_peak_memory as mmx
,memory_skew_percent as mmskw
,spill_info as spinfo
,max_spill_size as spix
,average_spill_size as spiav
,trunc(st.max_cpu_time/1000/60,2) as cputx
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx
,average_peak_iops as iopav
--,warning
,query_id
--,query
,mask_id
--,left(mask_id,11) as queryid
, Row_number() over(PARTITION by mask_id order by dura desc) as _rank
from scott.act_session_history st
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
and dura is not null
) where _rank=1
order by dura desc
limit 20
;
"
colorit red "TOP 20 Memory"
gsql -p 25308 -d $_dbname -r -c "
select
query_start , dura , y , cli_host , cn , usename , w , e , blkti , estim , dntx , mmx , mmskw , spinfo , spix , spiav , cputx , allcput , iopx , iopav , query_id , mask_id
from (
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish as Y
,client_hostname as Cli_host
,coorname as cn
--,datname
--,pid
,left(usename,15) as usename
--,app_name
,waiting as w
,enqueue as e
--,state
,trunc(st.block_time/1000/60,2) as blkti
,trunc(st.estimate_total_time/1000/60,2) as estim
,trunc(st.max_dn_time/1000/60,2) as dntx
,max_peak_memory as mmx
,memory_skew_percent as mmskw
,spill_info as spinfo
,max_spill_size as spix
,average_spill_size as spiav
,trunc(st.max_cpu_time/1000/60,2) as cputx
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx
,average_peak_iops as iopav
--,warning
,query_id
--,query
,mask_id
--,left(mask_id,11) as queryid
, Row_number() over(PARTITION by mask_id order by max_peak_memory desc) as _rank
from scott.act_session_history st
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
and max_peak_memory is not null
) where _rank=1
order by mmx desc
limit 20
;
"
colorit red "TOP 20 IOPS"
gsql -p 25308 -d $_dbname -r -c "
select
query_start , dura , y , cli_host , cn , usename , w , e , blkti , estim , dntx , mmx , mmskw , spinfo , spix , spiav , cputx , allcput , iopx , iopav , query_id , mask_id
from (
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish as Y
,client_hostname as Cli_host
,coorname as cn
--,datname
--,pid
,left(usename,15) as usename
--,app_name
,waiting as w
,enqueue as e
--,state
,trunc(st.block_time/1000/60,2) as blkti
,trunc(st.estimate_total_time/1000/60,2) as estim
,trunc(st.max_dn_time/1000/60,2) as dntx
,max_peak_memory as mmx
,memory_skew_percent as mmskw
,spill_info as spinfo
,max_spill_size as spix
,average_spill_size as spiav
,trunc(st.max_cpu_time/1000/60,2) as cputx
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx
,average_peak_iops as iopav
--,warning
,query_id
--,query
,mask_id
--,left(mask_id,11) as queryid
, Row_number() over(PARTITION by mask_id order by max_peak_iops desc) as _rank
from scott.act_session_history st
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
and max_peak_iops is not null
) where _rank=1
order by iopx desc
limit 20
;
"
colorit red "TOP 20 DiskWrite"
gsql -p 25308 -d $_dbname -r -c "
select
query_start , dura , y , cli_host , cn , usename , w , e , blkti , estim , dntx , mmx , mmskw , spinfo , spix , spiav , cputx , allcput , iopx , iopav , query_id , mask_id
from (
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish as Y
,client_hostname as Cli_host
,coorname as cn
,left(usename,15) as usename
,waiting as w
,enqueue as e
,trunc(st.block_time/1000/60,2) as blkti
,trunc(st.estimate_total_time/1000/60,2) as estim
,trunc(st.max_dn_time/1000/60,2) as dntx
,max_peak_memory as mmx
,memory_skew_percent as mmskw
,spill_info as spinfo
,max_spill_size as spix
,average_spill_size as spiav
,trunc(st.max_cpu_time/1000/60,2) as cputx
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx
,average_peak_iops as iopav
,query_id
,mask_id
, Row_number() over(PARTITION by mask_id order by max_spill_size desc) as _rank
from scott.act_session_history st
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
and max_spill_size is not null
) where _rank=1
order by spix desc
limit 20
;
"
colorit red "TOP 20 CPU"
gsql -p 25308 -d $_dbname -r -c "
select
query_start , dura , y , cli_host , cn , usename , w , e , blkti , estim , dntx , mmx , mmskw , spinfo , spix , spiav , cputx , allcput , iopx , iopav , query_id , mask_id
from (
select
to_char(query_start,'yymmdd hh24:mi:ss') as query_start
,left(to_char(dura, 'dd HH24:MI:SS'),11) as dura
,finish as Y
,client_hostname as Cli_host
,coorname as cn
,left(usename,15) as usename
,waiting as w
,enqueue as e
,trunc(st.block_time/1000/60,2) as blkti
,trunc(st.estimate_total_time/1000/60,2) as estim
,trunc(st.max_dn_time/1000/60,2) as dntx
,max_peak_memory as mmx
,memory_skew_percent as mmskw
,spill_info as spinfo
,max_spill_size as spix
,average_spill_size as spiav
,trunc(st.max_cpu_time/1000/60,2) as cputx
,trunc(st.total_cpu_time/1000/60,2) as allcput
,max_peak_iops as iopx
,average_peak_iops as iopav
,query_id
,mask_id
, Row_number() over(PARTITION by mask_id order by total_cpu_time desc) as _rank
from scott.act_session_history st
where (trunc(now()) - interval '${_nday} day') < trunc(query_start)
and total_cpu_time is not null
) where _rank=1
order by allcput desc
limit 20
;
"
;;
para)
echo "$0 para paraname"
gsql -p 25308 -d $_dbname -r -c "
select name,context,vartype,left(setting,70) setting
--,left(category,30) category
,left(short_desc,80) short_desc from pg_settings where name like '%$2%'
;
"
;;
*)
echo "001) dba hist"
echo "002) dba active"
echo "003) dba wait"
echo "004) dba awr"
echo "005) dba para"
;;
esac
6)使用案例
# 默认查看3 天
dba ash
# 查看 n 天内的SQL运行情况,按启动时间排序
dba ash n
# 查看某条SQL的执行情况
dba wait query_id
# 查看相同的SQL的历史执行情况
# mask_id 是将SQL处理后生成的md5值,可以等同于query_id,不同变量的SQL,其mask_id一样
dba hist mask_id
# 查看n天内,iops/内存/cpu/数据落盘/执行次数最多/执行时长最高的SQL统计
dba awr n
查看当天的运行时长超过n分钟的SQL记录:
统计 3 天的高消耗 SQL: 比如找到了 3 天内,query_id=366198944724461271 执行了 305 次。
查找具体的SQL运行信息:query_id=366198944724461271
根据等待事件,可以推测出该表数据倾斜严重,大部分时间都在两个dn上进行等待。
查找该类SQL的历史执行情况:可以看到历史上,该SQL的执行所消耗的资源,时长是否稳定。根据mask_id查找。
总结)
1,act_session_history 是通过merge来实现插入与更新,会导致表膨胀,需要做vacuum full或者直接truncate就完事。
2,运行小于 3 分钟的SQL 也会有可能导致系统性能问题,该过程并没有将其统计收集。但是就这样吧。
3,将就使用吧。
- 点赞
- 收藏
- 关注作者
评论(0)