GaussDB(DWS)性能调优:SQL中关联发散引起的性能瓶颈问题案例
【摘要】 SQL语句执行过程中,存在数据发散导致影响语句执行效率。本文针对这一类场景利用提前过滤(提前去重成本较小的情况,即最后输出结果并没有使用过多该表的值)进行分析解决
1、【问题描述】
SQL语句执行过程中,存在关联发散导致结果集数量过大,从而影响语句执行性能
2、【原始SQL】
WITH V_DATA AS (
SELECT D.LV1_PROD_LIST_EN_NAME AS LV1_RPOD_EN_NAME,
F.EXTERNAL_NAME AS EXTERNAL_NAME,
D.LV1_PROD_LIST_CODE AS LV1_PROD_LIST_CODE,
SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY ELSE NULL END) AS SO_QTY_ACT1,
SUM(CASE WHEN F.PERIOD_ID=20230509 THEN F.SO_QTY ELSE NULL END) AS SO_QTY_ACT2,
SUM(CASE WHEN F.PERIOD_ID=20230508 THEN F.SO_QTY ELSE NULL END) AS SO_QTY_ACT3,
SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_LAST ELSE NULL END) AS SO_QTY_LAST,
SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_WTD ELSE NULL END) AS SO_QTY_WTD1,
SUM(CASE WHEN F.PERIOD_ID=20230507 THEN F.SO_QTY_WTD ELSE NULL END) AS SO_QTY_WTD2,
SUM(CASE WHEN F.PERIOD_ID=20230430 THEN F.SO_QTY_WTD ELSE NULL END) AS SO_QTY_WTD3,
SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_WTD_LAST ELSE NULL END) AS SO_QTY_WTD_LAST,
SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_MTD ELSE NULL END) AS SO_QTY_MTD1,
SUM(CASE WHEN F.PERIOD_ID=20230430 THEN F.SO_QTY_MTD ELSE NULL END) AS SO_QTY_MTD2,
SUM(CASE WHEN F.PERIOD_ID=20230331 THEN F.SO_QTY_MTD ELSE NULL END) AS SO_QTY_MTD3,
SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_MTD_LAST ELSE NULL END) AS SO_QTY_MTD_LAST
FROM BI_DASHBOARD.DM_MSS_OS_PC_AREA_SO_COL_D_F F
INNER JOIN BI_DASHBOARD.DM_MSS_ITEM_PRODUCT_D D ON F.EXTERNAL_NAME = D.MKT_NAME AND D.LV1_PROD_LIST_FLAG = 'MH'
WHERE 1=1
AND F.PERIOD_ID IN (20230510,20230509,20230508,20230507,20230430,20230430,20230331)
AND F.EXTERNAL_NAME IN ( SELECT MKT_NAME EXTERNAL_NAME
FROM BI_DASHBOARD.DM_MSS_ITEM_PRODUCT_D PRD
WHERE PRD.COMPANY_BRAND =any(array[string_to_array('HUAWEI,HONER',',')])
AND MKT_NAME<>'SNULL' GROUP BY MKT_NAME)
AND (F.LARGE_AREA_CODE IN ('') OR '' IS NULL OR ''= '')
AND (F.SMALL_AREA_CODE IN ('') OR '' IS NULL OR ''= '')
AND F.ECOSYSTEM = 'AG'
AND F.SALES_FLAG = '1'
AND F.ORG_CODE=F.SIGN_COUNTRY_PROVINCE_CODE
GROUP BY F.EXTERNAL_NAME,D.LV1_PROD_LIST_EN_NAME,D.LV1_PROD_LIST_CODE
)
,
V_END AS (SELECT PRODUCT_NAME,
NVL(LV1_PROD_CODE,'-') AS LV1_PROD_CODE,
NVL(LV1_PROD_LIST_CODE,'Total') AS LV1_PROD_LIST_CODE,
NVL(TO_CHAR(SO_QTY_ACT1),'-') AS SO_QTY_ACT1,
NVL(TO_CHAR(SO_QTY_ACT2),'-') AS SO_QTY_ACT2,
NVL(TO_CHAR(SO_QTY_ACT3),'-') AS SO_QTY_ACT3,
NVL(TO_CHAR(DOD ),'-') AS DOD,
NVL(TO_CHAR(SO_QTY_WTD1),'-') AS SO_QTY_WTD1,
NVL(TO_CHAR(SO_QTY_WTD2),'-') AS SO_QTY_WTD2,
NVL(TO_CHAR(SO_QTY_WTD3),'-') AS SO_QTY_WTD3,
NVL(TO_CHAR(WOW ),'-') AS WOW,
NVL(TO_CHAR(SO_QTY_MTD1),'-') AS SO_QTY_MTD1,
NVL(TO_CHAR(SO_QTY_MTD2),'-') AS SO_QTY_MTD2,
NVL(TO_CHAR(SO_QTY_MTD3),'-') AS SO_QTY_MTD3,
NVL(TO_CHAR(MOM ),'-') AS MOM
FROM(
SELECT NVL(T.EXTERNAL_NAME,NVL(T.LV1_RPOD_EN_NAME,'Total')) AS PRODUCT_NAME,
T.LV1_PROD_LIST_CODE,
CASE WHEN T.EXTERNAL_NAME IS NULL THEN T.LV1_PROD_LIST_CODE
ELSE '-' END AS LV1_PROD_CODE,
SUM(T.SO_QTY_ACT1) AS SO_QTY_ACT1,
SUM(T.SO_QTY_ACT2) AS SO_QTY_ACT2,
SUM(T.SO_QTY_ACT3) AS SO_QTY_ACT3,
to_number(ROUND(DECODE(SUM(T.SO_QTY_LAST),0,NULL, SUM(T.SO_QTY_ACT1)/SUM(T.SO_QTY_LAST)-1),3)) AS DOD,
SUM(T.SO_QTY_WTD1) AS SO_QTY_WTD1,
SUM(T.SO_QTY_WTD2) AS SO_QTY_WTD2,
SUM(T.SO_QTY_WTD3) AS SO_QTY_WTD3,
to_number(ROUND(DECODE(SUM(T.SO_QTY_WTD_LAST),0,NULL, SUM(T.SO_QTY_WTD1)/SUM(T.SO_QTY_WTD_LAST)-1),3)) AS WOW,
SUM(T.SO_QTY_MTD1) AS SO_QTY_MTD1,
SUM(T.SO_QTY_MTD2) AS SO_QTY_MTD2,
SUM(T.SO_QTY_MTD3) AS SO_QTY_MTD3,
to_number(ROUND(DECODE(SUM(T.SO_QTY_MTD_LAST),0,NULL, SUM(T.SO_QTY_MTD1)/SUM(T.SO_QTY_MTD_LAST)-1),3)) AS MOM
FROM V_DATA T
GROUP BY GROUPING SETS((),(T.LV1_RPOD_EN_NAME,T.LV1_PROD_LIST_CODE),(T.LV1_RPOD_EN_NAME,T.LV1_PROD_LIST_CODE,T.EXTERNAL_NAME)))
WHERE (SO_QTY_ACT1 <> 0 OR
SO_QTY_ACT2 <> 0 OR
SO_QTY_ACT3 <> 0 OR
DOD <> 0 OR
SO_QTY_WTD1 <> 0 OR
SO_QTY_WTD2 <> 0 OR
SO_QTY_WTD3 <> 0 OR
WOW <> 0 OR
SO_QTY_MTD1 <> 0 OR
SO_QTY_MTD2 <> 0 OR
SO_QTY_MTD3 <> 0 OR
MOM <> 0)),
V_LV1_PROD AS (
SELECT PRODUCT_NAME,
LV1_PROD_LIST_CODE,
ROW_NUMBER() OVER(ORDER BY SO_QTY_ACT1 DESC) AS RN1
FROM (SELECT PRODUCT_NAME,LV1_PROD_LIST_CODE ,TO_NUMBER(SO_QTY_ACT1 ) AS SO_QTY_ACT1 FROM V_END WHERE LV1_PROD_CODE <> '-' AND SO_QTY_ACT1 <> '-')
GROUP BY PRODUCT_NAME,LV1_PROD_LIST_CODE ,SO_QTY_ACT1
UNION ALL
SELECT PRODUCT_NAME,
LV1_PROD_LIST_CODE,
ROW_NUMBER() OVER(ORDER BY SO_QTY_ACT1 DESC) * 100 AS RN1
FROM V_END
WHERE LV1_PROD_CODE <> '-' AND SO_QTY_ACT1 = '-'
GROUP BY PRODUCT_NAME,LV1_PROD_LIST_CODE ,SO_QTY_ACT1
),
V_EXTERNAL_NAME AS (
SELECT PRODUCT_NAME,
LV1_PROD_LIST_CODE,
ROW_NUMBER() OVER(PARTITION BY LV1_PROD_LIST_CODE ORDER BY SO_QTY_ACT1 DESC) AS RN2
FROM (SELECT PRODUCT_NAME,LV1_PROD_LIST_CODE,TO_NUMBER(SO_QTY_ACT1 ) AS SO_QTY_ACT1 FROM V_END WHERE LV1_PROD_CODE = '-' AND LV1_PROD_LIST_CODE IS NOT NULL AND SO_QTY_ACT1 <>'-')
GROUP BY PRODUCT_NAME,LV1_PROD_LIST_CODE,SO_QTY_ACT1
UNION ALL
SELECT PRODUCT_NAME,
LV1_PROD_LIST_CODE,
ROW_NUMBER() OVER(PARTITION BY LV1_PROD_LIST_CODE ORDER BY SO_QTY_ACT1 DESC,PRODUCT_NAME) * 100 AS RN2
FROM V_END
WHERE LV1_PROD_CODE = '-' AND LV1_PROD_LIST_CODE IS NOT NULL AND SO_QTY_ACT1 ='-'
GROUP BY PRODUCT_NAME,LV1_PROD_LIST_CODE,SO_QTY_ACT1
) ,
V_ORDER_RN AS (
SELECT A.PRODUCT_NAME,
DECODE(A.LV1_PROD_LIST_CODE,'Total',0,'SNULL',2,1) AS RN,
CASE WHEN A.PRODUCT_NAME='Total' THEN '0'
WHEN A.LV1_PROD_CODE ='-' AND A.LV1_PROD_LIST_CODE ='SNULL' THEN B.RN1*10000000||'-'||C.RN2
WHEN A.LV1_PROD_CODE ='-' AND A.LV1_PROD_LIST_CODE <> 'SNULL' THEN B.RN1*100000||'-'||C.RN2
WHEN A.LV1_PROD_CODE <> '-' AND A.LV1_PROD_LIST_CODE <> 'SNULL' THEN TO_CHAR(B.RN1*100000)
ELSE TO_CHAR(B.RN1*10000000) END AS LEVEL,
CASE WHEN A.PRODUCT_NAME='Total' THEN 0
WHEN A.LV1_PROD_CODE ='-' AND A.LV1_PROD_LIST_CODE ='SNULL' THEN B.RN1*10000000+C.RN2
WHEN A.LV1_PROD_CODE ='-' AND A.LV1_PROD_LIST_CODE <> 'SNULL' THEN B.RN1*100000+C.RN2
WHEN A.LV1_PROD_CODE <> '-' AND A.LV1_PROD_LIST_CODE <> 'SNULL' THEN B.RN1*100000
ELSE B.RN1*10000000 END AS ORDER_RN,
A.LV1_PROD_CODE,
A.SO_QTY_ACT1,
A.SO_QTY_ACT2,
A.SO_QTY_ACT3,
A.DOD,
A.SO_QTY_WTD1,
A.SO_QTY_WTD2,
A.SO_QTY_WTD3,
A.WOW,
A.SO_QTY_MTD1,
A.SO_QTY_MTD2,
A.SO_QTY_MTD3,
A.MOM
FROM V_END A
LEFT JOIN V_LV1_PROD B ON A.LV1_PROD_LIST_CODE=B.LV1_PROD_LIST_CODE
LEFT JOIN V_EXTERNAL_NAME C ON A.PRODUCT_NAME=C.PRODUCT_NAME
)
SELECT
A.PRODUCT_NAME,
A.LEVEL,
A.LV1_PROD_CODE,
A.SO_QTY_ACT1,
A.SO_QTY_ACT2,
A.SO_QTY_ACT3,
A.DOD,
A.SO_QTY_WTD1,
A.SO_QTY_WTD2,
A.SO_QTY_WTD3,
A.WOW,
A.SO_QTY_MTD1,
A.SO_QTY_MTD2,
A.SO_QTY_MTD3,
A.MOM
FROM V_ORDER_RN A
ORDER BY PRODUCT_NAME,RN,ORDER_RN
LIMIT 30 OFFSET 0
3、【性能分析】
原始SQL的performance执行计划如下图所示(完整计划见附件一):
从图中标记的地方可以看出,SQL在执行过程中,f表与d表关联时存在严重的数据发散现象,同时,sort排序处处理大数据量的结果集也导致执行时间过长。对原始SQL进行分析,发现该处子查询最后输出的f表的值仅为EXTERNAL_NAME 并且关联条件也是EXTERNAL_NAME ,此时,可以针对该SQL进行改写,可以先对f表的EXTERNAL_NAME值进行去重再关联,从而降低窗口函数中排序时的数据量,以提高语句执行效率。
改写方式如下所示(f表仅出现在 V_DATA 中,故仅对其进行改写):
SELECT D.LV1_PROD_LIST_EN_NAME AS LV1_RPOD_EN_NAME,
F.EXTERNAL_NAME AS EXTERNAL_NAME,
D.LV1_PROD_LIST_CODE AS LV1_PROD_LIST_CODE,
SUM(F.SO_QTY_ACT1) AS SO_QTY_ACT1,
SUM(F.SO_QTY_ACT2) AS SO_QTY_ACT2,
SUM(F.SO_QTY_ACT3) AS SO_QTY_ACT3,
SUM(F.SO_QTY_LAST) AS SO_QTY_LAST,
SUM(F.SO_QTY_WTD1) AS SO_QTY_WTD1,
SUM(F.SO_QTY_WTD2) AS SO_QTY_WTD2,
SUM(F.SO_QTY_WTD3) AS SO_QTY_WTD3,
SUM(F.SO_QTY_WTD_LAST) AS SO_QTY_WTD_LAST,
SUM(F.SO_QTY_MTD1) AS SO_QTY_MTD1,
SUM(F.SO_QTY_MTD2) AS SO_QTY_MTD2,
SUM(F.SO_QTY_MTD3) AS SO_QTY_MTD3,
SUM(F.SO_QTY_MTD_LAST) AS SO_QTY_MTD_LAST
FROM (
select EXTERNAL_NAME,
SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY ELSE NULL END) AS SO_QTY_ACT1,
SUM(CASE WHEN F.PERIOD_ID=20230509 THEN F.SO_QTY ELSE NULL END) AS SO_QTY_ACT2,
SUM(CASE WHEN F.PERIOD_ID=20230508 THEN F.SO_QTY ELSE NULL END) AS SO_QTY_ACT3,
SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_LAST ELSE NULL END) AS SO_QTY_LAST,
SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_WTD ELSE NULL END) AS SO_QTY_WTD1,
SUM(CASE WHEN F.PERIOD_ID=20230507 THEN F.SO_QTY_WTD ELSE NULL END) AS SO_QTY_WTD2,
SUM(CASE WHEN F.PERIOD_ID=20230430 THEN F.SO_QTY_WTD ELSE NULL END) AS SO_QTY_WTD3,
SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_WTD_LAST ELSE NULL END) AS SO_QTY_WTD_LAST,
SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_MTD ELSE NULL END) AS SO_QTY_MTD1,
SUM(CASE WHEN F.PERIOD_ID=20230430 THEN F.SO_QTY_MTD ELSE NULL END) AS SO_QTY_MTD2,
SUM(CASE WHEN F.PERIOD_ID=20230331 THEN F.SO_QTY_MTD ELSE NULL END) AS SO_QTY_MTD3,
SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_MTD_LAST ELSE NULL END) AS SO_QTY_MTD_LAST
from BI_DASHBOARD.DM_MSS_OS_PC_AREA_SO_COL_D_F F
WHERE F.PERIOD_ID IN (20230510,20230509,20230508,20230507,20230430,20230430,20230331)
AND F.EXTERNAL_NAME IN ( SELECT MKT_NAME EXTERNAL_NAME
FROM BI_DASHBOARD.DM_MSS_ITEM_PRODUCT_D PRD
WHERE PRD.COMPANY_BRAND =any(array[string_to_array('HUAWEI,HONER',',')])
AND MKT_NAME<>'SNULL' GROUP BY MKT_NAME)
AND (F.LARGE_AREA_CODE IN ('') OR '' IS NULL OR ''= '')
AND (F.SMALL_AREA_CODE IN ('') OR '' IS NULL OR ''= '')
AND F.ORG_CODE=F.SIGN_COUNTRY_PROVINCE_CODE
AND F.ECOSYSTEM = 'AG'
AND F.SALES_FLAG = '1'
group by EXTERNAL_NAME)F
INNER JOIN BI_DASHBOARD.DM_MSS_ITEM_PRODUCT_D D ON F.EXTERNAL_NAME = D.MKT_NAME AND D.LV1_PROD_LIST_FLAG = 'MH'
WHERE 1=1
GROUP BY F.EXTERNAL_NAME,D.LV1_PROD_LIST_EN_NAME,D.LV1_PROD_LIST_CODE
在改写的SQL中,先对f表进行去重,然后再让其与d表进行关联,其执行计划如下图所示(完整计划见附件二):
从图中可以看到,SQL语句执行时间从90s+优化到了1s+。
【版权声明】本文为华为云社区用户原创内容,转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息, 否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)