GaussDB(DWS)性能调优:SQL中关联发散引起的性能瓶颈问题案例

举报
O泡果奶~ 发表于 2023/12/13 22:13:36 2023/12/13
【摘要】 SQL语句执行过程中,存在数据发散导致影响语句执行效率。本文针对这一类场景利用提前过滤(提前去重成本较小的情况,即最后输出结果并没有使用过多该表的值)进行分析解决

1、【问题描述】

SQL语句执行过程中,存在关联发散导致结果集数量过大,从而影响语句执行性能

2、【原始SQL】

WITH V_DATA AS (
  SELECT D.LV1_PROD_LIST_EN_NAME                                                            AS LV1_RPOD_EN_NAME,
         F.EXTERNAL_NAME                                                                    AS EXTERNAL_NAME,
         D.LV1_PROD_LIST_CODE                                                               AS LV1_PROD_LIST_CODE,
         SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY ELSE NULL END)           AS SO_QTY_ACT1,
         SUM(CASE WHEN F.PERIOD_ID=20230509 THEN F.SO_QTY ELSE NULL END)          AS SO_QTY_ACT2,
         SUM(CASE WHEN F.PERIOD_ID=20230508 THEN F.SO_QTY ELSE NULL END)          AS SO_QTY_ACT3,
         SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_LAST ELSE NULL END)      AS SO_QTY_LAST,
         SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_WTD ELSE NULL END)       AS SO_QTY_WTD1,
         SUM(CASE WHEN F.PERIOD_ID=20230507 THEN F.SO_QTY_WTD ELSE NULL END)      AS SO_QTY_WTD2,  
         SUM(CASE WHEN F.PERIOD_ID=20230430 THEN F.SO_QTY_WTD ELSE NULL END)      AS SO_QTY_WTD3,
         SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_WTD_LAST ELSE NULL END)  AS SO_QTY_WTD_LAST, 
         SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_MTD ELSE NULL END)       AS SO_QTY_MTD1, 
         SUM(CASE WHEN F.PERIOD_ID=20230430 THEN F.SO_QTY_MTD ELSE NULL END)      AS SO_QTY_MTD2,
         SUM(CASE WHEN F.PERIOD_ID=20230331 THEN F.SO_QTY_MTD ELSE NULL END)      AS SO_QTY_MTD3,
         SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_MTD_LAST ELSE NULL END)  AS SO_QTY_MTD_LAST         
    FROM BI_DASHBOARD.DM_MSS_OS_PC_AREA_SO_COL_D_F F
    INNER JOIN BI_DASHBOARD.DM_MSS_ITEM_PRODUCT_D D ON F.EXTERNAL_NAME = D.MKT_NAME AND D.LV1_PROD_LIST_FLAG = 'MH'
    WHERE 1=1
    AND F.PERIOD_ID IN (20230510,20230509,20230508,20230507,20230430,20230430,20230331)
    AND F.EXTERNAL_NAME IN ( SELECT MKT_NAME EXTERNAL_NAME  
		       FROM BI_DASHBOARD.DM_MSS_ITEM_PRODUCT_D PRD
		      WHERE PRD.COMPANY_BRAND =any(array[string_to_array('HUAWEI,HONER',',')])
		            AND MKT_NAME<>'SNULL' GROUP BY MKT_NAME)
    AND (F.LARGE_AREA_CODE IN  ('') OR '' IS NULL OR ''= '')
    AND (F.SMALL_AREA_CODE IN  ('') OR '' IS NULL OR ''= '')
    
    
     AND F.ECOSYSTEM = 'AG'  
     AND F.SALES_FLAG = '1' 
     AND F.ORG_CODE=F.SIGN_COUNTRY_PROVINCE_CODE 
    GROUP BY F.EXTERNAL_NAME,D.LV1_PROD_LIST_EN_NAME,D.LV1_PROD_LIST_CODE
  )
  , 
V_END AS (SELECT PRODUCT_NAME,
       NVL(LV1_PROD_CODE,'-')        AS LV1_PROD_CODE,
       NVL(LV1_PROD_LIST_CODE,'Total') AS LV1_PROD_LIST_CODE,
       NVL(TO_CHAR(SO_QTY_ACT1),'-') AS SO_QTY_ACT1,
       NVL(TO_CHAR(SO_QTY_ACT2),'-') AS SO_QTY_ACT2,
       NVL(TO_CHAR(SO_QTY_ACT3),'-') AS SO_QTY_ACT3,
       NVL(TO_CHAR(DOD        ),'-') AS DOD,
       NVL(TO_CHAR(SO_QTY_WTD1),'-') AS SO_QTY_WTD1,
       NVL(TO_CHAR(SO_QTY_WTD2),'-') AS SO_QTY_WTD2,
       NVL(TO_CHAR(SO_QTY_WTD3),'-') AS SO_QTY_WTD3,
       NVL(TO_CHAR(WOW        ),'-') AS WOW, 
       NVL(TO_CHAR(SO_QTY_MTD1),'-') AS SO_QTY_MTD1,
       NVL(TO_CHAR(SO_QTY_MTD2),'-') AS SO_QTY_MTD2,
       NVL(TO_CHAR(SO_QTY_MTD3),'-') AS SO_QTY_MTD3,
       NVL(TO_CHAR(MOM        ),'-') AS MOM 
FROM(      
SELECT NVL(T.EXTERNAL_NAME,NVL(T.LV1_RPOD_EN_NAME,'Total')) AS PRODUCT_NAME,
       T.LV1_PROD_LIST_CODE,
       CASE WHEN T.EXTERNAL_NAME IS NULL THEN T.LV1_PROD_LIST_CODE
       ELSE '-' END AS LV1_PROD_CODE,
       SUM(T.SO_QTY_ACT1) AS SO_QTY_ACT1,
       SUM(T.SO_QTY_ACT2) AS SO_QTY_ACT2,
       SUM(T.SO_QTY_ACT3) AS SO_QTY_ACT3,
       to_number(ROUND(DECODE(SUM(T.SO_QTY_LAST),0,NULL, SUM(T.SO_QTY_ACT1)/SUM(T.SO_QTY_LAST)-1),3)) AS DOD,
       SUM(T.SO_QTY_WTD1) AS SO_QTY_WTD1,
       SUM(T.SO_QTY_WTD2) AS SO_QTY_WTD2,
       SUM(T.SO_QTY_WTD3) AS SO_QTY_WTD3,
       to_number(ROUND(DECODE(SUM(T.SO_QTY_WTD_LAST),0,NULL, SUM(T.SO_QTY_WTD1)/SUM(T.SO_QTY_WTD_LAST)-1),3)) AS WOW,
       SUM(T.SO_QTY_MTD1) AS SO_QTY_MTD1,
       SUM(T.SO_QTY_MTD2) AS SO_QTY_MTD2,
       SUM(T.SO_QTY_MTD3) AS SO_QTY_MTD3,
       to_number(ROUND(DECODE(SUM(T.SO_QTY_MTD_LAST),0,NULL, SUM(T.SO_QTY_MTD1)/SUM(T.SO_QTY_MTD_LAST)-1),3)) AS MOM
  FROM V_DATA T
  GROUP BY GROUPING SETS((),(T.LV1_RPOD_EN_NAME,T.LV1_PROD_LIST_CODE),(T.LV1_RPOD_EN_NAME,T.LV1_PROD_LIST_CODE,T.EXTERNAL_NAME)))
  WHERE (SO_QTY_ACT1 <> 0 OR
         SO_QTY_ACT2 <> 0 OR
         SO_QTY_ACT3 <> 0 OR
         DOD <> 0 OR 
         SO_QTY_WTD1 <> 0 OR
         SO_QTY_WTD2 <> 0 OR
         SO_QTY_WTD3 <> 0 OR
         WOW <> 0 OR
         SO_QTY_MTD1 <> 0 OR
         SO_QTY_MTD2 <> 0 OR 
         SO_QTY_MTD3 <> 0 OR
         MOM <> 0)),
V_LV1_PROD AS (
  SELECT PRODUCT_NAME,
         LV1_PROD_LIST_CODE,
         ROW_NUMBER() OVER(ORDER BY SO_QTY_ACT1 DESC) AS RN1
    FROM (SELECT PRODUCT_NAME,LV1_PROD_LIST_CODE ,TO_NUMBER(SO_QTY_ACT1 ) AS SO_QTY_ACT1  FROM V_END WHERE LV1_PROD_CODE <> '-' AND SO_QTY_ACT1  <> '-')    
  GROUP BY PRODUCT_NAME,LV1_PROD_LIST_CODE ,SO_QTY_ACT1 
UNION ALL
  SELECT PRODUCT_NAME,
         LV1_PROD_LIST_CODE,
         ROW_NUMBER() OVER(ORDER BY SO_QTY_ACT1 DESC) * 100 AS RN1
    FROM V_END 
    WHERE LV1_PROD_CODE <> '-' AND SO_QTY_ACT1  = '-'
  GROUP BY PRODUCT_NAME,LV1_PROD_LIST_CODE ,SO_QTY_ACT1    
 ),
V_EXTERNAL_NAME AS (
  SELECT PRODUCT_NAME,
         LV1_PROD_LIST_CODE,
         ROW_NUMBER() OVER(PARTITION BY LV1_PROD_LIST_CODE ORDER BY SO_QTY_ACT1 DESC) AS RN2
    FROM (SELECT PRODUCT_NAME,LV1_PROD_LIST_CODE,TO_NUMBER(SO_QTY_ACT1 ) AS SO_QTY_ACT1  FROM V_END WHERE LV1_PROD_CODE = '-' AND LV1_PROD_LIST_CODE IS NOT NULL AND SO_QTY_ACT1  <>'-')
    GROUP BY PRODUCT_NAME,LV1_PROD_LIST_CODE,SO_QTY_ACT1 
 UNION ALL
   SELECT PRODUCT_NAME,
         LV1_PROD_LIST_CODE,
         ROW_NUMBER() OVER(PARTITION BY LV1_PROD_LIST_CODE ORDER BY SO_QTY_ACT1 DESC,PRODUCT_NAME) * 100 AS RN2
    FROM V_END
    WHERE LV1_PROD_CODE = '-' AND LV1_PROD_LIST_CODE IS NOT NULL AND SO_QTY_ACT1  ='-'
    GROUP BY PRODUCT_NAME,LV1_PROD_LIST_CODE,SO_QTY_ACT1 
 ) ,
 V_ORDER_RN AS (
 SELECT A.PRODUCT_NAME,
        DECODE(A.LV1_PROD_LIST_CODE,'Total',0,'SNULL',2,1) AS RN,
        CASE WHEN A.PRODUCT_NAME='Total' THEN '0'
             WHEN A.LV1_PROD_CODE ='-' AND A.LV1_PROD_LIST_CODE ='SNULL' THEN B.RN1*10000000||'-'||C.RN2 
             WHEN A.LV1_PROD_CODE ='-' AND A.LV1_PROD_LIST_CODE <> 'SNULL' THEN B.RN1*100000||'-'||C.RN2 
             WHEN A.LV1_PROD_CODE <> '-' AND A.LV1_PROD_LIST_CODE <> 'SNULL' THEN TO_CHAR(B.RN1*100000)
             ELSE TO_CHAR(B.RN1*10000000) END AS LEVEL,
        CASE WHEN A.PRODUCT_NAME='Total' THEN 0
             WHEN A.LV1_PROD_CODE ='-' AND A.LV1_PROD_LIST_CODE ='SNULL' THEN B.RN1*10000000+C.RN2 
             WHEN A.LV1_PROD_CODE ='-' AND A.LV1_PROD_LIST_CODE <> 'SNULL' THEN B.RN1*100000+C.RN2 
             WHEN A.LV1_PROD_CODE <> '-' AND A.LV1_PROD_LIST_CODE <> 'SNULL' THEN B.RN1*100000
             ELSE B.RN1*10000000 END AS ORDER_RN,
        A.LV1_PROD_CODE,
        A.SO_QTY_ACT1,
        A.SO_QTY_ACT2,
        A.SO_QTY_ACT3,
        A.DOD,
        A.SO_QTY_WTD1,
        A.SO_QTY_WTD2,
        A.SO_QTY_WTD3,
        A.WOW, 
        A.SO_QTY_MTD1,
        A.SO_QTY_MTD2,
        A.SO_QTY_MTD3,
        A.MOM 
FROM V_END A
LEFT JOIN V_LV1_PROD B ON A.LV1_PROD_LIST_CODE=B.LV1_PROD_LIST_CODE
LEFT JOIN V_EXTERNAL_NAME C ON A.PRODUCT_NAME=C.PRODUCT_NAME
 )
SELECT
        A.PRODUCT_NAME,
        A.LEVEL,
        A.LV1_PROD_CODE,
        A.SO_QTY_ACT1,
        A.SO_QTY_ACT2,
        A.SO_QTY_ACT3,
        A.DOD,
        A.SO_QTY_WTD1,
        A.SO_QTY_WTD2,
        A.SO_QTY_WTD3,
        A.WOW, 
        A.SO_QTY_MTD1,
        A.SO_QTY_MTD2,
        A.SO_QTY_MTD3,
        A.MOM  
FROM V_ORDER_RN A
ORDER BY PRODUCT_NAME,RN,ORDER_RN
LIMIT 30  OFFSET 0 

3、【性能分析】

原始SQL的performance执行计划如下图所示(完整计划见附件一):
image.png
从图中标记的地方可以看出,SQL在执行过程中,f表与d表关联时存在严重的数据发散现象,同时,sort排序处处理大数据量的结果集也导致执行时间过长。对原始SQL进行分析,发现该处子查询最后输出的f表的值仅为EXTERNAL_NAME 并且关联条件也是EXTERNAL_NAME ,此时,可以针对该SQL进行改写,可以先对f表的EXTERNAL_NAME值进行去重再关联,从而降低窗口函数中排序时的数据量,以提高语句执行效率。
改写方式如下所示(f表仅出现在 V_DATA 中,故仅对其进行改写):

SELECT D.LV1_PROD_LIST_EN_NAME                                                            AS LV1_RPOD_EN_NAME,
         F.EXTERNAL_NAME                                                                    AS EXTERNAL_NAME,
         D.LV1_PROD_LIST_CODE                                                               AS LV1_PROD_LIST_CODE,
         SUM(F.SO_QTY_ACT1)           AS SO_QTY_ACT1,
         SUM(F.SO_QTY_ACT2)          AS SO_QTY_ACT2,
         SUM(F.SO_QTY_ACT3)          AS SO_QTY_ACT3,
         SUM(F.SO_QTY_LAST)      AS SO_QTY_LAST,
         SUM(F.SO_QTY_WTD1)       AS SO_QTY_WTD1,
         SUM(F.SO_QTY_WTD2)      AS SO_QTY_WTD2,  
         SUM(F.SO_QTY_WTD3)      AS SO_QTY_WTD3,
         SUM(F.SO_QTY_WTD_LAST)  AS SO_QTY_WTD_LAST, 
         SUM(F.SO_QTY_MTD1)       AS SO_QTY_MTD1, 
         SUM(F.SO_QTY_MTD2)      AS SO_QTY_MTD2,
         SUM(F.SO_QTY_MTD3)      AS SO_QTY_MTD3,
         SUM(F.SO_QTY_MTD_LAST)  AS SO_QTY_MTD_LAST         
    FROM (
	 select EXTERNAL_NAME,
		SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY ELSE NULL END)           AS SO_QTY_ACT1,
         	SUM(CASE WHEN F.PERIOD_ID=20230509 THEN F.SO_QTY ELSE NULL END)          AS SO_QTY_ACT2,
         	SUM(CASE WHEN F.PERIOD_ID=20230508 THEN F.SO_QTY ELSE NULL END)          AS SO_QTY_ACT3,
         	SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_LAST ELSE NULL END)      AS SO_QTY_LAST,
         	SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_WTD ELSE NULL END)       AS SO_QTY_WTD1,
         	SUM(CASE WHEN F.PERIOD_ID=20230507 THEN F.SO_QTY_WTD ELSE NULL END)      AS SO_QTY_WTD2,  
         	SUM(CASE WHEN F.PERIOD_ID=20230430 THEN F.SO_QTY_WTD ELSE NULL END)      AS SO_QTY_WTD3,
         	SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_WTD_LAST ELSE NULL END)  AS SO_QTY_WTD_LAST, 
         	SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_MTD ELSE NULL END)       AS SO_QTY_MTD1, 
         	SUM(CASE WHEN F.PERIOD_ID=20230430 THEN F.SO_QTY_MTD ELSE NULL END)      AS SO_QTY_MTD2,
         	SUM(CASE WHEN F.PERIOD_ID=20230331 THEN F.SO_QTY_MTD ELSE NULL END)      AS SO_QTY_MTD3,
         	SUM(CASE WHEN F.PERIOD_ID=20230510 THEN F.SO_QTY_MTD_LAST ELSE NULL END)  AS SO_QTY_MTD_LAST   
	 from BI_DASHBOARD.DM_MSS_OS_PC_AREA_SO_COL_D_F F 
	 WHERE F.PERIOD_ID IN (20230510,20230509,20230508,20230507,20230430,20230430,20230331)
   	 AND F.EXTERNAL_NAME IN ( SELECT MKT_NAME EXTERNAL_NAME 
				  FROM BI_DASHBOARD.DM_MSS_ITEM_PRODUCT_D PRD
		      		  WHERE PRD.COMPANY_BRAND =any(array[string_to_array('HUAWEI,HONER',',')])
	 AND MKT_NAME<>'SNULL' GROUP BY MKT_NAME)
    	 AND (F.LARGE_AREA_CODE IN  ('') OR '' IS NULL OR ''= '')
    	 AND (F.SMALL_AREA_CODE IN  ('') OR '' IS NULL OR ''= '')
    	 AND F.ORG_CODE=F.SIGN_COUNTRY_PROVINCE_CODE 
 	 AND F.ECOSYSTEM = 'AG'  
     	 AND F.SALES_FLAG = '1' 
	 group by EXTERNAL_NAME)F
    INNER JOIN BI_DASHBOARD.DM_MSS_ITEM_PRODUCT_D D ON F.EXTERNAL_NAME = D.MKT_NAME AND D.LV1_PROD_LIST_FLAG = 'MH'
    WHERE 1=1
    GROUP BY F.EXTERNAL_NAME,D.LV1_PROD_LIST_EN_NAME,D.LV1_PROD_LIST_CODE

在改写的SQL中,先对f表进行去重,然后再让其与d表进行关联,其执行计划如下图所示(完整计划见附件二):
image.png
从图中可以看到,SQL语句执行时间从90s+优化到了1s+

【版权声明】本文为华为云社区用户原创内容,转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息, 否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: cloudbbs@huaweicloud.com
  • 点赞
  • 收藏
  • 关注作者

评论(0

0/1000
抱歉,系统识别当前为高风险访问,暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称,即可参与社区互动!

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。