GaussDB(DWS)性能调优:agg计算倾斜优化-单层agg相关的redistribute倾斜

举报
譡里个檔 发表于 2023/06/16 21:37:39 2023/06/16
【摘要】 GROUP BY/DISTINCT导致的计算倾斜场景的性能优化

1. 背景信息

原始SQL文本

insert into DWLFCA.DWL_FCA_CONSOL_F(bu_code, businesscode, businessdesc, buy_from_ic, company_code, data_category_code, data_produce_type_flag, data_source_flag, del_flag, dept_code, diff_alloc_type, dw_last_update_date, expensetype, group_account_code, ic_code, je_category_code, je_source_code, lc_code, lc_end_bal_amt, period_id, proj_num, rmb_end_bal_amt, sub_account_code, tc_code, tc_end_bal_amt, terminal_flag)
select 
    S.bu_code, S.businesscode, S.businessdesc, S.buy_from_ic, S.company_code, S.data_category_code, S.data_produce_type_flag, S.data_source_flag, S.del_flag, S.dept_code, S.diff_alloc_type, S.dw_last_update_date, S.expensetype, S.group_account_code, S.ic_code, S.je_category_code, S.je_source_code, S.lc_code, S.lc_end_bal_amt, S.period_id, S.proj_num, S.rmb_end_bal_amt, S.sub_account_code, S.tc_code, S.tc_end_bal_amt, S.terminal_flag
from (SELECT F.PERIOD_ID,
        F.BUSINESSCODE,
        F.BUSINESSDESC,
        F.COMPANY_CODE,
        F.IC_CODE,
        F.ACCOUNTCODE AS GROUP_ACCOUNT_CODE,
        NVL(RATE.TO_LC_CODE, F.LC_CODE) AS LC_CODE,
        CASE
          WHEN NVL(GRP.COA_REPORT_LOC_CODE, 'SNULL') = 'P'||'&'||'L item according to Z value' THEN
            CASE
          WHEN NVL(DEPT.DEPT_FIN_RPT_EXPS_TYPE_CODE, 'ZX') NOT IN('Z1', 'Z2', 'Z3', 'Z4', 'Z5', 'Z7') THEN
            'ZX'
          ELSE
            NVL(DEPT.DEPT_FIN_RPT_EXPS_TYPE_CODE, 'ZX')
        END ELSE '[None]' END AS EXPENSETYPE,
        SUM(F.LC_ORIG_END_MARKUP_AMT * NVL(RATE.EXCHANGE_RATE, 1)) AS LC_END_BAL_AMT,
        SUM(F.AMOUNT_RMB) AS RMB_END_BAL_AMT,
        SUM(F.AMOUNT_T) AS TC_END_BAL_AMT,
        'SNULL' AS BUY_FROM_IC,
        'SNULL' AS DIFF_ALLOC_TYPE,
        JS.JE_SOURCE_CODE AS JE_SOURCE_CODE,
        JC.JE_CATEGORY_CODE AS JE_CATEGORY_CODE,
        'SNULL' AS TERMINAL_FLAG,
        'SNULL' AS TC_CODE,
        'SNULL' AS BU_CODE,
        F.DATA_CATEGORY_CODE AS DATA_CATEGORY_CODE,
        F.COA_DEPT_CODE AS DEPT_CODE,
        'SNULL' AS SUB_ACCOUNT_CODE,
        'SNULL' AS PROJ_NUM,
        'C' AS DATA_PRODUCE_TYPE_FLAG,
        F.DATA_SOURCE_FLAG AS DATA_SOURCE_FLAG,
        'N' AS DEL_FLAG,
        SYSDATE DW_LAST_UPDATE_DATE
    FROM DWLFCA.DWL_FCA_U03_TMP F, DWRDIM.DWR_DIM_GRP_ACCT_CODE_D GRP, DWRDIM.DWR_DIM_DEPARTMENT_D DEPT, DWRDIM.DWR_DIM_JOURNAL_SOURCE_D JS, DWRDIM.DWR_DIM_JOURNAL_CATEGORY_D JC, DWLAR.DWR_AR_CUR_SWITCH_CONFIG_F RATE
    WHERE F.GROUP_ACCOUNT_CODE = GRP.GROUP_ACCOUNT_CODE
    AND F.JE_SOURCE_ID = JS.JE_SOURCE_ID(+)
    AND F.JE_CATEGORY_ID = JC.JE_CATEGORY_ID(+)
    AND F.COA_DEPT_CODE = DEPT.DEPT_CODE(+)
    AND DEPT.SCD_ACTIVE_BEGIN_DATE(+) <= LAST_DAY(TO_DATE(202208, 'YYYYMM'))
    AND DEPT.SCD_ACTIVE_END_DATE(+) > LAST_DAY(TO_DATE(202208, 'YYYYMM'))
    AND NOT REGEXP_LIKE(F.GROUP_ACCOUNT_CODE, '^(1340499|58007)')
    AND F.COMPANY_CODE = RATE.COMPANY_CODE(+)
    AND F.LC_CODE = RATE.FROM_LC_CODE(+)
    AND RATE.PERIOD_ID(+) <= 202208
    AND F.PERIOD_ID = 202208
    GROUP BY F.PERIOD_ID,
        F.BUSINESSCODE,
        F.BUSINESSDESC,
        F.COMPANY_CODE,
        F.IC_CODE,
        F.ACCOUNTCODE,
        NVL(RATE.TO_LC_CODE, F.LC_CODE),
        CASE
          WHEN NVL(GRP.COA_REPORT_LOC_CODE, 'SNULL') = 'P'||'&'||'L item according to Z value' THEN
            CASE
          WHEN NVL(DEPT.DEPT_FIN_RPT_EXPS_TYPE_CODE, 'ZX') NOT IN('Z1', 'Z2', 'Z3', 'Z4', 'Z5', 'Z7') THEN
            'ZX'
          ELSE
            NVL(DEPT.DEPT_FIN_RPT_EXPS_TYPE_CODE, 'ZX')
        END ELSE '[None]' END,
        JS.JE_SOURCE_CODE,
        JC.JE_CATEGORY_CODE,
        F.DATA_CATEGORY_CODE,
        F.COA_DEPT_CODE,/*F.SOURCE_FLAG,*/
        NVL(RATE.EXCHANGE_RATE, 1),
        F.DATA_SOURCE_FLAG
    HAVING ABS(SUM(F.LC_ORIG_END_MARKUP_AMT * NVL(RATE.EXCHANGE_RATE, 1))) > 0.0001 
        OR ABS(SUM(F.AMOUNT_RMB)) > 0.0001
        OR ABS(SUM(F.AMOUNT_T)) > 0.0001 
) S

SQL语句在历史topSQL的信息(PostgreSQL库的pgxc_wlm_session_info)中的query_plan字段记录的计划执行信息

 1 | Row Adapter  (cost=334933.00..334933.00 rows=1 width=550) (actual time=36526.094..36526.094 rows=0 loops=1)
 2 |  ->Vector Streaming (type: GATHER)  (cost=334913.18..334933.00 rows=1 width=550) (actual time=36526.088..36526.088 rows=0 loops=1)
   |    Node/s: All datanodes
 3 |   ->Vector Insert on dwlfca.dwl_fca_consol_f  (cost=334909.18..334922.12 rows=1511 width=550) (actual time=[36099.640,36406.140]..[36170.702,36480.985], rows=132328)
 4 |    ->Vector Streaming(type: ROUNDROBIN)  (cost=334909.18..334916.09 rows=1511 width=550) (actual time=[36096.748,36397.057]..[36165.186,36472.938], rows=132328)
   |      Spawn on: All datanodes
 5 |     ->Vector Subquery Scan on s  (cost=334909.18..334910.99 rows=1511 width=550) (actual time=[35546.470,35546.470]..[35628.484,36291.093], rows=132328)
 6 |      ->Vector Hash Aggregate  (cost=334909.18..334909.99 rows=1511 width=402) (actual time=[35546.468,35546.468]..[35628.479,36115.475], rows=132328)
   |        Group By Key: f.period_id, f.businesscode, f.businessdesc, f.company_code, f.ic_code, f.accountcode, (COALESCE(rate.to_lc_code, f.lc_code)), (CASE WHEN ((COALESCE(grp.coa_report_loc_code, 'SNULL'::character varying))::text = 'P&L item according to Z value'::text) THEN CASE WHEN ((COALESCE(dept.dept_fin_rpt_exps_type_code, 'ZX'::character varying))::text <> ALL ('{Z1,Z2,Z3,Z4,Z5,Z7}'::text[])) THEN 'ZX'::character varying ELSE COALESCE(dept.dept_fin_rpt_exps_type_code, 'ZX'::character varying) END ELSE '[None]'::character varying END), js.je_source_code, jc.je_category_code, f.data_category_code, f.coa_dept_code, (COALESCE(rate.exchange_rate, 1::numeric)), f.data_source_flag
   |        Filter: ((abs(sum((f.lc_orig_end_markup_amt * (COALESCE(rate.exchange_rate, 1::numeric))))) > 0.0001) OR (abs(sum(f.amount_rmb)) > 0.0001) OR (abs(sum(f.amount_t)) > 0.0001))
 7 |       ->Vector Streaming(type: REDISTRIBUTE)  (cost=13237.05..334908.14 rows=1511 width=210) (actual time=[2367.495,11416.342]..[35603.205,35603.205], rows=26419471)
   |         Distribute Key: js.je_source_code
   |         Spawn on: All datanodes
 8 |        ->Vector Hash Left Join (9, 19)  (cost=13237.05..334905.81 rows=1511 width=210) (actual time=[2343.188,3573.131]..[2506.890,24426.175], rows=26419471)
   |          Hash Cond: (((f.company_code)::text = (rate.company_code)::text) AND ((f.lc_code)::text = (rate.from_lc_code)::text))
   |          Skew Join Optimized by Statistic
                Max Memory Used : 8KB
                Min Memory Used : 0KB
 9 |         ->Vector Streaming(type: PART REDISTRIBUTE PART ROUNDROBIN)  (cost=13226.15..334894.57 rows=1511 distinct=19.00 width=120) (actual time=[2296.539,3071.793]..[2448.444,24307.583], rows=26419471)
   |           Distribute Key: f.company_code, f.lc_code
   |           Skew Filter(type: ROUNDROBIN): ((((f.company_code)::text = '0021'::text) AND ((f.lc_code)::text = 'RMB'::text)) OR (((f.company_code)::text = '0021'::text) AND ((f.lc_code)::text = 'EUR'::text)) OR (((f.company_code)::text = '0021'::text) AND ((f.lc_code)::text = 'ZAR'::text)) OR (((f.company_code)::text = '2821'::text) AND ((f.lc_code)::text = 'RMB'::text)) OR (((f.company_code)::text = '2821'::text) AND ((f.lc_code)::text = 'EUR'::text)) OR (((f.company_code)::text = '2821'::text) AND ((f.lc_code)::text = 'ZAR'::text)) OR (((f.company_code)::text = '0461'::text) AND ((f.lc_code)::text = 'RMB'::text)) OR (((f.company_code)::text = '0461'::text) AND ((f.lc_code)::text = 'EUR'::text)) OR (((f.company_code)::text = '0461'::text) AND ((f.lc_code)::text = 'ZAR'::text)))
   |           Spawn on: All datanodes
10 |          ->Vector Hash Right Join (11, 12)  (cost=13226.15..334891.77 rows=1511 width=120) (actual time=[2266.306,3067.990]..[2747.013,3646.273], rows=26419471)
   |            Hash Cond: ((dept.dept_code)::text = (f.coa_dept_code)::text)
                    Max File Num: 32
                    Min File Num: 32
11 |           ->CStore Scan on dwrdim.dwr_dim_department_d dept  (cost=0.00..321414.96 rows=7972480 distinct=9134.00 width=10) (actual time=[0.820,397.084]..[2.524,481.826], rows=5761760)
   |             Filter: ((dept.scd_active_begin_date <= last_day(to_date('202208'::text, 'YYYYMM'::text))) AND (dept.scd_active_end_date > last_day(to_date('202208'::text, 'YYYYMM'::text))))
   |             Rows Removed by Filter: 49173040
   |             Pushdown Predicate Filter: ((dept.scd_active_begin_date <= last_day(to_date('202208'::text, 'YYYYMM'::text))) AND (dept.scd_active_end_date > last_day(to_date('202208'::text, 'YYYYMM'::text))))
12 |           ->Vector Sonic Hash Join (13,14)  (cost=3912.10..13226.02 rows=784 distinct=10.00 width=117) (actual time=[1437.626,1515.324]..[1774.936,1872.239], rows=26419471)
   |             Hash Cond: ((grp.group_account_code)::text = (f.group_account_code)::text)
                      Max Memory Used : 98911KB
                      Min Memory Used : 98878KB
13 |            ->CStore Scan on dwrdim.dwr_dim_grp_acct_code_d grp  (cost=0.00..9235.15 rows=2517360 distinct=31467.00 width=18) (actual time=[0.466,12.351]..[0.799,17.382], rows=2516960)
   |              Filter: ((grp.group_account_code)::text !~ '^(1340499|58007)'::text)
   |              Rows Removed by Filter: 640
14 |            ->Vector Hash Right Join (15, 16)  (cost=575.39..3911.97 rows=784 distinct=9.00 width=114) (actual time=[1319.064,1342.321]..[1630.605,1656.793], rows=26419471)
   |              Hash Cond: (jc.je_category_id = f.je_category_id)
                        Max Memory Used : 164043KB
                        Min Memory Used : 164022KB
15 |             ->CStore Scan on dwrdim.dwr_dim_journal_category_d jc  (cost=0.00..3301.88 rows=1110000 distinct=13875.00 width=21) (actual time=[0.081,0.234]..[0.258,0.605], rows=1110000)
16 |             ->Vector Hash Right Join (17, 18)  (cost=10.19..575.27 rows=784 distinct=8.00 width=105) (actual time=[469.782,1140.672]..[605.921,1381.536], rows=26419471)
   |               Hash Cond: (js.je_source_id = f.je_source_id)
                          Max Memory Used : 164038KB
                          Min Memory Used : 164011KB
17 |              ->CStore Scan on dwrdim.dwr_dim_journal_source_d js  (cost=0.00..563.56 rows=45040 distinct=563.00 width=16) (actual time=[0.086,0.112]..[1.386,1.417], rows=45040)
18 |              ->CStore Scan on dwlfca.dwl_fca_u03_tmp f  (cost=0.00..10.06 rows=784 distinct=10.00 width=100) (actual time=[9.033,273.954]..[22.866,342.889], rows=26419471)
   |                Filter: (((f.group_account_code)::text !~ '^(1340499|58007)'::text) AND (f.period_id = 202208::numeric))
   |                Rows Removed by Filter: 554577
   |                Pushdown Predicate Filter: (f.period_id = 202208::numeric)
19 |         ->Vector Streaming(type: PART REDISTRIBUTE PART BROADCAST)  (cost=0.00..10.85 rows=267 distinct=3.00 width=266) (actual time=[0.118,0.118]..[60.165,60.165], rows=7)
   |           Distribute Key: rate.company_code, rate.from_lc_code
   |           Skew Filter(type: BROADCAST): ((((rate.company_code)::text = '0021'::text) AND ((rate.from_lc_code)::text = 'RMB'::text)) OR (((rate.company_code)::text = '0021'::text) AND ((rate.from_lc_code)::text = 'EUR'::text)) OR (((rate.company_code)::text = '0021'::text) AND ((rate.from_lc_code)::text = 'ZAR'::text)) OR (((rate.company_code)::text = '2821'::text) AND ((rate.from_lc_code)::text = 'RMB'::text)) OR (((rate.company_code)::text = '2821'::text) AND ((rate.from_lc_code)::text = 'EUR'::text)) OR (((rate.company_code)::text = '2821'::text) AND ((rate.from_lc_code)::text = 'ZAR'::text)) OR (((rate.company_code)::text = '0461'::text) AND ((rate.from_lc_code)::text = 'RMB'::text)) OR (((rate.company_code)::text = '0461'::text) AND ((rate.from_lc_code)::text = 'EUR'::text)) OR (((rate.company_code)::text = '0461'::text) AND ((rate.from_lc_code)::text = 'ZAR'::text)))
   |           Spawn on: All datanodes
20 |          ->CStore Scan on dwlar.dwr_ar_cur_switch_config_f rate  (cost=0.00..10.04 rows=267 width=266) (actual time=[0.012,0.012]..[0.293,0.304], rows=7)
   |            Filter: (rate.period_id <= 202208::numeric)
   |            Pushdown Predicate Filter: (rate.period_id <= 202208::numeric)

SQL语句在历史topSQL的信息(PostgreSQL库的pgxc_wlm_session_info)中的query_plan字段记录的SQL自诊断信息(warning字段)

Hash table conflict
Execute diagnostic information
	PlanNode[18] Inaccurate Estimation-Rows: "CStore Scan" A-Rows:26419471, E-Rows:784
	PlanNode[7] DataSkew:"Vector Streaming(type: REDISTRIBUTE)", min_dn_tuples:0, max_dn_tuples:26349828

2. 分析过程

自诊断信息显示语句在id = 7(PlanNode[7])的stream算子上存在计算倾斜,即数据经过重分布之后,不同DN上的数据量偏差严重,导致各个DN上的agg计算(stream上层算子)不均衡。

2.1 解决id = 7的stream算子倾斜

id = 7的stream算子的上层是HashAgg,对应与原始语句的GROUP BY操作。对于这种场景可以把所有列作为stream操作的分布列(在GROUP BY语句所处子查询hint设置设置参数agg_redistribute_enhancement=on)来解决

insert into DWLFCA.DWL_FCA_CONSOL_F(bu_code, businesscode, businessdesc, buy_from_ic, company_code, data_category_code, data_produce_type_flag, data_source_flag, del_flag, dept_code, diff_alloc_type, dw_last_update_date, expensetype, group_account_code, ic_code, je_category_code, je_source_code, lc_code, lc_end_bal_amt, period_id, proj_num, rmb_end_bal_amt, sub_account_code, tc_code, tc_end_bal_amt, terminal_flag)
select 
    S.bu_code, S.businesscode, S.businessdesc, S.buy_from_ic, S.company_code, S.data_category_code, S.data_produce_type_flag, S.data_source_flag, S.del_flag, S.dept_code, S.diff_alloc_type, S.dw_last_update_date, S.expensetype, S.group_account_code, S.ic_code, S.je_category_code, S.je_source_code, S.lc_code, S.lc_end_bal_amt, S.period_id, S.proj_num, S.rmb_end_bal_amt, S.sub_account_code, S.tc_code, S.tc_end_bal_amt, S.terminal_flag
from (SELECT /*+ set(agg_redistribute_enhancement on) */ F.PERIOD_ID,
        F.BUSINESSCODE,
        F.BUSINESSDESC,
        F.COMPANY_CODE,
        F.IC_CODE,
        F.ACCOUNTCODE AS GROUP_ACCOUNT_CODE,
        NVL(RATE.TO_LC_CODE, F.LC_CODE) AS LC_CODE,
        CASE
          WHEN NVL(GRP.COA_REPORT_LOC_CODE, 'SNULL') = 'P'||'&'||'L item according to Z value' THEN
            CASE
          WHEN NVL(DEPT.DEPT_FIN_RPT_EXPS_TYPE_CODE, 'ZX') NOT IN('Z1', 'Z2', 'Z3', 'Z4', 'Z5', 'Z7') THEN
            'ZX'
          ELSE
            NVL(DEPT.DEPT_FIN_RPT_EXPS_TYPE_CODE, 'ZX')
        END ELSE '[None]' END AS EXPENSETYPE,
        SUM(F.LC_ORIG_END_MARKUP_AMT * NVL(RATE.EXCHANGE_RATE, 1)) AS LC_END_BAL_AMT,
        SUM(F.AMOUNT_RMB) AS RMB_END_BAL_AMT,
        SUM(F.AMOUNT_T) AS TC_END_BAL_AMT,
        'SNULL' AS BUY_FROM_IC,
        'SNULL' AS DIFF_ALLOC_TYPE,
        JS.JE_SOURCE_CODE AS JE_SOURCE_CODE,
        JC.JE_CATEGORY_CODE AS JE_CATEGORY_CODE,
        'SNULL' AS TERMINAL_FLAG,
        'SNULL' AS TC_CODE,
        'SNULL' AS BU_CODE,
        F.DATA_CATEGORY_CODE AS DATA_CATEGORY_CODE,
        F.COA_DEPT_CODE AS DEPT_CODE,
        'SNULL' AS SUB_ACCOUNT_CODE,
        'SNULL' AS PROJ_NUM,
        'C' AS DATA_PRODUCE_TYPE_FLAG,
        F.DATA_SOURCE_FLAG AS DATA_SOURCE_FLAG,
        'N' AS DEL_FLAG,
        SYSDATE DW_LAST_UPDATE_DATE
    FROM DWLFCA.DWL_FCA_U03_TMP F, DWRDIM.DWR_DIM_GRP_ACCT_CODE_D GRP, DWRDIM.DWR_DIM_DEPARTMENT_D DEPT, DWRDIM.DWR_DIM_JOURNAL_SOURCE_D JS, DWRDIM.DWR_DIM_JOURNAL_CATEGORY_D JC, DWLAR.DWR_AR_CUR_SWITCH_CONFIG_F RATE
    WHERE F.GROUP_ACCOUNT_CODE = GRP.GROUP_ACCOUNT_CODE
    AND F.JE_SOURCE_ID = JS.JE_SOURCE_ID(+)
    AND F.JE_CATEGORY_ID = JC.JE_CATEGORY_ID(+)
    AND F.COA_DEPT_CODE = DEPT.DEPT_CODE(+)
    AND DEPT.SCD_ACTIVE_BEGIN_DATE(+) <= LAST_DAY(TO_DATE(202208, 'YYYYMM'))
    AND DEPT.SCD_ACTIVE_END_DATE(+) > LAST_DAY(TO_DATE(202208, 'YYYYMM'))
    AND NOT REGEXP_LIKE(F.GROUP_ACCOUNT_CODE, '^(1340499|58007)')
    AND F.COMPANY_CODE = RATE.COMPANY_CODE(+)
    AND F.LC_CODE = RATE.FROM_LC_CODE(+)
    AND RATE.PERIOD_ID(+) <= 202208
    AND F.PERIOD_ID = 202208
    GROUP BY F.PERIOD_ID,
        F.BUSINESSCODE,
        F.BUSINESSDESC,
        F.COMPANY_CODE,
        F.IC_CODE,
        F.ACCOUNTCODE,
        NVL(RATE.TO_LC_CODE, F.LC_CODE),
        CASE
          WHEN NVL(GRP.COA_REPORT_LOC_CODE, 'SNULL') = 'P'||'&'||'L item according to Z value' THEN
            CASE
          WHEN NVL(DEPT.DEPT_FIN_RPT_EXPS_TYPE_CODE, 'ZX') NOT IN('Z1', 'Z2', 'Z3', 'Z4', 'Z5', 'Z7') THEN
            'ZX'
          ELSE
            NVL(DEPT.DEPT_FIN_RPT_EXPS_TYPE_CODE, 'ZX')
        END ELSE '[None]' END,
        JS.JE_SOURCE_CODE,
        JC.JE_CATEGORY_CODE,
        F.DATA_CATEGORY_CODE,
        F.COA_DEPT_CODE,/*F.SOURCE_FLAG,*/
        NVL(RATE.EXCHANGE_RATE, 1),
        F.DATA_SOURCE_FLAG
    HAVING ABS(SUM(F.LC_ORIG_END_MARKUP_AMT * NVL(RATE.EXCHANGE_RATE, 1))) > 0.0001 
        OR ABS(SUM(F.AMOUNT_RMB)) > 0.0001
        OR ABS(SUM(F.AMOUNT_T)) > 0.0001 
) S

对语句执行explain performance(为避免对业务表dwlfca.dwl_fca_consol_f数据的影响,实际执行是没有带INSERT动作,只从SELECT部分开始执行),简单信息如下。完整信息见附件: 计算倾斜-agg计算倾斜-performance-1.txt

 id |                                     operation                                     |        A-time        |  A-rows  |  E-rows  | E-distinct |  Peak Memory   |    E-memory    |  A-width  | E-width |  E-costs   
----+-----------------------------------------------------------------------------------+----------------------+----------+----------+------------+----------------+----------------+-----------+---------+------------
  1 | ->  Row Adapter                                                                   | 5168.717             |   132328 |   145582 |            | 236KB          |                |           |     551 | 3638066.31 
  2 |    ->  Vector Streaming (type: GATHER)                                            | 5149.832             |   132328 |   145582 |            | 1033KB         |                |           |     551 | 3638066.31 
  3 |       ->  Vector Subquery Scan on s                                               | [2789.557, 2845.496] |   132328 |   145582 |            | [504KB, 504KB] | 1MB            |           |     551 | 3636704.28 
  4 |          ->  Vector Hash Aggregate                                                | [2789.494, 2845.454] |   132328 |   145582 |            | [16MB, 16MB]   | 396MB(40887MB) | [455,455] |     403 | 3636686.08 
  5 |             ->  Vector Streaming(type: REDISTRIBUTE)                              | [2006.849, 2596.699] | 26419471 | 29116464 |            | [2MB, 2MB]     | 3MB            |           |     211 | 2031581.85 
  6 |                ->  Vector Hash Left Join (7, 16)                                  | [967.623, 1845.970]  | 26419471 | 29116464 |            | [1MB, 1MB]     | 16MB           |           |     211 | 418307.78  
  7 |                   ->  Vector Hash Left Join (8, 15)                               | [898.897, 1658.157]  | 26419471 | 29116464 | 182        | [1MB, 1MB]     | 16MB           |           |     121 | 411806.97  
  8 |                      ->  Vector Hash Left Join (9, 14)                            | [852.508, 1540.882]  | 26419471 | 29116464 | 14         | [2MB, 2MB]     | 16MB           |           |     116 | 406286.57  
  9 |                         ->  Vector Sonic Hash Join (10,13)                        | [829.424, 1469.429]  | 26419471 | 29116464 | 21         | [2MB, 2MB]     | 16MB           |           |     107 | 401357.61  
 10 |                            ->  Vector Hash Left Join (11, 12)                     | [789.820, 1372.810]  | 26419471 | 29116464 | 77         | [6MB, 6MB]     | 16MB           |           |     104 | 386724.73  
 11 |                               ->  CStore Scan on dwlfca.dwl_fca_u03_tmp f(FACT)   | [288.349, 487.663]   | 26419471 | 26469513 | 5036       | [3MB, 3MB]     | 1MB            |           |     101 | 9709.06    
 12 |                               ->  CStore Scan on dwrdim.dwr_dim_department_d dept | [407.524, 718.272]   |  5761760 |  7972480 | 9134       | [6MB, 6MB]     | 1MB            | [48,48]   |      10 | 321414.96  
 13 |                            ->  CStore Scan on dwrdim.dwr_dim_grp_acct_code_d grp  | [11.845, 19.763]     |  2516960 |  2517360 | 31467      | [3MB, 3MB]     | 1MB            |           |      18 | 9235.15    
 14 |                         ->  CStore Scan on dwrdim.dwr_dim_journal_category_d jc   | [0.198, 0.447]       |  1110000 |  1110000 | 13875      | [1MB, 1MB]     | 1MB            | [59,59]   |      21 | 3301.88    
 15 |                      ->  CStore Scan on dwrdim.dwr_dim_journal_source_d js        | [0.055, 0.169]       |    45040 |    45040 | 563        | [976KB, 976KB] | 1MB            | [55,55]   |      16 | 563.56     
 16 |                   ->  Vector Streaming(type: BROADCAST)                           | [0.140, 0.245]       |      560 |    21360 | 160        | [1MB, 1MB]     | 3MB            | [88,88]   |     266 | 203.93     
 17 |                      ->  CStore Scan on dwlar.dwr_ar_cur_switch_config_f rate     | [0.013, 0.333]       |        7 |      267 |            | [1MB, 1MB]     | 1MB            |           |     266 | 10.04      

                                                                              Predicate Information (identified by plan id)                                                                              
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  4 --Vector Hash Aggregate
        Filter: ((abs(sum((f.lc_orig_end_markup_amt * (COALESCE(rate.exchange_rate, 1::numeric))))) > 0.0001) OR (abs(sum(f.amount_rmb)) > 0.0001) OR (abs(sum(f.amount_t)) > 0.0001))
  6 --Vector Hash Left Join (7, 16)

可以看出已经没有提示计算倾斜的告警

2.2 双层agg策略进一步优化

我们分析上面的计划可以发现计划层面上存在继续优化的空间:id=4的HashAgg算子的输出行数是132328,只有其下层算子Vector Streaming(type: REDISTRIBUTE)输出行数(26419471)的0.5%。


我们可以使用双层agg策略(通过hint设置参数best_agg_plan=3实现)进行进一步优化

insert into DWLFCA.DWL_FCA_CONSOL_F(bu_code, businesscode, businessdesc, buy_from_ic, company_code, data_category_code, data_produce_type_flag, data_source_flag, del_flag, dept_code, diff_alloc_type, dw_last_update_date, expensetype, group_account_code, ic_code, je_category_code, je_source_code, lc_code, lc_end_bal_amt, period_id, proj_num, rmb_end_bal_amt, sub_account_code, tc_code, tc_end_bal_amt, terminal_flag)
select 
    S.bu_code, S.businesscode, S.businessdesc, S.buy_from_ic, S.company_code, S.data_category_code, S.data_produce_type_flag, S.data_source_flag, S.del_flag, S.dept_code, S.diff_alloc_type, S.dw_last_update_date, S.expensetype, S.group_account_code, S.ic_code, S.je_category_code, S.je_source_code, S.lc_code, S.lc_end_bal_amt, S.period_id, S.proj_num, S.rmb_end_bal_amt, S.sub_account_code, S.tc_code, S.tc_end_bal_amt, S.terminal_flag
from (SELECT /*+ set(agg_redistribute_enhancement on) set (best_agg_plan 3) */ F.PERIOD_ID,
        F.BUSINESSCODE,
        F.BUSINESSDESC,
        F.COMPANY_CODE,
        F.IC_CODE,
        F.ACCOUNTCODE AS GROUP_ACCOUNT_CODE,
        NVL(RATE.TO_LC_CODE, F.LC_CODE) AS LC_CODE,
        CASE
          WHEN NVL(GRP.COA_REPORT_LOC_CODE, 'SNULL') = 'P'||'&'||'L item according to Z value' THEN
            CASE
          WHEN NVL(DEPT.DEPT_FIN_RPT_EXPS_TYPE_CODE, 'ZX') NOT IN('Z1', 'Z2', 'Z3', 'Z4', 'Z5', 'Z7') THEN
            'ZX'
          ELSE
            NVL(DEPT.DEPT_FIN_RPT_EXPS_TYPE_CODE, 'ZX')
        END ELSE '[None]' END AS EXPENSETYPE,
        SUM(F.LC_ORIG_END_MARKUP_AMT * NVL(RATE.EXCHANGE_RATE, 1)) AS LC_END_BAL_AMT,
        SUM(F.AMOUNT_RMB) AS RMB_END_BAL_AMT,
        SUM(F.AMOUNT_T) AS TC_END_BAL_AMT,
        'SNULL' AS BUY_FROM_IC,
        'SNULL' AS DIFF_ALLOC_TYPE,
        JS.JE_SOURCE_CODE AS JE_SOURCE_CODE,
        JC.JE_CATEGORY_CODE AS JE_CATEGORY_CODE,
        'SNULL' AS TERMINAL_FLAG,
        'SNULL' AS TC_CODE,
        'SNULL' AS BU_CODE,
        F.DATA_CATEGORY_CODE AS DATA_CATEGORY_CODE,
        F.COA_DEPT_CODE AS DEPT_CODE,
        'SNULL' AS SUB_ACCOUNT_CODE,
        'SNULL' AS PROJ_NUM,
        'C' AS DATA_PRODUCE_TYPE_FLAG,
        F.DATA_SOURCE_FLAG AS DATA_SOURCE_FLAG,
        'N' AS DEL_FLAG,
        SYSDATE DW_LAST_UPDATE_DATE
    FROM DWLFCA.DWL_FCA_U03_TMP F, DWRDIM.DWR_DIM_GRP_ACCT_CODE_D GRP, DWRDIM.DWR_DIM_DEPARTMENT_D DEPT, DWRDIM.DWR_DIM_JOURNAL_SOURCE_D JS, DWRDIM.DWR_DIM_JOURNAL_CATEGORY_D JC, DWLAR.DWR_AR_CUR_SWITCH_CONFIG_F RATE
    WHERE F.GROUP_ACCOUNT_CODE = GRP.GROUP_ACCOUNT_CODE
    AND F.JE_SOURCE_ID = JS.JE_SOURCE_ID(+)
    AND F.JE_CATEGORY_ID = JC.JE_CATEGORY_ID(+)
    AND F.COA_DEPT_CODE = DEPT.DEPT_CODE(+)
    AND DEPT.SCD_ACTIVE_BEGIN_DATE(+) <= LAST_DAY(TO_DATE(202208, 'YYYYMM'))
    AND DEPT.SCD_ACTIVE_END_DATE(+) > LAST_DAY(TO_DATE(202208, 'YYYYMM'))
    AND NOT REGEXP_LIKE(F.GROUP_ACCOUNT_CODE, '^(1340499|58007)')
    AND F.COMPANY_CODE = RATE.COMPANY_CODE(+)
    AND F.LC_CODE = RATE.FROM_LC_CODE(+)
    AND RATE.PERIOD_ID(+) <= 202208
    AND F.PERIOD_ID = 202208
    GROUP BY F.PERIOD_ID,
        F.BUSINESSCODE,
        F.BUSINESSDESC,
        F.COMPANY_CODE,
        F.IC_CODE,
        F.ACCOUNTCODE,
        NVL(RATE.TO_LC_CODE, F.LC_CODE),
        CASE
          WHEN NVL(GRP.COA_REPORT_LOC_CODE, 'SNULL') = 'P'||'&'||'L item according to Z value' THEN
            CASE
          WHEN NVL(DEPT.DEPT_FIN_RPT_EXPS_TYPE_CODE, 'ZX') NOT IN('Z1', 'Z2', 'Z3', 'Z4', 'Z5', 'Z7') THEN
            'ZX'
          ELSE
            NVL(DEPT.DEPT_FIN_RPT_EXPS_TYPE_CODE, 'ZX')
        END ELSE '[None]' END,
        JS.JE_SOURCE_CODE,
        JC.JE_CATEGORY_CODE,
        F.DATA_CATEGORY_CODE,
        F.COA_DEPT_CODE,/*F.SOURCE_FLAG,*/
        NVL(RATE.EXCHANGE_RATE, 1),
        F.DATA_SOURCE_FLAG
    HAVING ABS(SUM(F.LC_ORIG_END_MARKUP_AMT * NVL(RATE.EXCHANGE_RATE, 1))) > 0.0001 
        OR ABS(SUM(F.AMOUNT_RMB)) > 0.0001
        OR ABS(SUM(F.AMOUNT_T)) > 0.0001 
) S

新的explain performance信息(为避免对业务表dwlfca.dwl_fca_consol_f数据的影响,实际执行是没有带INSERT动作,只从SELECT部分开始执行)如下,详细信息参见附件:计算倾斜-agg计算倾斜-performance-2.txt

 id |                                      operation                                       |        A-time        |  A-rows  |  E-rows  | E-distinct |  Peak Memory   |    E-memory     |  A-width  | E-width |  E-costs   
----+--------------------------------------------------------------------------------------+----------------------+----------+----------+------------+----------------+-----------------+-----------+---------+------------
  1 | ->  Row Adapter                                                                      | 2393.622             |   132328 |   145582 |            | 236KB          |                 |           |     551 | 9509386.32 
  2 |    ->  Vector Streaming (type: GATHER)                                               | 2370.012             |   132328 |   145582 |            | 1033KB         |                 |           |     551 | 9509386.32 
  3 |       ->  Vector Subquery Scan on s                                                  | [2228.119, 2294.500] |   132328 |   145582 |            | [504KB, 504KB] | 1MB             |           |     551 | 9508024.29 
  4 |          ->  Vector Hash Aggregate                                                   | [2228.077, 2294.439] |   132328 |   145582 |            | [72MB, 72MB]   | 2052MB(39891MB) | [471,471] |     403 | 9508006.09 
  5 |             ->  Vector Streaming(type: REDISTRIBUTE)                                 | [2011.118, 2087.604] |  6345108 | 29116464 |            | [2MB, 2MB]     | 3MB             |           |     403 | 3448962.99 
  6 |                ->  Vector Hash Aggregate                                             | [1218.919, 1917.267] |  6345108 | 29116464 |            | [58MB, 58MB]   | 394MB(10235MB)  | [407,407] |     403 | 453793.49  
  7 |                   ->  Vector Hash Left Join (8, 17)                                  | [942.243, 1470.632]  | 26419471 | 29116464 |            | [1MB, 1MB]     | 16MB            |           |     211 | 418307.78  
  8 |                      ->  Vector Hash Left Join (9, 16)                               | [875.608, 1360.130]  | 26419471 | 29116464 | 182        | [1MB, 1MB]     | 16MB            |           |     121 | 411806.97  
  9 |                         ->  Vector Hash Left Join (10, 15)                           | [828.902, 1276.732]  | 26419471 | 29116464 | 14         | [2MB, 2MB]     | 16MB            |           |     116 | 406286.57  
 10 |                            ->  Vector Sonic Hash Join (11,14)                        | [806.310, 1232.927]  | 26419471 | 29116464 | 21         | [2MB, 2MB]     | 16MB            |           |     107 | 401357.61  
 11 |                               ->  Vector Hash Left Join (12, 13)                     | [767.377, 1175.995]  | 26419471 | 29116464 | 77         | [6MB, 6MB]     | 16MB            |           |     104 | 386724.73  
 12 |                                  ->  CStore Scan on dwlfca.dwl_fca_u03_tmp f(FACT)   | [274.769, 509.909]   | 26419471 | 26469513 | 5036       | [3MB, 3MB]     | 1MB             |           |     101 | 9709.06    
 13 |                                  ->  CStore Scan on dwrdim.dwr_dim_department_d dept | [403.182, 629.244]   |  5761760 |  7972480 | 9134       | [6MB, 6MB]     | 1MB             | [48,48]   |      10 | 321414.96  
 14 |                               ->  CStore Scan on dwrdim.dwr_dim_grp_acct_code_d grp  | [11.792, 18.551]     |  2516960 |  2517360 | 31467      | [3MB, 3MB]     | 1MB             |           |      18 | 9235.15    
 15 |                            ->  CStore Scan on dwrdim.dwr_dim_journal_category_d jc   | [0.195, 0.494]       |  1110000 |  1110000 | 13875      | [1MB, 1MB]     | 1MB             | [59,59]   |      21 | 3301.88    
 16 |                         ->  CStore Scan on dwrdim.dwr_dim_journal_source_d js        | [0.065, 0.129]       |    45040 |    45040 | 563        | [976KB, 976KB] | 1MB             | [55,55]   |      16 | 563.56     
 17 |                      ->  Vector Streaming(type: BROADCAST)                           | [0.141, 0.314]       |      560 |    21360 | 160        | [1MB, 1MB]     | 3MB             | [88,88]   |     266 | 203.93     
 18 |                         ->  CStore Scan on dwlar.dwr_ar_cur_switch_config_f rate     | [0.012, 0.301]       |        7 |      267 |            | [1MB, 1MB]     | 1MB             |           |     266 | 10.04      

                                                                               Predicate Information (identified by plan id)                                                                               
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  4 --Vector Hash Aggregate
        Filter: ((abs(sum((sum((f.lc_orig_end_markup_amt * (COALESCE(rate.exchange_rate, 1::numeric))))))) > 0.0001) OR (abs(sum((sum(f.amount_rmb)))) > 0.0001) OR (abs(sum((sum(f.amount_t)))) > 0.0001))
  7 --Vector Hash Left Join (8, 17)


更多精彩内容请看《GaussDB(DWS)性能调优:基本性能问题识别与优化导图

【版权声明】本文为华为云社区用户原创内容,转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息, 否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: cloudbbs@huaweicloud.com
  • 点赞
  • 收藏
  • 关注作者

评论(0

0/1000
抱歉,系统识别当前为高风险访问,暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称,即可参与社区互动!

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。