GAUSSDB-SQL优化
【摘要】 常见优化定式熟悉常见的优化定式:建索引、plan hint、not in转not exists、union转union all、隐式转换等等 注意事项通常的SQL优化会通过参数调优的方式进行调整,例如如下参数(1)、最顶层算子为Data Node Scan时,需要设置enable_fast_query_shipping为off才能看到具体的执行计划set enable_fast_query...
常见优化定式
熟悉常见的优化定式:建索引、plan hint、not in转not exists、union转union all、隐式转换等等
注意事项
通常的SQL优化会通过参数调优的方式进行调整,例如如下参数
(1)、最顶层算子为Data Node Scan时,需要设置enable_fast_query_shipping为off才能看到具体的执行计划
set enable_fast_query_shipping = off;
(2)、 enable_stream_operator设置为off时无法生成包含stream算子的计划
set enable_stream_operator = on;
示例
--删除表
DROP TABLE IF EXISTS gaussdb.tb_user;
--创建表
create table tb_user(stu_no int,stu_name varchar(32),age int,hobby_type int) distribute by hash(age);
--插入数据
insert into tb_user select id,'xiaoming'||(random()*60+10)::int,
(random()*60+10)::int,
(random()*5+1)::int
from (select generate_series(1,100000)id);
1、收集tb_user的统计信息
analyze tb_user;
2、为下面两个查询语句创建索引,让执行计划和索引最合理
SQL1:explain analyze select * from tb_user where age=29 and stu_name='xiaoming';
SQL2:explain analyze select * from tb_user where stu_no=100 and age=29;
3、在上题操作的基础上,用4种不同方式使如下SQL不走索引
explain analyze select * from tb_user where stu_no=100 and age=29;
1)--添加hint,使优化器不走索引
explain analyze select /*+ tablescan(tb_user) */ * from tb_user where stu_no=100 and age=29;
2)--调大使用indexscan的开销
Show cpu_index_tuple_cost;
Set cpu_index_tuple_cost=100000;
Show cpu_index_tuple_cost;
explain analyze select * from tb_user where stu_no=100 and age=29;
3)--禁用tb_user表上的索引
alter index age_name unusable;
alter index age_no unusable;
explain analyze select * from tb_user where stu_no=100 and age=29;
重新启索引
alter index age_name rebuild;
alter index age_no rebuild;
4)--禁止优化器使用indexscan
Show enable_indexscan;
Show enable_bitmapscan;
set enable_indexscan=off;
explain analyze select * from tb_user where stu_no=100 and age=29;
发现它走了bitmap index扫描,再禁止bitmapscan
set enable_bitmapscan=off;
explain analyze select * from tb_user where stu_no=100 and age=29;
2、练习2
当前有一张test(id、kemu、classID、grade),里面有8万条数据
表定义:
create table test(id int, kemu text, grade int,class int);
初始化数据:
insert into test values(generate_series(1,10000),'语文',round(random()*99),202201);
insert into test values(generate_series(1,10000),'数学',round(random()*100),202201);
insert into test values(generate_series(1,10000),'英语',round(random()*100),202201);
insert into test values(generate_series(1,10000),'物理',round(random()*100),202201);
insert into test values(generate_series(1,10000),'语文',round(random()*100),202202);
insert into test values(generate_series(1,10000),'数学',round(random()*100),202202);
insert into test values(generate_series(1,10000),'英语',round(random()*100),202202);
insert into test values(generate_series(1,10000),'物理',round(random()*50),202202);
查询202202班级同一科目成绩比202201班级最高分高的同学,根据以下SQL优化重写
原始的SQL:
select * from test t1 where t1.class=202202 and grade >
(
select max(grade) from test t2
where t2.class=202201 and t2.kemu=t1.kemu
);
查看执行计划
explain analyze select * from test t1 where t1.class=202202 and grade >
(
select max(grade) from test t2
where t2.class=202201 and t2.kemu=t1.kemu
);
--原SQL中存在子查询,每扫描一次t1表,会遍历子查询结果,性能较差,改成join方式,消除子查询,性能有xxxms提升至xxms。
优化的SQL:
select * from test t1 join
(
select kemu,max(grade) as t2_max_grade
from test
where class = 202201
group by kemu
) t2
on t1.kemu = t2.kemu
and t1.grade > t2.t2_max_grade
and t1. class = 202202;
查看执行计划
explain analyze select * from test t1 join
(
select kemu,max(grade) as t2_max_grade
from test
where class = 202201
group by kemu
) t2
on t1.kemu = t2.kemu
and t1.grade > t2.t2_max_grade
and t1. class = 202202;
3、练习3
当前有三个表,分别是学生信息表student和202201班级成绩表score1,202202班级成绩表score2
--删除表
DROP TABLE IF EXISTS gaussdb.student;
DROP TABLE IF EXISTS gaussdb.score1;
DROP TABLE IF EXISTS gaussdb.score2;
--创建表
create table Student(sno int,sname varchar(50));
create table score1(sno int,kemu varchar(50),Grade int,classid varchar(20));
create table score2(sno int,kemu varchar(50),Grade int,classid varchar(20));
--灌入数据
insert into student values(1,'jack'),(2,'Rose'),(3,'Tom'),(4,'Bob'),(5,'Lucy'),(6,'Lily');
insert into score1 values(1,'chinese',86,'202201'),(2,'chinese',89,'202201'),(3,'chinese',60,'202201'),(4,'chinese',70,'202201'),(5,'chinese',80,'202201'),(6,'chinese',99,'202201');
insert into score1 values(1,'math',86,'202201'),(2,'math',89,'202201'),(3,'math',60,'202201'),(4,'math',70,'202201'),(5,'math',80,'202201'),(6,'math',99,'202201');
insert into score2 values(1,'chinese',86,'202202'),(2,'chinese',89,'202202'),(3,'chinese',61,'202202'),(4,'chinese',71,'202202'),(5,'chinese',81,'202202'),(6,'chinese',91,'202202');
insert into score2 values(1,'math',86,'202202'),(2,'math',89,'202202'),(3,'math',61,'202202'),(4,'math',71,'202202'),(5,'math',81,'202202'),(6,'math',91,'202202');
1、查202201班级和202202班级所有人语文成绩前10的记录,第一个查询要使用union
方法1、
select * from
(
select * from score1 where kemu='chinese'
union
select * from score2 where kemu='chinese'
) t order by Grade desc limit 10;
方法2、
select * from
(
select *,row_number() over(order by Grade desc) rn from
(
select * from score1 where kemu='chinese'
union
select * from score2 where kemu='chinese'
)
) t where rn<=10;
方法3、
select * from
(
select *,rank() over(order by Grade desc) rn from
(
select * from score1 where kemu='chinese'
union
select * from score2 where kemu='chinese'
)
) t where rn<=10;
方法4、
select * from
(
select *,dense_rank() over(order by Grade desc) rn from
(
select * from score1 where kemu='chinese'
union
select * from score2 where kemu='chinese'
)
) t where rn<=10;
2、对以上SQL语句进行优化 --将union修改为union all
方法1、
select * from
(
select * from score1 where kemu='chinese'
union all
select * from score2 where kemu='chinese'
) t order by Grade desc limit 10;
方法2、
select * from
(
select *,row_number() over(order by Grade desc) rn from
(
select * from score1 where kemu='chinese'
union all
select * from score2 where kemu='chinese'
)
) t where rn<=10;
方法3、
select * from
(
select *,rank() over(order by Grade desc) rn from
(
select * from score1 where kemu='chinese'
union all
select * from score2 where kemu='chinese'
)
) t where rn<=10;
方法4、
select * from
(
select *,dense_rank() over(order by Grade desc) rn from
(
select * from score1 where kemu='chinese'
union all
select * from score2 where kemu='chinese'
)
) t where rn<=10;
3、查看两个班级相同的科目,202201班在score2中不存在的成绩,要求使用not in(需要确定score1、score2表具体字段有哪些科目,以及题目所谓相同科目是一个具体科目还是所有科目都要做判断)
判断某一科目
select * from score1 where kemu='chinese' and grade not in (select grade from score2 where kemu='chinese');
判断所有科目
select * from score1 s1 where grade not in (select grade from score2 s2 where s2.kemu=s1.kemu);
4、对以上SQL语句进行优化
--not in修改为not exits not exists后面的的子查询包含前后表关联条件
判断某一科目
select * from score1 t1 where t1.kemu='chinese' and not exists (select grade from score2 t2 where t2.kemu='chinese' and t1.grade=t2.grade) ;
判断所有科目
select * from score1 t1 where not exists (select grade from score2 t2 where t2.kemu=t1.kemu and t1.grade=t2.grade);
5、查询班级202201语文成绩最高的学生,要求先创建索引,并且能保证一定会使用索引
select sno,Grade from score1 where kemu='chinese' order by Grade desc limit 1
explain analyze select sno,Grade from score1 where kemu='chinese' order by Grade desc limit 1
select * from gs_index_advise('select sno,Grade from score1 where kemu=''chinese'' order by Grade desc limit 1');
create index idx_score1_grade on score1(grade);
explain analyze select sno,Grade from score1 where kemu='chinese' order by Grade desc limit 1
6、查询202201班级的学生的成绩比202202班级的学生最高成绩还要大的学生信息,对以下给出的SQL进行改写
改写前的SQL:
select sno ,totalscore1
from
(select sno ,Grade as totalscore1 from score1) s1,
(select max(Grade) as totalscore2 from score2) s2
where totalscore1>totalscore2;
改写后的SQL:
select * from score1 where Grade>(
select max(Grade) as totalscore2 from score2
)
4、练习4
--删除表
DROP TABLE IF EXISTS gaussdb.teacher;
DROP TABLE IF EXISTS gaussdb.course;
DROP TABLE IF EXISTS gaussdb.class;
DROP TABLE IF EXISTS gaussdb.scclassore;
--创建表
--教师表(教师名、教师编号)
create table teacher(t_name varchar,t_no int);
--课程表(课程名、任课老师编号、课程编号)
create table course(course_name varchar,t_no int,course_no int);
--班级表(班级名称、班级编号、学年)
create table class(c_name varchar,cno int,xuenian varchar);
--分数表(课程编号、分数、学生学号、班级编号)
create table scclassore(course_no int,score int,stu_no int,cno int);
--插入数据
insert into teacher values('语文王老师',111);
insert into teacher values('数学李老师',222);
insert into course values('语文',111,111);
insert into course values('数学',222,222);
insert into class values('1班',111,'2020'),('2班',222,'2020');
insert into scclassore values(111,80,1,111),(111,90,2,111),(111,60,1,222),(111,90,2,222);
1、查询2020学年,语文的平均成绩大于80的班级,打印班级名称及平均成绩,要求where条件里有两个非相关子查询
方法1、
select c_name,
avg(score) as avgscore
from scclassore s,
class c
where s.cno = c.cno
and course_no in (select course_no from course where course_name = '语文')
and s.cno in (select cno from class where xuenian = '2020')
group by c_name
having avgscore >80;
方法2、
select c.c_name,
avg(s.score) as avg_score
from scclassore s
inner join class c
on c.cno = s.cno
where s.cno in (select cno from class where xuenian = '2020')
and s.course_no in (select course_no from course where course_name = '语文')
group by c.c_name
having avg_score>80;
2、优化上一步的语句,将where条件中的非相关子查询 改为from后边的范围表
方法1、
select cl.c_name,avg(score) from scclassore sc,
(select course_no from course where course_name='语文') co,
(select cno,c_name from class where xuenian='2020') cl
where sc.cno = cl.cno and sc.course_no =co.course_no
group by cl.c_name
having avg(score)>80
方法2、
select c_name,avg_score
from class t3 ,
(
select t1.cno,avg(t1.score) as avg_score
from scclassore t1
where
t1.course_no in (select course_no from course t2 where t2.course_name = '语文')
and t1.cno in (select cno from class where xuenian = '2020')
group by t1.cno
having avg_score>80
) t4
where t3.cno = t4.cno;
方法3、
select e.c_name,d.avg
from class e,
(
select t1.cno,avg(t1.score) avg
from scclassore t1,course t2,
(select cno from class where xuenian = '2020') t3,
(select course_no from course where course_name = '语文') t4
where t1.course_no = t2.course_no
and t1.cno = t3.cno
and t2.course_no = t4.course_no
group by t1.cno
having avg(t1.score)>80
) d
where e.cno = d.cno;
3、优化上一步的语句,将from后边的子查询优化为父表的关联查询
方法1、
select
c_name,
avg(score) as avgscore
from
scclassore s,
class c,
course cs
where
s.cno = c.cno
and s.course_no = cs.course_no
and course_name = '语文'
and xuenian = '2020'
group by c_name
having avgscore >80;
方法2、
select
c_name,
avg(score) as avgscore
from scclassore s
inner join class c on s.cno = c.cno
inner join course cs on s.course_no = cs.course_no
where course_name = '语文' and xuenian = '2020'
group by c_name
having avgscore >80;
【声明】本内容来自华为云开发者社区博主,不代表华为云及华为云开发者社区的观点和立场。转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息,否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)