华为云RDS PG postgres_fdw sharding 分库分表的实践
1 测试环境及步骤介绍
1.1 环境信息
在现网购买三台RDS for Postgresql 14版本的实例进行测试,信息如下:
master:192.168.1.212
shard0:shard0, shard1 192.168.1.196
shard2:shard2, shard3 192.168.1.84
已购买的实例位于同一VPC下,且使用同一安全组,配置该安全组互通
1.2 测试步骤简述
1、在master上创建postgres_fdw以及主表(主分区)。应用访问master
2、在shard上创建分区表
3、在master上创建这些分区表的外部表
4、将外部表作为分区attach到主表
5、应用访问master的主表即可。数据自动路由到shard。
支持select, insert, update, delete, join, group, sort等操作,所有操作不受影响,只是某些查询可以push down,某些可能不行。
2 详细测试步骤
2.1 master,创建master库。
create user dtstest with password 'xxxxxx';
grant root to dtstest ;
create database master with owner dtstest;
2.2 远程,创建shard库。
远程实例1
create user dtstest with password 'xxxxxx';
create database shard0 with owner dtstest;
create database shard1 with owner dtstest;
远程实例2
create user dtstest with password 'xxxxxx';
create database shard2 with owner dtstest;
create database shard3 with owner dtstest;
2.3 master,创建fdw插件
使用dtstest用户登录master数据库
select control_extension ('create','postgres_fdw');
2.4 master,创建远程实例server
CREATE SERVER s0 FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host '192.168.1.196', port '5432', dbname 'shard0');
CREATE SERVER s1 FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host '192.168.1.196', port '5432', dbname 'shard1');
CREATE SERVER s2 FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host '192.168.1.84', port '5432', dbname 'shard2');
CREATE SERVER s3 FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host '192.168.1.84', port '5432', dbname 'shard3');
2.5 master,给本地账号(应用程序连接master的数据库账号)配置如何连接远程实例server
CREATE USER MAPPING FOR dtstest SERVER s0 OPTIONS (user ' dtstest', password 'xxx');
CREATE USER MAPPING FOR dtstest SERVER s1 OPTIONS (user 'dtstest', password 'xxx');
CREATE USER MAPPING FOR dtstest SERVER s2 OPTIONS (user 'dtstest', password 'xxx');
CREATE USER MAPPING FOR dtstest SERVER s3 OPTIONS (user 'dtstest', password 'xxx');
2.6 master,创建主表(主分区),如果是多级分区表,则创建好一级分区。
create table tbl_user (id int, info text, crt_time timestamp) PARTITION BY hash (id);
create table tbl_log (id int, orderid int8, info text, crt_time timestamp) PARTITION BY hash (id);
tbl_log为两级分区表,第一级是id hash分区,第二级是orderid分区
create table tbl_log0 PARTITION OF tbl_log FOR VALUES WITH ( MODULUS 4, REMAINDER 0) PARTITION BY hash (orderid);
create table tbl_log1 PARTITION OF tbl_log FOR VALUES WITH ( MODULUS 4, REMAINDER 1) PARTITION BY hash (orderid);
create table tbl_log2 PARTITION OF tbl_log FOR VALUES WITH ( MODULUS 4, REMAINDER 2) PARTITION BY hash (orderid);
create table tbl_log3 PARTITION OF tbl_log FOR VALUES WITH ( MODULUS 4, REMAINDER 3) PARTITION BY hash (orderid);
接下来这些表的下一级分区将使用fdw外部表attach上来。
多级分区可以定义在master,也可以下放到远程库进行定义。本例使用的是master定义二级分区。
2.7 远程库,创建数据表
s0,s1,s2,s3
在'shard0,'shard1,'shard2,'shard3
create table tbl_user (id int primary key, info text, crt_time timestamp);
create table tbl_log_0 (id int not null, orderid int8 not null, info text, crt_time timestamp);
create table tbl_log_1 (id int not null, orderid int8 not null, info text, crt_time timestamp);
create table tbl_log_2 (id int not null, orderid int8 not null, info text, crt_time timestamp);
create table tbl_log_3 (id int not null, orderid int8 not null, info text, crt_time timestamp);
2.8 master,创建远程实例的外部表。
CREATE FOREIGN TABLE tbl_user_0 (id int not null, info text, crt_time timestamp) SERVER s0 OPTIONS (schema_name 'public', table_name 'tbl_user');
CREATE FOREIGN TABLE tbl_user_1 (id int not null, info text, crt_time timestamp) SERVER s1 OPTIONS (schema_name 'public', table_name 'tbl_user');
CREATE FOREIGN TABLE tbl_user_2 (id int not null, info text, crt_time timestamp) SERVER s2 OPTIONS (schema_name 'public', table_name 'tbl_user');
CREATE FOREIGN TABLE tbl_user_3 (id int not null, info text, crt_time timestamp) SERVER s3 OPTIONS (schema_name 'public', table_name 'tbl_user');
CREATE FOREIGN TABLE tbl_log0_0 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s0 OPTIONS (schema_name 'public', table_name 'tbl_log_0');
CREATE FOREIGN TABLE tbl_log0_1 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s0 OPTIONS (schema_name 'public', table_name 'tbl_log_1');
CREATE FOREIGN TABLE tbl_log0_2 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s0 OPTIONS (schema_name 'public', table_name 'tbl_log_2');
CREATE FOREIGN TABLE tbl_log0_3 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s0 OPTIONS (schema_name 'public', table_name 'tbl_log_3');
CREATE FOREIGN TABLE tbl_log1_0 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s1 OPTIONS (schema_name 'public', table_name 'tbl_log_0');
CREATE FOREIGN TABLE tbl_log1_1 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s1 OPTIONS (schema_name 'public', table_name 'tbl_log_1');
CREATE FOREIGN TABLE tbl_log1_2 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s1 OPTIONS (schema_name 'public', table_name 'tbl_log_2');
CREATE FOREIGN TABLE tbl_log1_3 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s1 OPTIONS (schema_name 'public', table_name 'tbl_log_3');
CREATE FOREIGN TABLE tbl_log2_0 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s2 OPTIONS (schema_name 'public', table_name 'tbl_log_0');
CREATE FOREIGN TABLE tbl_log2_1 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s2 OPTIONS (schema_name 'public', table_name 'tbl_log_1');
CREATE FOREIGN TABLE tbl_log2_2 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s2 OPTIONS (schema_name 'public', table_name 'tbl_log_2');
CREATE FOREIGN TABLE tbl_log2_3 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s2 OPTIONS (schema_name 'public', table_name 'tbl_log_3');
CREATE FOREIGN TABLE tbl_log3_0 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s3 OPTIONS (schema_name 'public', table_name 'tbl_log_0');
CREATE FOREIGN TABLE tbl_log3_1 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s3 OPTIONS (schema_name 'public', table_name 'tbl_log_1');
CREATE FOREIGN TABLE tbl_log3_2 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s3 OPTIONS (schema_name 'public', table_name 'tbl_log_2');
CREATE FOREIGN TABLE tbl_log3_3 (id int not null, orderid int8 not null, info text, crt_time timestamp) SERVER s3 OPTIONS (schema_name 'public', table_name 'tbl_log_3');
attach分区:
alter table tbl_user attach partition tbl_user_0 FOR VALUES WITH ( MODULUS 4, REMAINDER 0);
alter table tbl_user attach partition tbl_user_1 FOR VALUES WITH ( MODULUS 4, REMAINDER 1);
alter table tbl_user attach partition tbl_user_2 FOR VALUES WITH ( MODULUS 4, REMAINDER 2);
alter table tbl_user attach partition tbl_user_3 FOR VALUES WITH ( MODULUS 4, REMAINDER 3);
alter table tbl_log0 attach partition tbl_log0_0 FOR VALUES WITH ( MODULUS 4, REMAINDER 0);
alter table tbl_log0 attach partition tbl_log0_1 FOR VALUES WITH ( MODULUS 4, REMAINDER 1);
alter table tbl_log0 attach partition tbl_log0_2 FOR VALUES WITH ( MODULUS 4, REMAINDER 2);
alter table tbl_log0 attach partition tbl_log0_3 FOR VALUES WITH ( MODULUS 4, REMAINDER 3);
alter table tbl_log1 attach partition tbl_log1_0 FOR VALUES WITH ( MODULUS 4, REMAINDER 0);
alter table tbl_log1 attach partition tbl_log1_1 FOR VALUES WITH ( MODULUS 4, REMAINDER 1);
alter table tbl_log1 attach partition tbl_log1_2 FOR VALUES WITH ( MODULUS 4, REMAINDER 2);
alter table tbl_log1 attach partition tbl_log1_3 FOR VALUES WITH ( MODULUS 4, REMAINDER 3);
alter table tbl_log2 attach partition tbl_log2_0 FOR VALUES WITH ( MODULUS 4, REMAINDER 0);
alter table tbl_log2 attach partition tbl_log2_1 FOR VALUES WITH ( MODULUS 4, REMAINDER 1);
alter table tbl_log2 attach partition tbl_log2_2 FOR VALUES WITH ( MODULUS 4, REMAINDER 2);
alter table tbl_log2 attach partition tbl_log2_3 FOR VALUES WITH ( MODULUS 4, REMAINDER 3);
alter table tbl_log3 attach partition tbl_log3_0 FOR VALUES WITH ( MODULUS 4, REMAINDER 0);
alter table tbl_log3 attach partition tbl_log3_1 FOR VALUES WITH ( MODULUS 4, REMAINDER 1);
alter table tbl_log3 attach partition tbl_log3_2 FOR VALUES WITH ( MODULUS 4, REMAINDER 2);
alter table tbl_log3 attach partition tbl_log3_3 FOR VALUES WITH ( MODULUS 4, REMAINDER 3);
以上,部署了两个表的sharding,其中一个为一级分区,另一个是二级分区。
3 使用验证(读写 更新 JOIN 排序 聚合) 在maser上执行
3.1 sql1
insert into tbl_user select generate_series(1,10000), md5(random()::text), clock_timestamp();
select * from tbl_user where id=1;
explain verbose select * from tbl_user where id=1;
在本地实例master库向分区表tbl_user插入1万条数据,数据自动路由到远程实例1,2中的shared0, shared1, shared2, shared3库中的tbl_user表:
在远程实例1中shared0库查询:
在远程实例1中shared1库查询:
在远程实例2中shared2库查询:
在远程实例2中shared3库查询
3.2 sql2
insert into tbl_log select random()*10000, generate_series(1,100000), md5(random()::text), clock_timestamp();
select * from tbl_log where orderid =1;
explain verbose select * from tbl_log where orderid =1;
explain verbose select * from tbl_log where id=1;
explain verbose select * from tbl_log join tbl_user using (id) where tbl_log.orderid=1;
explain verbose select * from tbl_log join tbl_user using (id) ;
explain verbose select * from tbl_log order by id limit 1;
explain verbose select * from tbl_log join tbl_user using (id) where id=1;
在本地实例master库向二级分区表tbl_log插入1万条数据,数据自动路由到远程实例1,2中的shared0, shared1, shared2, shared3库中tbl_log_0,tbl_log_1,tbl_log_2,tbl_log_3表:
在远程实例1中shared0库查询:
在远程实例1中shared1库查询:
在远程实例2中shared2库查询:
在远程实例2中shared3库查询:
4 小结
在同一个vpc内的所有rds pg实例,可以互联互通。甚至和mysql实例也能互联互通。
利用PostgreSQL的postgres_fdw, partition table的接口,可以实现数据库的分库分表。但是目前还不是特别的完善,未来PG社区可能会在这个形态下发力(bruce 多次公开分享了社区使用postgres_fdw做sharding的想法),在性能、功能方面实现更完美的操作。
- 点赞
- 收藏
- 关注作者
评论(0)