跨字符集迁移数据库

举报
DRS技术快客 发表于 2021/05/31 20:44:53 2021/05/31
【摘要】 普通迁移1、创建一个gbk字符集的表create table t_gbk (val varchar(32)) default character set gbk2、插入一条数据mysql> insert into t_gbk values('华为云');Query OK, 1 row affected (0.02 sec)mysql> select * from t_gbk;+------...

普通迁移

1、创建一个gbk字符集的表

create table t_gbk (
val varchar(32)
) default character set gbk

2、插入一条数据

mysql> insert into t_gbk values('华为云');
Query OK, 1 row affected (0.02 sec)

mysql> select * from t_gbk;
+--------+
| val    |
+--------+
| 华为云 |
+--------+

3、创建一个utf8字符集的表

create table t_utf8 (
val varchar(32)
) default character set utf8

4、我们用java来迁移这张表

String srcSql = "SELECT * from t_gbk";
String dstSql = "INSERT INTO t_utf8 values (?)";
try (Connection srcConn = getConn(srcUrl, srcUser, srcPwd)) {
    try (Statement srcStat = srcConn.createStatement()) {
        try (ResultSet rs = srcStat.executeQuery(srcSql)) {
            try (Connection dstConn = getConn(dstUrl, dstUser, dstPwd)) {
                while (rs.next()) {
                    try (PreparedStatement dstPs = dstConn.prepareStatement(dstSql)){
                        dstPs.setString(1, rs.getString(1));
                        dstPs.executeUpdate();
                    }
                }
            }
        }
    }
}

5、我们来查询

mysql> select * from t_utf8;
+--------+
| val    |
+--------+
| 华为云 |
+--------+

可见数据完整迁移过去了

大字段迁移

有时候当客户使用的是longtext字段的时候,因为longtext的长度可以达到几百M,这个时候
1、创建一张表

create table t_gbk_long (
val longtext
) default character set gbk;

2、用jdbc插入一个大字段(运行这段代码需要开启6G内存)

String initSql = "INSERT INTO t_gbk_long values (?)";
StringBuilder sb = new StringBuilder();
for (int i=0; i<(1<<24); i++) {
    sb.append("华为云");
}
try (Connection initConn = getConn(srcUrl, srcUser, srcPwd)) {
    try (PreparedStatement initPs = initConn.prepareStatement(initSql)){
        initPs.setString(1, sb.toString());
        initPs.executeUpdate();
    }
}

通过SQL再插入一条更大的

mysql> insert into t_gbk_long select concat(val,val,val,val,val) from t_gbk_long;
Query OK, 1 row affected (14.44 sec)
Records: 1  Duplicates: 0  Warnings: 0

mysql> select length(val) from t_gbk_long;
+-------------+
| length(val) |
+-------------+
|   100663296 |
|   503316480 |
+-------------+

3、目标端创建一个表

create table t_gbk_long_1 (
val longtext
) default character set gbk;

4、jdbc迁移String

String srcSql = "SELECT * from t_gbk_long";
String dstSql = "INSERT INTO t_gbk_long_2 values (?)";
try (Connection srcConn = getConn(srcUrl, srcUser, srcPwd)) {
    try (Statement srcStat = srcConn.createStatement()) {
        try (ResultSet rs = srcStat.executeQuery(srcSql)) {
            try (Connection dstConn = getConn(dstUrl, dstUser, dstPwd)) {
                while (rs.next()) {
                    try (PreparedStatement dstPs = dstConn.prepareStatement(dstSql)){
                        dstPs.setString(1, rs.getString(1));
                        dstPs.executeUpdate();
                    }
                }
            }
        }
    }
}

3G内存,结果OOM了

Exception in thread "main" java.lang.OutOfMemoryError: Java heap space
	at java.lang.StringCoding$StringDecoder.decode(StringCoding.java:149)
	at java.lang.StringCoding.decode(StringCoding.java:193)
	at java.lang.String.<init>(String.java:426)
	at com.mysql.cj.util.StringUtils.toString(StringUtils.java:1695)
	at com.mysql.cj.result.StringValueFactory.createFromBytes(StringValueFactory.java:129)
	at com.mysql.cj.result.StringValueFactory.createFromBytes(StringValueFactory.java:48)
	at com.mysql.cj.protocol.a.MysqlTextValueDecoder.decodeByteArray(MysqlTextValueDecoder.java:134)
	at com.mysql.cj.protocol.result.AbstractResultsetRow.decodeAndCreateReturnValue(AbstractResultsetRow.java:133)
	at com.mysql.cj.protocol.result.AbstractResultsetRow.getValueFromBytes(AbstractResultsetRow.java:241)
	at com.mysql.cj.protocol.a.result.TextBufferRow.getValue(TextBufferRow.java:132)
	at com.mysql.cj.jdbc.result.ResultSetImpl.getString(ResultSetImpl.java:839)

需要把内存调到4G才行
如果我们用byte进行读写,则可以2.5G内存就可以了
5、jdbc迁移bytes

String srcSql = "SELECT * from t_gbk_long";
String dstSql = "INSERT INTO t_gbk_long_2 values (?)";
try (Connection srcConn = getConn(srcUrl, srcUser, srcPwd)) {
    try (Statement srcStat = srcConn.createStatement()) {
        try (ResultSet rs = srcStat.executeQuery(srcSql)) {
            try (Connection dstConn = getConn(dstUrl, dstUser, dstPwd)) {
                while (rs.next()) {
                    try (PreparedStatement dstPs = dstConn.prepareStatement(dstSql)){
                        dstPs.setBinaryStream(1, new ByteArrayInputStream(rs.getBytes(1)));
                        dstPs.executeUpdate();
                    }
                }
            }
        }
    }
}

这里面t_gbk_long_2与t_gbk_long表结构一致,字符集也一致,此时相同字符集的bytes一致,因此不会有问题,那么如果字符集不一致会是什么样的呢
6、建表

create table t_utf8_long (
val longtext
) default character set utf8;

7、jdbc迁移bytes

String srcSql = "SELECT * from t_gbk_long";
String dstSql = "INSERT INTO t_utf8_long values (?)";
try (Connection srcConn = getConn(srcUrl, srcUser, srcPwd)) {
    try (Statement srcStat = srcConn.createStatement()) {
        try (ResultSet rs = srcStat.executeQuery(srcSql)) {
            try (Connection dstConn = getConn(dstUrl, dstUser, dstPwd)) {
                while (rs.next()) {
                    try (PreparedStatement dstPs = dstConn.prepareStatement(dstSql)){
                        dstPs.setBinaryStream(1, new ByteArrayInputStream(rs.getBytes(1)));
                        dstPs.executeUpdate();
                    }
                }
            }
        }
    }
}

结果:

Exception in thread "main" java.sql.SQLException: Incorrect string value: '\xBB\xAA\xCE\xAA\xD4\xC6...' for column 'val' at row 1
	at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:129)
	at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:97)
	at com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeInternal(ClientPreparedStatement.java:953)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeUpdateInternal(ClientPreparedStatement.java:1092)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeUpdateInternal(ClientPreparedStatement.java:1040)
	at com.mysql.cj.jdbc.ClientPreparedStatement.executeLargeUpdate(ClientPreparedStatement.java:1347)

可见不同的字符集的数据不能用byte做迁移,我们发现mysql有一个convert函数,它可以对不同字符集进行转换
8、利用convert进行处理

String srcSql = "SELECT * from t_gbk_long";
String dstSql = "INSERT INTO t_utf8_long values (convert(? using gbk))";

try (Connection srcConn = getConn(srcUrl, srcUser, srcPwd)) {
    try (Statement srcStat = srcConn.createStatement()) {
        try (ResultSet rs = srcStat.executeQuery(srcSql)) {
            try (Connection dstConn = getConn(dstUrl, dstUser, dstPwd)) {
                while (rs.next()) {
                    try (PreparedStatement dstPs = dstConn.prepareStatement(dstSql)){
                        dstPs.setBinaryStream(1, new ByteArrayInputStream(rs.getBytes(1)));
                        dstPs.executeUpdate();
                    }
                }
            }
        }
    }
}

看有一些文章写到,读取大字段用clob或者stream来读取,我们可以试一下
9、使用CLOB

String srcSql = "SELECT * from t_gbk_long";
String dstSql = "INSERT INTO t_utf8_long values (?)";
try (Connection srcConn = getConn(srcUrl, srcUser, srcPwd)) {
    try (Statement srcStat = srcConn.createStatement()) {
        try (ResultSet rs = srcStat.executeQuery(srcSql)) {
            try (Connection dstConn = getConn(dstUrl, dstUser, dstPwd)) {
                while (rs.next()) {
                    Clob clob = rs.getClob(1);
                    try (BufferedWriter writer = new BufferedWriter(new FileWriter("/tmp/clob.txt"))) {
                        try (BufferedReader reader = new BufferedReader(clob.getCharacterStream())) {
                            CharBuffer buf = CharBuffer.wrap(new char[8 * 1024]);
                            while ((reader.read(buf) > 0)) {
                                writer.write(buf.array(), 0, buf.limit());
                                buf.flip();
                            }
                        }
                    }
                    try (PreparedStatement dstPs = dstConn.prepareStatement(dstSql)) {
                        dstPs.setClob(1, new FileReader("/tmp/clob.txt"));
                        dstPs.executeUpdate();
                    }
                }
            }
        }
    }
}

开启4G内存依然OOM,可见最有效的方式还是通过byte以及MySQL的convert做字符集转换的方式
当然这样也仍然只能处理1G以内的大小,因为MySQL的max_allowed_packet最大为1G暂时无法避免

【版权声明】本文为华为云社区用户原创内容,转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息, 否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: cloudbbs@huaweicloud.com
  • 点赞
  • 收藏
  • 关注作者

评论(0

0/1000
抱歉,系统识别当前为高风险访问,暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称,即可参与社区互动!

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。