跨字符集迁移数据库
【摘要】 普通迁移1、创建一个gbk字符集的表create table t_gbk (val varchar(32)) default character set gbk2、插入一条数据mysql> insert into t_gbk values('华为云');Query OK, 1 row affected (0.02 sec)mysql> select * from t_gbk;+------...
普通迁移
1、创建一个gbk字符集的表
create table t_gbk (
val varchar(32)
) default character set gbk
2、插入一条数据
mysql> insert into t_gbk values('华为云');
Query OK, 1 row affected (0.02 sec)
mysql> select * from t_gbk;
+--------+
| val |
+--------+
| 华为云 |
+--------+
3、创建一个utf8字符集的表
create table t_utf8 (
val varchar(32)
) default character set utf8
4、我们用java来迁移这张表
String srcSql = "SELECT * from t_gbk";
String dstSql = "INSERT INTO t_utf8 values (?)";
try (Connection srcConn = getConn(srcUrl, srcUser, srcPwd)) {
try (Statement srcStat = srcConn.createStatement()) {
try (ResultSet rs = srcStat.executeQuery(srcSql)) {
try (Connection dstConn = getConn(dstUrl, dstUser, dstPwd)) {
while (rs.next()) {
try (PreparedStatement dstPs = dstConn.prepareStatement(dstSql)){
dstPs.setString(1, rs.getString(1));
dstPs.executeUpdate();
}
}
}
}
}
}
5、我们来查询
mysql> select * from t_utf8;
+--------+
| val |
+--------+
| 华为云 |
+--------+
可见数据完整迁移过去了
大字段迁移
有时候当客户使用的是longtext字段的时候,因为longtext的长度可以达到几百M,这个时候
1、创建一张表
create table t_gbk_long (
val longtext
) default character set gbk;
2、用jdbc插入一个大字段(运行这段代码需要开启6G内存)
String initSql = "INSERT INTO t_gbk_long values (?)";
StringBuilder sb = new StringBuilder();
for (int i=0; i<(1<<24); i++) {
sb.append("华为云");
}
try (Connection initConn = getConn(srcUrl, srcUser, srcPwd)) {
try (PreparedStatement initPs = initConn.prepareStatement(initSql)){
initPs.setString(1, sb.toString());
initPs.executeUpdate();
}
}
通过SQL再插入一条更大的
mysql> insert into t_gbk_long select concat(val,val,val,val,val) from t_gbk_long;
Query OK, 1 row affected (14.44 sec)
Records: 1 Duplicates: 0 Warnings: 0
mysql> select length(val) from t_gbk_long;
+-------------+
| length(val) |
+-------------+
| 100663296 |
| 503316480 |
+-------------+
3、目标端创建一个表
create table t_gbk_long_1 (
val longtext
) default character set gbk;
4、jdbc迁移String
String srcSql = "SELECT * from t_gbk_long";
String dstSql = "INSERT INTO t_gbk_long_2 values (?)";
try (Connection srcConn = getConn(srcUrl, srcUser, srcPwd)) {
try (Statement srcStat = srcConn.createStatement()) {
try (ResultSet rs = srcStat.executeQuery(srcSql)) {
try (Connection dstConn = getConn(dstUrl, dstUser, dstPwd)) {
while (rs.next()) {
try (PreparedStatement dstPs = dstConn.prepareStatement(dstSql)){
dstPs.setString(1, rs.getString(1));
dstPs.executeUpdate();
}
}
}
}
}
}
3G内存,结果OOM了
Exception in thread "main" java.lang.OutOfMemoryError: Java heap space
at java.lang.StringCoding$StringDecoder.decode(StringCoding.java:149)
at java.lang.StringCoding.decode(StringCoding.java:193)
at java.lang.String.<init>(String.java:426)
at com.mysql.cj.util.StringUtils.toString(StringUtils.java:1695)
at com.mysql.cj.result.StringValueFactory.createFromBytes(StringValueFactory.java:129)
at com.mysql.cj.result.StringValueFactory.createFromBytes(StringValueFactory.java:48)
at com.mysql.cj.protocol.a.MysqlTextValueDecoder.decodeByteArray(MysqlTextValueDecoder.java:134)
at com.mysql.cj.protocol.result.AbstractResultsetRow.decodeAndCreateReturnValue(AbstractResultsetRow.java:133)
at com.mysql.cj.protocol.result.AbstractResultsetRow.getValueFromBytes(AbstractResultsetRow.java:241)
at com.mysql.cj.protocol.a.result.TextBufferRow.getValue(TextBufferRow.java:132)
at com.mysql.cj.jdbc.result.ResultSetImpl.getString(ResultSetImpl.java:839)
需要把内存调到4G才行
如果我们用byte进行读写,则可以2.5G内存就可以了
5、jdbc迁移bytes
String srcSql = "SELECT * from t_gbk_long";
String dstSql = "INSERT INTO t_gbk_long_2 values (?)";
try (Connection srcConn = getConn(srcUrl, srcUser, srcPwd)) {
try (Statement srcStat = srcConn.createStatement()) {
try (ResultSet rs = srcStat.executeQuery(srcSql)) {
try (Connection dstConn = getConn(dstUrl, dstUser, dstPwd)) {
while (rs.next()) {
try (PreparedStatement dstPs = dstConn.prepareStatement(dstSql)){
dstPs.setBinaryStream(1, new ByteArrayInputStream(rs.getBytes(1)));
dstPs.executeUpdate();
}
}
}
}
}
}
这里面t_gbk_long_2与t_gbk_long表结构一致,字符集也一致,此时相同字符集的bytes一致,因此不会有问题,那么如果字符集不一致会是什么样的呢
6、建表
create table t_utf8_long (
val longtext
) default character set utf8;
7、jdbc迁移bytes
String srcSql = "SELECT * from t_gbk_long";
String dstSql = "INSERT INTO t_utf8_long values (?)";
try (Connection srcConn = getConn(srcUrl, srcUser, srcPwd)) {
try (Statement srcStat = srcConn.createStatement()) {
try (ResultSet rs = srcStat.executeQuery(srcSql)) {
try (Connection dstConn = getConn(dstUrl, dstUser, dstPwd)) {
while (rs.next()) {
try (PreparedStatement dstPs = dstConn.prepareStatement(dstSql)){
dstPs.setBinaryStream(1, new ByteArrayInputStream(rs.getBytes(1)));
dstPs.executeUpdate();
}
}
}
}
}
}
结果:
Exception in thread "main" java.sql.SQLException: Incorrect string value: '\xBB\xAA\xCE\xAA\xD4\xC6...' for column 'val' at row 1
at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:129)
at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:97)
at com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122)
at com.mysql.cj.jdbc.ClientPreparedStatement.executeInternal(ClientPreparedStatement.java:953)
at com.mysql.cj.jdbc.ClientPreparedStatement.executeUpdateInternal(ClientPreparedStatement.java:1092)
at com.mysql.cj.jdbc.ClientPreparedStatement.executeUpdateInternal(ClientPreparedStatement.java:1040)
at com.mysql.cj.jdbc.ClientPreparedStatement.executeLargeUpdate(ClientPreparedStatement.java:1347)
可见不同的字符集的数据不能用byte做迁移,我们发现mysql有一个convert函数,它可以对不同字符集进行转换
8、利用convert进行处理
String srcSql = "SELECT * from t_gbk_long";
String dstSql = "INSERT INTO t_utf8_long values (convert(? using gbk))";
try (Connection srcConn = getConn(srcUrl, srcUser, srcPwd)) {
try (Statement srcStat = srcConn.createStatement()) {
try (ResultSet rs = srcStat.executeQuery(srcSql)) {
try (Connection dstConn = getConn(dstUrl, dstUser, dstPwd)) {
while (rs.next()) {
try (PreparedStatement dstPs = dstConn.prepareStatement(dstSql)){
dstPs.setBinaryStream(1, new ByteArrayInputStream(rs.getBytes(1)));
dstPs.executeUpdate();
}
}
}
}
}
}
看有一些文章写到,读取大字段用clob或者stream来读取,我们可以试一下
9、使用CLOB
String srcSql = "SELECT * from t_gbk_long";
String dstSql = "INSERT INTO t_utf8_long values (?)";
try (Connection srcConn = getConn(srcUrl, srcUser, srcPwd)) {
try (Statement srcStat = srcConn.createStatement()) {
try (ResultSet rs = srcStat.executeQuery(srcSql)) {
try (Connection dstConn = getConn(dstUrl, dstUser, dstPwd)) {
while (rs.next()) {
Clob clob = rs.getClob(1);
try (BufferedWriter writer = new BufferedWriter(new FileWriter("/tmp/clob.txt"))) {
try (BufferedReader reader = new BufferedReader(clob.getCharacterStream())) {
CharBuffer buf = CharBuffer.wrap(new char[8 * 1024]);
while ((reader.read(buf) > 0)) {
writer.write(buf.array(), 0, buf.limit());
buf.flip();
}
}
}
try (PreparedStatement dstPs = dstConn.prepareStatement(dstSql)) {
dstPs.setClob(1, new FileReader("/tmp/clob.txt"));
dstPs.executeUpdate();
}
}
}
}
}
}
开启4G内存依然OOM,可见最有效的方式还是通过byte以及MySQL的convert做字符集转换的方式
当然这样也仍然只能处理1G以内的大小,因为MySQL的max_allowed_packet最大为1G暂时无法避免
【版权声明】本文为华为云社区用户原创内容,转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息, 否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)