1. Set up the MySQL database:
CREATE DATABASE `unicode_test_5` character set UTF8mb4 collate utf8mb4_unicode_ci;
Query OK, 1 row affected (0.00 sec)
mysql> use unicode_test_5;
Database changed
mysql> show variables like 'character_set_database' ;
| Variable_name | Value |
| character_set_database | utf8mb4 |
1 row in set (0.01 sec)
2. Submit the connector:
{ "connector.class" : "io.debezium.connector.mysql.MySqlConnector" , "database.history.kafka.bootstrap.servers" : "broker:29092" , "database.history.kafka.topic" : "debz.schema-changes.unicode_test_5" , "database.hostname" : "mysql" , "database.port" : "3306" , "database.user" : "confluent" , "database.password" : "confluent" , "database.whitelist" : "unicode_test_5" , "database.server.id" : "1013" , "database.server.name" : "unicode_test_5" , "name" : "mysql-unicode-test-8-connector" , "database.history.skip.unparseable.ddl" : " true " , "" : "120000" , "" : "120000" , "snapshot.mode" : "schema_only" , "snapshot.locking.mode" : "none" , "tasks.max" : "1" }
3. Create the table:
mysql> CREATE TABLE `test` (`description` varchar(255));
Query OK, 0 rows affected (0.02 sec)
mysql> show create table test;
| Table | Create Table |
| test | CREATE TABLE `test` (
`description` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci |
1 row in set (0.00 sec)
Expected result: The description column is parsed using UTF-8 (utf8mb4), the database's default character set.
Actual result: The description column is parsed using the windows-1252 charset, because the connector records the column as CHARSET latin1.
Related log:
[2020-09-28 15:26:41,345] DEBUG Recorded DDL statements for database 'unicode_test_5' : CREATE TABLE `test` (`description` varchar(255)) (io.debezium.connector.mysql.BinlogReader)
[2020-09-28 15:26:41,346] DEBUG - field 'description' (STRING) from column description VARCHAR(255) CHARSET latin1 DEFAULT VALUE NULL (io.debezium.relational.TableSchemaBuilder)
[2020-09-28 15:26:41,347] DEBUG Mapped columns for table 'unicode_test_5.test' to schema: { "name" : "unicode_test_5.unicode_test_5.test.Value" , "type" : "STRUCT" , "optional" : " true " , "fields" : [{ "name" : "description" , "index" : "0" , "schema" : { "type" : "STRING" , "optional" : " true " }}]} (io.debezium.relational.TableSchemaBuilder)
[2020-09-28 15:26:41,347] DEBUG Using windows-1252 charset by default for column: description VARCHAR(255) CHARSET latin1 DEFAULT VALUE NULL (io.debezium.connector.mysql.MySqlValueConverters)
The same issue occurs when altering the table:
mysql> alter table test add column `description2` text;
Log output:
[2020-09-28 15:31:35,315] DEBUG Received query command: Event{header=EventHeaderV4{timestamp=1601307095000, eventType=QUERY, serverId=1, headerLength=19, dataLength=122, nextPosition=16575, flags=0}, data=QueryEventData{threadId=69, executionTime=0, errorCode=0, database= 'unicode_test_5' , sql= 'alter table test add column `description2` text' }} (io.debezium.connector.mysql.BinlogReader)
[2020-09-28 15:31:35,317] DEBUG Recorded DDL statements for database 'unicode_test_5' : alter table test add column `description2` text (io.debezium.connector.mysql.BinlogReader)
[2020-09-28 15:31:35,319] DEBUG - field 'description' (STRING) from column description VARCHAR(255) CHARSET latin1 DEFAULT VALUE NULL (io.debezium.relational.TableSchemaBuilder)
[2020-09-28 15:31:35,320] DEBUG - field 'description2' (STRING) from column description2 TEXT CHARSET latin1 DEFAULT VALUE NULL (io.debezium.relational.TableSchemaBuilder)
[2020-09-28 15:31:35,320] DEBUG Mapped columns for table 'unicode_test_5.test' to schema: { "name" : "unicode_test_5.unicode_test_5.test.Value" , "type" : "STRUCT" , "optional" : " true " , "fields" : [{ "name" : "description" , "index" : "0" , "schema" : { "type" : "STRING" , "optional" : " true " }}, { "name" : "description2" , "index" : "1" , "schema" : { "type" : "STRING" , "optional" : " true " }}]} (io.debezium.relational.TableSchemaBuilder)
[2020-09-28 15:31:35,321] DEBUG Using windows-1252 charset by default for column: description VARCHAR(255) CHARSET latin1 DEFAULT VALUE NULL (io.debezium.connector.mysql.MySqlValueConverters)
[2020-09-28 15:31:35,321] DEBUG Using windows-1252 charset by default for column: description2 TEXT CHARSET latin1 DEFAULT VALUE NULL (io.debezium.connector.mysql.MySqlValueConverters)