Skip to content

Commit fe129ed

Browse files
committed
CONC-299: Add support for missing collation and character sets
Added the character sets and collations described in CONC-299. No additional test added, since charset/test_conc33 checks if all character sets and collations from server are supported.
1 parent 4ea36fd commit fe129ed

File tree

1 file changed

+93
-0
lines changed

1 file changed

+93
-0
lines changed

libmariadb/ma_charset.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,46 @@ mysql_mbcharlen_utf32(unsigned int utf32 __attribute((unused)))
493493
}
494494
/* }}} */
495495

496+
/* {{{ gb18030 functions */
497+
#define is_gb18030_odd(c) (0x81 <= (unsigned char) (c) && (unsigned char) (c) <= 0xFE)
498+
#define is_gb18030_even_2(c) ((0x40 <= (unsigned char) (c) && (unsigned char) (c) <= 0x7E) || (0x80 <= (unsigned char) (c) && (unsigned char) (c) <= 0xFE))
499+
#define is_gb18030_even_4(c) (0x30 <= (unsigned char) (c) && (unsigned char) (c) <= 0x39)
500+
501+
502+
static unsigned int mysql_mbcharlen_gb18030(unsigned int c)
503+
{
504+
if (c <= 0xFF) {
505+
return !is_gb18030_odd(c);
506+
}
507+
if (c > 0xFFFF || !is_gb18030_odd((c >> 8) & 0xFF)) {
508+
return 0;
509+
}
510+
if (is_gb18030_even_2((c & 0xFF))) {
511+
return 2;
512+
}
513+
if (is_gb18030_even_4((c & 0xFF))) {
514+
return 4;
515+
}
516+
517+
return 0;
518+
}
519+
520+
static unsigned int check_mb_gb18030_valid(const char * start, const char * end)
521+
{
522+
if (end - start <= 1 || !is_gb18030_odd(start[0])) {
523+
return 0;
524+
}
525+
526+
if (is_gb18030_even_2(start[1])) {
527+
return 2;
528+
} else if (end - start > 3 && is_gb18030_even_4(start[1]) && is_gb18030_odd(start[2]) && is_gb18030_even_4(start[3])) {
529+
return 4;
530+
}
531+
532+
return 0;
533+
}
534+
/* }}} */
535+
496536
/*
497537
The server compiles sometimes the full utf-8 (the mb4) as utf8m4, and the old as utf8,
498538
for BC reasons. Sometimes, utf8mb4 is just utf8 but the old charsets are utf8mb3.
@@ -586,6 +626,7 @@ const MARIADB_CHARSET_INFO mariadb_compiled_charsets[] =
586626
{ 73, 1, "keybcs2", "keybcs2_bin", "", 0, "", 1, 1, NULL, NULL},
587627
{ 74, 1, "koi8r", "koi8r_bin", "", 20866, "KOI8R", 1, 1, NULL, NULL},
588628
{ 75, 1, "koi8u", "koi8u_bin", "", 21866, "KOI8U", 1, 1, NULL, NULL},
629+
{ 76, 1, UTF8_MB3, UTF8_MB3"_tolower_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
589630
{ 77, 1, "latin2", "latin2_bin", "", 28592, "LATIN2", 1, 1, NULL, NULL},
590631
{ 78, 1, "latin5", "latin5_bin", "", 1254, "LATIN5", 1, 1, NULL, NULL},
591632
{ 79, 1, "latin7", "latin7_bin", "", 28603, "LATIN7", 1, 1, NULL, NULL},
@@ -730,8 +771,60 @@ const MARIADB_CHARSET_INFO mariadb_compiled_charsets[] =
730771
{ 245, 1, UTF8_MB4, UTF8_MB4"_croatian_mysql561_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
731772
{ 246, 1, UTF8_MB4, UTF8_MB4"_unicode_520_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
732773
{ 247, 1, UTF8_MB4, UTF8_MB4"_vietnamese_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
774+
{ 248, 1, "gb18030", "gb18030_chinese_ci", "", 54936, "GB18030", 1, 4, mysql_mbcharlen_gb18030, check_mb_gb18030_valid},
775+
{ 249, 1, "gb18030", "gb18030_bin", "", 54936, "GB18030", 1, 4, mysql_mbcharlen_gb18030, check_mb_gb18030_valid},
776+
{ 250, 1, "gb18030", "gb18030_unicode_520_ci", "", 54936, "GB18030", 1, 4, mysql_mbcharlen_gb18030, check_mb_gb18030_valid},
777+
733778

734779
{ 254, 1, UTF8_MB3, UTF8_MB3"_general_cs", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8, check_mb_utf8_valid},
780+
781+
{ 255, 1, UTF8_MB4, UTF8_MB4"_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
782+
{ 256, 1, UTF8_MB4, UTF8_MB4"_de_pb_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
783+
{ 257, 1, UTF8_MB4, UTF8_MB4"_is_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
784+
{ 258, 1, UTF8_MB4, UTF8_MB4"_lv_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
785+
{ 259, 1, UTF8_MB4, UTF8_MB4"_ro_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
786+
{ 260, 1, UTF8_MB4, UTF8_MB4"_sl_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
787+
{ 261, 1, UTF8_MB4, UTF8_MB4"_pl_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
788+
{ 262, 1, UTF8_MB4, UTF8_MB4"_et_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
789+
{ 263, 1, UTF8_MB4, UTF8_MB4"_es_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
790+
{ 264, 1, UTF8_MB4, UTF8_MB4"_sv_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
791+
{ 265, 1, UTF8_MB4, UTF8_MB4"_tr_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
792+
{ 266, 1, UTF8_MB4, UTF8_MB4"_cs_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
793+
{ 267, 1, UTF8_MB4, UTF8_MB4"_da_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
794+
{ 268, 1, UTF8_MB4, UTF8_MB4"_lt_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
795+
{ 269, 1, UTF8_MB4, UTF8_MB4"_sk_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
796+
{ 270, 1, UTF8_MB4, UTF8_MB4"_es_trad_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
797+
{ 271, 1, UTF8_MB4, UTF8_MB4"_la_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
798+
{ 273, 1, UTF8_MB4, UTF8_MB4"_eo_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
799+
{ 274, 1, UTF8_MB4, UTF8_MB4"_hu_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
800+
{ 275, 1, UTF8_MB4, UTF8_MB4"_hr_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
801+
{ 277, 1, UTF8_MB4, UTF8_MB4"_vi_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
802+
{ 278, 1, UTF8_MB4, UTF8_MB4"_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
803+
{ 279, 1, UTF8_MB4, UTF8_MB4"_de_pb__0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
804+
{ 280, 1, UTF8_MB4, UTF8_MB4"_is_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
805+
{ 281, 1, UTF8_MB4, UTF8_MB4"_lv_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
806+
{ 282, 1, UTF8_MB4, UTF8_MB4"_ro_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
807+
{ 283, 1, UTF8_MB4, UTF8_MB4"_sl_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
808+
{ 284, 1, UTF8_MB4, UTF8_MB4"_pl_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
809+
{ 285, 1, UTF8_MB4, UTF8_MB4"_et_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
810+
{ 286, 1, UTF8_MB4, UTF8_MB4"_es_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
811+
{ 287, 1, UTF8_MB4, UTF8_MB4"_sv_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
812+
{ 288, 1, UTF8_MB4, UTF8_MB4"_tr_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
813+
{ 289, 1, UTF8_MB4, UTF8_MB4"_cs_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
814+
{ 290, 1, UTF8_MB4, UTF8_MB4"_da_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
815+
{ 291, 1, UTF8_MB4, UTF8_MB4"_lt_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
816+
{ 292, 1, UTF8_MB4, UTF8_MB4"_sk_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
817+
{ 293, 1, UTF8_MB4, UTF8_MB4"_es_trad_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
818+
{ 294, 1, UTF8_MB4, UTF8_MB4"_la_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
819+
{ 296, 1, UTF8_MB4, UTF8_MB4"_eo_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
820+
{ 297, 1, UTF8_MB4, UTF8_MB4"_hu_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
821+
{ 298, 1, UTF8_MB4, UTF8_MB4"_hr_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
822+
{ 300, 1, UTF8_MB4, UTF8_MB4"_vi_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
823+
{ 303, 1, UTF8_MB4, UTF8_MB4"_ja_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
824+
{ 304, 1, UTF8_MB4, UTF8_MB4"_ja_0900_as_cs_ks", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
825+
{ 305, 1, UTF8_MB4, UTF8_MB4"_0900_as_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
826+
{ 306, 1, UTF8_MB4, UTF8_MB4"_ru_0900_as_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
827+
{ 307, 1, UTF8_MB4, UTF8_MB4"_ru_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
735828
{ 576, 1, UTF8_MB3, UTF8_MB3"_croatian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
736829
{ 577, 1, UTF8_MB3, UTF8_MB3"_myanmar_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
737830
{ 578, 1, UTF8_MB3, UTF8_MB3"_thai_520_w2", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/

0 commit comments

Comments
 (0)