/* Conversion module for ARIB-STD-B24. Copyright (C) 1998-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ /* * Conversion module for the character encoding * defined in ARIB STD-B24 Volume 1, Part 2, Chapter 7. * http://www.arib.or.jp/english/html/overview/doc/6-STD-B24v5_2-1p3-E1.pdf * http://www.arib.or.jp/english/html/overview/sb_ej.html * https://sites.google.com/site/unicodesymbols/Home/japanese-tv-symbols/ * It is based on ISO-2022, and used in Japanese digital television. * * Note 1: "mosaic" characters are not supported in this module. * Note 2: Control characters (for subtitles) are discarded. */ #include #include #include #include #include #include #include "jis0201.h" #include "jis0208.h" #include "jisx0213.h" /* Definitions used in the body of the `gconv' function. 
*/
/* Name of the charset implemented by this module.  */
#define CHARSET_NAME "ARIB-STD-B24//"
/* NOTE(review): DEFINE_INIT / DEFINE_FINI / ONE_DIRECTION are presumably
   consumed by glibc's conversion skeleton, which is included outside
   this view -- confirm there.  Both loop directions are defined below.  */
#define DEFINE_INIT 1
#define DEFINE_FINI 1
#define ONE_DIRECTION 0
#define FROM_LOOP from_aribb24_loop
#define TO_LOOP to_aribb24_loop
/* ARIB-STD-B24 -> UCS-4: input is consumed one byte at a time and every
   UCS-4 character written takes 4 bytes.  A single character may expand
   to as many as four UCS-4 characters (see `struct mchar_entry'), hence
   the 4 * 4 upper bound.  */
#define FROM_LOOP_MIN_NEEDED_FROM 1
#define FROM_LOOP_MAX_NEEDED_FROM 1
#define FROM_LOOP_MIN_NEEDED_TO 4
#define FROM_LOOP_MAX_NEEDED_TO (4 * 4)
/* UCS-4 -> ARIB-STD-B24: one 4-byte UCS-4 character is read per step.  */
#define TO_LOOP_MIN_NEEDED_FROM 4
#define TO_LOOP_MAX_NEEDED_FROM 4
#define TO_LOOP_MIN_NEEDED_TO 1
#define TO_LOOP_MAX_NEEDED_TO 7
/* Declares the locals used by SAVE_RESET_STATE and by the loop functions
   (via EXTRA_LOOP_ARGS), and initialises `status'.  */
#define PREPARE_LOOP \
  __mbstate_t saved_state; \
  __mbstate_t *statep = data->__statep; \
  status = __GCONV_OK;

/* Since we might have to reset the input pointer we must be able to save
   and restore the state.  A non-zero Save stores *statep into
   saved_state; zero copies saved_state back into *statep.  */
#define SAVE_RESET_STATE(Save) \
  { \
    if (Save) \
      saved_state = *statep; \
    else \
      *statep = saved_state; \
  }

/* During UCS-4 to ARIB-STD-B24 conversion, the state contains the last
   two bytes to be output, in .prev member. */

/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state. This has to be done during the
   flushing. */
#define EMIT_SHIFT_TO_INIT \
  { \
    if (!FROM_DIRECTION) \
      status = out_buffered((struct state_to *) data->__statep, \
                            &outbuf, outend); \
    /* No shift sequence is emitted here; the state is simply zeroed, and an all-zero state is replaced by the default state in INIT_PARAMS.  NOTE(review): out_buffered is defined outside this view; presumably it flushes the character held in state_to.prev -- confirm. */ \
    memset (data->__statep, '\0', sizeof (*data->__statep)); \
  }

/* This makes obvious what everybody knows: 0x1b is the Esc character.
*/
#define ESC 0x1b
/* other control characters */
/* Single shifts: BODY records them in state_from.ss (2 for SS2, 3 for
   SS3).  */
#define SS2 0x19
#define SS3 0x1d
/* Locking shifts.  LS0/LS1 are C0 codes; LS2, LS3 and the LSnR codes
   are only recognised as the byte following ESC (see BODY).  */
#define LS0 0x0f
#define LS1 0x0e
#define LS2 0x6e
#define LS3 0x6f
#define LS1R 0x7e
#define LS2R 0x7d
#define LS3R 0x7c
#define LF 0x0a
#define CR 0x0d
#define BEL 0x07
#define BS 0x08
/* C1 codes that open a parameter sequence; BODY enters CTRL_SEQ with
   .skip == 0 and remembers the code in state_from.prev.  */
#define COL 0x90
#define CDC 0x92
#define MACRO_CTRL 0x95
#define CSI 0x9b
#define TIME 0x9d

/* code sets */
/* The values are the bytes BODY compares against (and stores into
   state_from.g[]) while a designation escape sequence is parsed.  DRCS
   and macro sets are entered via an extra 0x20 byte and stored with
   bit 7 set so that they cannot collide with the other sets.  */
enum g_set
{
  KANJI_set = '\x42',           /* 2Byte set */
  ASCII_set = '\x40',
  ASCII_x_set = '\x4a',
  HIRAGANA_set = '\x30',
  KATAKANA_set = '\x31',
  MOSAIC_A_set = '\x32',
  MOSAIC_B_set = '\x33',
  MOSAIC_C_set = '\x34',
  MOSAIC_D_set = '\x35',
  PROP_ASCII_set = '\x36',
  PROP_HIRA_set = '\x37',
  PROP_KATA_set = '\x38',
  JIS0201_KATA_set = '\x49',
  JISX0213_1_set = '\x39',      /* 2Byte set */
  JISX0213_2_set = '\x3a',      /* 2Byte set */
  EXTRA_SYMBOLS_set = '\x3b',   /* 2Byte set */
  DRCS0_set = 0x40 | 0x80,      /* 2Byte set */
  DRCS1_set = 0x41 | 0x80,      /* DRCS1..DRCS15 are folded into DRCS0 by
                                   b24_char_conv */
  DRCS15_set = 0x4f | 0x80,
  MACRO_set = 0x70 | 0x80,
};

/* First define the conversion function from ARIB-STD-B24 to UCS-4. */

/* Input parser states of the from-direction loop (state_from.mode).  */
enum mode_e
{
  NORMAL,               /* expecting a control code or a lead byte */
  ESCAPE,               /* ESC seen */
  G_SEL_1B,             /* ESC 0x28..0x2b seen: designating a 1-byte set */
  G_SEL_MB,             /* ESC 0x24 0x28..0x2b seen: designating a 2-byte set */
  CTRL_SEQ,             /* skipping the parameters of a control code */
  DESIGNATE_MB,         /* ESC 0x24 seen */
  DRCS_SEL_1B,          /* 0x20 seen in G_SEL_1B: 1-byte DRCS/macro follows */
  DRCS_SEL_MB,          /* 0x20 seen in G_SEL_MB: 2-byte DRCS follows */
  MB_2ND,               /* lead byte of a 2-byte character is in .prev */
};

/*
 * __GCONV_INPUT_INCOMPLETE is never used in this conversion, thus
 * we can re-use mbstate_t.__value and .__count:3 for the other purpose.
 */
/* The bit-fields and bytes before g[] add up to 32 bits and g[] to
   another 32.  NOTE(review): this is meant to overlay __mbstate_t
   exactly -- confirm the layout on the target ABI.  */
struct state_from
{
  /* __count */
  uint8_t cnt:3;        /* for use in skeleton.c. always 0 */
  uint8_t pad0:1;
  uint8_t gl:2;         /* idx of the G-set invoked into GL */
  uint8_t gr:2;         /* ... to GR */
  uint8_t ss:2;         /* SS state. 0: no shift, 2:SS2, 3:SS3 */
  uint8_t gidx:2;       /* index (0..3) of the G-set being designated by
                           the escape sequence in progress */
  uint8_t mode:4;       /* current input mode, an enum mode_e value */
  uint8_t skip;         /* [CTRL_SEQ] # of char to skip */
  uint8_t prev;         /* previously input char [in MB_2ND] or,*/
                        /* the control code that opened the sequence
                           [CTRL_SEQ (.skip == 0)] */
  /* __value */
  uint8_t g[4];         /* code set for G0..G3 */
} __attribute__((packed));

/* Initial decoder state: G0..G3 = KANJI, ASCII, HIRAGANA, KATAKANA with
   G0 invoked into GL and G2 into GR.  BODY also falls back to it on LF.  */
static const struct state_from def_state_from =
{
  .cnt = 0,
  .gl = 0,
  .gr = 2,
  .ss = 0,
  .gidx = 0,
  .mode = NORMAL,
  .skip = 0,
  .prev = '\0',
  .g[0] = KANJI_set,
  .g[1] = ASCII_set,
  .g[2] = HIRAGANA_set,
  .g[3] = KATAKANA_set,
};

#define EXTRA_LOOP_DECLS , __mbstate_t *statep
#define EXTRA_LOOP_ARGS , statep
/* Work on a local copy of the state.  g[0] can never be 0 in a live
   state (no g_set value is 0), so an all-zero mbstate_t -- e.g. after the
   memset in EMIT_SHIFT_TO_INIT -- is replaced by the default state.  */
#define INIT_PARAMS \
  struct state_from st = *((struct state_from *)statep); \
  if (st.g[0] == 0) \
    st = def_state_from;
/* Write the local copy back.  */
#define UPDATE_PARAMS *statep = *((__mbstate_t *)&st)
#define LOOP_NEED_FLAGS

#define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT FROM_LOOP

/* tables and functions used in BODY */

/* Punctuation at 0x77..0x7e of the katakana sets, indexed by
   (byte - 0x77).  Entries 2..7 are also used for 0x79..0x7e of the
   hiragana sets.  */
static const uint16_t kata_punc[] =
{
  0x30fd, 0x30fe, 0x30fc, 0x3002, 0x300c, 0x300d, 0x3001, 0x30fb
};

/* Iteration marks at 0x77 and 0x78 of the hiragana sets.  */
static const uint16_t hira_punc[] =
{
  0x309d, 0x309e
};

/* Combining marks substituted for KANJI-set codes 0x21 0x2d..0x32,
   indexed by (second byte - 0x2d).  */
static const uint32_t nonspacing_symbol[] =
{
  0x0301, 0x0300, 0x0308, 0x0302, 0x0304, 0x0332
};

/* Additional kanji in rows 85 and 86 of the EXTRA_SYMBOLS set, indexed by
   (c1 - 0x75) * 96 + (c2 - 0x20).  Row 86 is only defined up to
   column 43.  A 0 entry is a cell without a character.  */
static const uint32_t extra_kanji[] =
{
  /* row 85 */
  /* col 0..15 */
  0, 0x3402, 0x20158, 0x4efd, 0x4eff, 0x4f9a, 0x4fc9, 0x509c,
  0x511e, 0x51bc, 0x351f, 0x5307, 0x5361, 0x536c, 0x8a79, 0x20bb7,
  /* col. 16..31 */
  0x544d, 0x5496, 0x549c, 0x54a9, 0x550e, 0x554a, 0x5672, 0x56e4,
  0x5733, 0x5734, 0xfa10, 0x5880, 0x59e4, 0x5a23, 0x5a55, 0x5bec,
  /* col. 32..47 */
  0xfa11, 0x37e2, 0x5eac, 0x5f34, 0x5f45, 0x5fb7, 0x6017, 0xfa6b,
  0x6130, 0x6624, 0x66c8, 0x66d9, 0x66fa, 0x66fb, 0x6852, 0x9fc4,
  /* col. 48..63 */
  0x6911, 0x693b, 0x6a45, 0x6a91, 0x6adb, 0x233cc, 0x233fe, 0x235c4,
  0x6bf1, 0x6ce0, 0x6d2e, 0xfa45, 0x6dbf, 0x6dca, 0x6df8, 0xfa46,
  /* col. 64..79 */
  0x6f5e, 0x6ff9, 0x7064, 0xfa6c, 0x242ee, 0x7147, 0x71c1, 0x7200,
  0x739f, 0x73a8, 0x73c9, 0x73d6, 0x741b, 0x7421, 0xfa4a, 0x7426,
  /* col.
80..95 */
  0x742a, 0x742c, 0x7439, 0x744b, 0x3eda, 0x7575, 0x7581, 0x7772,
  0x4093, 0x78c8, 0x78e0, 0x7947, 0x79ae, 0x9fc6, 0x4103, 0,
  /* row 86 */
  /* col 0..15 */
  0, 0x9fc5, 0x79da, 0x7a1e, 0x7b7f, 0x7c31, 0x4264, 0x7d8b,
  0x7fa1, 0x8118, 0x813a, 0xfa6d, 0x82ae, 0x845b, 0x84dc, 0x84ec,
  /* col. 16..31 */
  0x8559, 0x85ce, 0x8755, 0x87ec, 0x880b, 0x88f5, 0x89d2, 0x8af6,
  0x8dce, 0x8fbb, 0x8ff6, 0x90dd, 0x9127, 0x912d, 0x91b2, 0x9233,
  /* col. 32..43 */
  0x9288, 0x9321, 0x9348, 0x9592, 0x96de, 0x9903, 0x9940, 0x9ad9,
  0x9bd6, 0x9dd7, 0x9eb4, 0x9eb5
};

/* Symbols in rows 90..94, indexed as [c1 - 0x7a][c2 - 0x20].  A 0 entry
   is a cell without a single-character mapping; b24_char_conv reports it
   as unconvertible unless ext_sym_smallk / ext_sym_music covers the
   cell.  */
static const uint32_t extra_symbols[5][96] =
{
  /* row 90 */
  {
    /* col 0..15 */
    0, 0x26cc, 0x26cd, 0x2762, 0x26cf, 0x26d0, 0x26d1, 0,
    0x26d2, 0x26d5, 0x26d3, 0x26d4, 0, 0, 0, 0,
    /* col 16..31 */
    0x1f17f, 0x1f18a, 0, 0, 0x26d6, 0x26d7, 0x26d8, 0x26d9,
    0x26da, 0x26db, 0x26dc, 0x26dd, 0x26de, 0x26df, 0x26e0, 0x26e1,
    /* col 32..47 */
    0x2b55, 0x3248, 0x3249, 0x324a, 0x324b, 0x324c, 0x324d, 0x324e,
    0x324f, 0, 0, 0, 0, 0x2491, 0x2492, 0x2493,
    /* col 48..63 */
    0x1f14a, 0x1f14c, 0x1f13F, 0x1f146, 0x1f14b, 0x1f210, 0x1f211, 0x1f212,
    0x1f213, 0x1f142, 0x1f214, 0x1f215, 0x1f216, 0x1f14d, 0x1f131, 0x1f13d,
    /* col 64..79 */
    0x2b1b, 0x2b24, 0x1f217, 0x1f218, 0x1f219, 0x1f21a, 0x1f21b, 0x26bf,
    0x1f21c, 0x1f21d, 0x1f21e, 0x1f21f, 0x1f220, 0x1f221, 0x1f222, 0x1f223,
    /* col 80..95 */
    0x1f224, 0x1f225, 0x1f14e, 0x3299, 0x1f200, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0
  },
  /* row 91 */
  /* Only columns 0..49 are listed; the remaining cells of this row are
     implicitly zero.  */
  {
    /* col 0..15 */
    0, 0x26e3, 0x2b56, 0x2b57, 0x2b58, 0x2b59, 0x2613, 0x328b,
    0x3012, 0x26e8, 0x3246, 0x3245, 0x26e9, 0x0fd6, 0x26ea, 0x26eb,
    /* col 16..31 */
    0x26ec, 0x2668, 0x26ed, 0x26ee, 0x26ef, 0x2693, 0x1f6e7, 0x26f0,
    0x26f1, 0x26f2, 0x26f3, 0x26f4, 0x26f5, 0x1f157, 0x24b9, 0x24c8,
    /* col 32..47 */
    0x26f6, 0x1f15f, 0x1f18b, 0x1f18d, 0x1f18c, 0x1f179, 0x26f7, 0x26f8,
    0x26f9, 0x26fa, 0x1f17b, 0x260e, 0x26fb, 0x26fc, 0x26fd, 0x26fe,
    /* col 48..63 */
    0x1f17c, 0x26ff,
  },
  /* row 92 */
  /* Columns 26..31 and 56..85 are 0 here because they are handled by
     ext_sym_smallk and ext_sym_music respectively.  */
  {
    /* col 0..15 */
    0, 0x27a1, 0x2b05, 0x2b06, 0x2b07, 0x2b2f, 0x2b2e, 0x5e74,
    0x6708, 0x65e5, 0x5186, 0x33a1, 0x33a5, 0x339d, 0x33a0, 0x33a4,
    /* col 16..31 */
    0x1f100, 0x2488, 0x2489, 0x248a, 0x248b, 0x248c, 0x248d, 0x248e,
    0x248f, 0x2490, 0, 0, 0, 0, 0, 0,
    /* col 32..47 */
    0x1f101, 0x1f102, 0x1f103, 0x1f104, 0x1f105, 0x1f106, 0x1f107, 0x1f108,
    0x1f109, 0x1f10a, 0x3233, 0x3236, 0x3232, 0x3231, 0x3239, 0x3244,
    /* col 48..63 */
    0x25b6, 0x25c0, 0x3016, 0x3017, 0x27d0, 0x00b2, 0x00b3, 0x1f12d,
    0, 0, 0, 0, 0, 0, 0, 0,
    /* col 64..79 */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    /* col 80..95 */
    0, 0, 0, 0, 0, 0, 0x1f12c, 0x1f12b,
    0x3247, 0x1f190, 0x1f226, 0x213b, 0, 0, 0, 0
  },
  /* row 93 */
  {
    /* col 0..15 */
    0, 0x322a, 0x322b, 0x322c, 0x322d, 0x322e, 0x322f, 0x3230,
    0x3237, 0x337e, 0x337d, 0x337c, 0x337b, 0x2116, 0x2121, 0x3036,
    /* col 16..31 */
    0x26be, 0x1f240, 0x1f241, 0x1f242, 0x1f243, 0x1f244, 0x1f245, 0x1f246,
    0x1f247, 0x1f248, 0x1f12a, 0x1f227, 0x1f228, 0x1f229, 0x1f214, 0x1f22a,
    /* col 32..47 */
    0x1f22b, 0x1f22c, 0x1f22d, 0x1f22e, 0x1f22f, 0x1f230, 0x1f231, 0x2113,
    0x338f, 0x3390, 0x33ca, 0x339e, 0x33a2, 0x3371, 0, 0,
    /* col 48..63 */
    0x00bd, 0x2189, 0x2153, 0x2154, 0x00bc, 0x00be, 0x2155, 0x2156,
    0x2157, 0x2158, 0x2159, 0x215a, 0x2150, 0x215b, 0x2151, 0x2152,
    /* col 64..79 */
    0x2600, 0x2601, 0x2602, 0x26c4, 0x2616, 0x2617, 0x26c9, 0x26ca,
    0x2666, 0x2665, 0x2663, 0x2660, 0x26cb, 0x2a00, 0x203c, 0x2049,
    /* col 80..95 */
    0x26c5, 0x2614, 0x26c6, 0x2603, 0x26c7, 0x26a1, 0x26c8, 0,
    0x269e, 0x269f, 0x266c, 0x260e, 0, 0, 0, 0
  },
  /* row 94 */
  {
    /* col 0..15 */
    0, 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166,
    0x2167, 0x2168, 0x2169, 0x216a, 0x216b, 0x2470, 0x2471, 0x2472,
    /* col 16..31 */
    0x2473, 0x2474, 0x2475, 0x2476, 0x2477, 0x2478, 0x2479, 0x247a,
    0x247b, 0x247c, 0x247d, 0x247e, 0x247f, 0x3251, 0x3252, 0x3253,
    /* col 32..47 */
    0x3254, 0x1f110, 0x1f111, 0x1f112, 0x1f113, 0x1f114, 0x1f115, 0x1f116,
    0x1f117, 0x1f118, 0x1f119, 0x1f11a, 0x1f11b, 0x1f11c, 0x1f11d, 0x1f11e,
    /* col 48..63 */
    0x1f11f, 0x1f120, 0x1f121, 0x1f122, 0x1f123, 0x1f124, 0x1f125, 0x1f126,
    0x1f127, 0x1f128, 0x1f129, 0x3255, 0x3256, 0x3257, 0x3258, 0x3259,
    /* col 64..79 */
    0x325a, 0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466,
    0x2467, 0x2468, 0x2469, 0x246a, 0x246b, 0x246c, 0x246d, 0x246e,
    /* col 80..95 */
    0x246f, 0x2776, 0x2777, 0x2778, 0x2779, 0x277a, 0x277b, 0x277c,
    0x277d, 0x277e, 0x277f, 0x24eb, 0x24ec, 0x325b, 0, 0
  },
};

/* One input character that converts to a sequence of LEN (at most four)
   UCS-4 characters, stored in TO.  */
struct mchar_entry
{
  uint32_t len;
  uint32_t to[4];
};

/* list of transliterations. */

/* small/subscript-ish KANJI. map to the normal sized version */
/* Used for row 92, columns 0x1a..0x1f; indexed by (column - 0x1a).  */
static const struct mchar_entry ext_sym_smallk[] =
{
  {.len = 1, .to = { 0x6c0f }},
  {.len = 1, .to = { 0x526f }},
  {.len = 1, .to = { 0x5143 }},
  {.len = 1, .to = { 0x6545 }},
  {.len = 1, .to = { 0x52ed }},
  {.len = 1, .to = { 0x65b0 }},
};

/* symbols of music instruments */
/* Used for row 92, columns 0x38..0x55; indexed by (column - 0x38).  Each
   entry is an ASCII abbreviation in parentheses such as "(vn)"; some
   abbreviations are split over two consecutive cells, e.g. "(ce"
   followed by "mb)".  */
static const struct mchar_entry ext_sym_music[] =
{
  {.len = 4, .to = { 0x0028, 0x0076, 0x006e, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x006f, 0x0062, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x0063, 0x0062, 0x0029 }},
  {.len = 3, .to = { 0x0028, 0x0063, 0x0065 }},
  {.len = 3, .to = { 0x006d, 0x0062, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x0068, 0x0070, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x0062, 0x0072, 0x0029 }},
  {.len = 3, .to = { 0x0028, 0x0070, 0x0029 }},
  {.len = 3, .to = { 0x0028, 0x0073, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x006d, 0x0073, 0x0029 }},
  {.len = 3, .to = { 0x0028, 0x0074, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x0062, 0x0073, 0x0029 }},
  {.len = 3, .to = { 0x0028, 0x0062, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x0074, 0x0062, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x0076, 0x0070, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x0064, 0x0073, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x0061, 0x0067, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x0065, 0x0067, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x0076, 0x006f, 0x0029 }},
  {.len = 4, .to = { 0x0028, 0x0066, 0x006c, 0x0029 }},
  {.len = 3, .to = { 0x0028, 0x006b, 0x0065 }},
  {.len = 2, .to = { 0x0079, 0x0029 }},
  {.len = 3, .to = { 0x0028, 0x0073, 0x0061 }},
{.len = 2, .to = { 0x0078, 0x0029 }}, {.len = 3, .to = { 0x0028, 0x0073, 0x0079 }}, {.len = 2, .to = { 0x006e, 0x0029 }}, {.len = 3, .to = { 0x0028, 0x006f, 0x0072 }}, {.len = 2, .to = { 0x0067, 0x0029 }}, {.len = 3, .to = { 0x0028, 0x0070, 0x0065 }}, {.len = 2, .to = { 0x0072, 0x0029 }}, }; int b24_char_conv (int set, unsigned char c1, unsigned char c2, uint32_t *out) { int len; uint32_t ch; if (set > DRCS0_set && set <= DRCS15_set) set = DRCS0_set; switch (set) { case ASCII_set: case ASCII_x_set: case PROP_ASCII_set: if (c1 == 0x7e) *out = 0x203e; else if (c1 == 0x5c) *out = 0xa5; else *out = c1; return 1; case KATAKANA_set: case PROP_KATA_set: if (c1 <= 0x76) *out = 0x3080 + c1; else *out = kata_punc[c1 - 0x77]; return 1; case HIRAGANA_set: case PROP_HIRA_set: if (c1 <= 0x73) *out = 0x3020 + c1; else if (c1 == 0x77 || c1 == 0x78) *out = hira_punc[c1 - 0x77]; else if (c1 >= 0x79) *out = kata_punc[c1 - 0x77]; else return 0; return 1; case JIS0201_KATA_set: if (c1 > 0x5f) return 0; *out = 0xff40 + c1; return 1; case EXTRA_SYMBOLS_set: if (c1 == 0x75 || (c1 == 0x76 && (c2 - 0x20) <=43)) { *out = extra_kanji[(c1 - 0x75) * 96 + (c2 - 0x20)]; return 1; } /* fall through */ case KANJI_set: /* check extra symbols */ if (c1 >= 0x7a && c1 <= 0x7e) { const struct mchar_entry *entry; c1 -= 0x20; c2 -= 0x20; if (c1 == 0x5c && c2 >= 0x1a && c2 <= 0x1f) entry = &ext_sym_smallk[c2 - 0x1a]; else if (c1 == 0x5c && c2 >= 0x38 && c2 <= 0x55) entry = &ext_sym_music[c2 - 0x38]; else entry = NULL; if (entry) { int i; for (i = 0; i < entry->len; i++) out[i] = entry->to[i]; return i; } *out = extra_symbols[c1 - 0x5a][c2]; if (*out == 0) return 0; return 1; } if (set == EXTRA_SYMBOLS_set) return 0; /* non-JISX0213 modification. 
(combining chars) */ if (c1 == 0x22 && c2 == 0x7e) { *out = 0x20dd; return 1; } else if (c1 == 0x21 && c2 >= 0x2d && c2 <= 0x32) { *out = nonspacing_symbol[c2 - 0x2d]; return 1; } /* fall through */ case JISX0213_1_set: case JISX0213_2_set: len = 1; ch = jisx0213_to_ucs4(c1 | (set == JISX0213_2_set ? 0x0200 : 0x0100), c2); if (ch == 0) return 0; if (ch < 0x80) { len = 2; out[0] = __jisx0213_to_ucs_combining[ch - 1][0]; out[1] = __jisx0213_to_ucs_combining[ch - 1][1]; } else *out = ch; return len; case MOSAIC_A_set: case MOSAIC_B_set: case MOSAIC_C_set: case MOSAIC_D_set: case DRCS0_set: case MACRO_set: *out = __UNKNOWN_10646_CHAR; return 1; default: break; } return 0; } #define BODY \ { \ uint32_t ch = *inptr; \ \ if (ch == 0) \ { \ st.mode = NORMAL; \ ++ inptr; \ continue; \ } \ if (__glibc_unlikely (st.mode == CTRL_SEQ)) \ { \ if (st.skip) \ { \ --st.skip; \ if (st.skip == 0) \ st.mode = NORMAL; \ if (ch < 0x40 || ch > 0x7f) \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ } \ else if (st.prev == MACRO_CTRL) \ { \ if (ch == MACRO_CTRL) \ st.skip = 1; \ else if (ch == LF || ch == CR) { \ st = def_state_from; \ put32(outptr, ch); \ outptr += 4; \ } \ } \ else if (st.prev == CSI && (ch == 0x5b || ch == 0x5c || ch == 0x6f)) \ st.mode = NORMAL; \ else if (st.prev == TIME || st.prev == CSI) \ { \ if (ch == 0x20 || (st.prev == TIME && ch == 0x28)) \ st.skip = 1; \ else if (!((st.prev == TIME && ch == 0x29) \ || ch == 0x3b || (ch >= 0x30 && ch <= 0x39))) \ { \ st.mode = NORMAL; \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ } \ } \ else if (st.prev == COL || st.prev == CDC) \ { \ if (ch == 0x20) \ st.skip = 1; \ else \ { \ st.mode = NORMAL; \ if (ch < 0x40 || ch > 0x7f) \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ } \ } \ ++ inptr; \ continue; \ } \ \ if (__glibc_unlikely (ch == LF)) \ { \ st = def_state_from; \ put32 (outptr, ch); \ outptr += 4; \ ++ inptr; \ continue; \ } \ \ if (__glibc_unlikely (st.mode == ESCAPE)) \ { \ if (ch == LS2 || ch == LS3) \ { \ st.mode = NORMAL; \ st.gl = (ch 
== LS2) ? 2 : 3; \ st.ss = 0; \ } \ else if (ch == LS1R || ch == LS2R || ch == LS3R) \ { \ st.mode = NORMAL; \ st.gr = (ch == LS1R) ? 1 : (ch == LS2R) ? 2 : 3; \ st.ss = 0; \ } \ else if (ch == 0x24) \ st.mode = DESIGNATE_MB; \ else if (ch >= 0x28 && ch <= 0x2b) \ { \ st.mode = G_SEL_1B; \ st.gidx = ch - 0x28; \ } \ else \ { \ st.mode = NORMAL; \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ } \ ++ inptr; \ continue; \ } \ \ if (__glibc_unlikely (st.mode == DESIGNATE_MB)) \ { \ if (ch == KANJI_set || ch == JISX0213_1_set || ch == JISX0213_2_set \ || ch == EXTRA_SYMBOLS_set) \ { \ st.mode = NORMAL; \ st.g[0] = ch; \ } \ else if (ch >= 0x28 && ch <= 0x2b) \ { \ st.mode = G_SEL_MB; \ st.gidx = ch - 0x28; \ } \ else \ { \ st.mode = NORMAL; \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ } \ ++ inptr; \ continue; \ } \ \ if (__glibc_unlikely (st.mode == G_SEL_1B)) \ { \ if (ch == ASCII_set || ch == ASCII_x_set || ch == JIS0201_KATA_set \ || (ch >= 0x30 && ch <= 0x38)) \ { \ st.g[st.gidx] = ch; \ st.mode = NORMAL; \ } \ else if (ch == 0x20) \ st.mode = DRCS_SEL_1B; \ else \ { \ st.mode = NORMAL; \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ } \ ++ inptr; \ continue; \ } \ \ if (__glibc_unlikely (st.mode == G_SEL_MB)) \ { \ if (ch == KANJI_set || ch == JISX0213_1_set || ch == JISX0213_2_set \ || ch == EXTRA_SYMBOLS_set) \ { \ st.g[st.gidx] = ch; \ st.mode = NORMAL; \ } \ else if (ch == 0x20) \ st.mode = DRCS_SEL_MB; \ else \ { \ st.mode = NORMAL; \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ } \ ++ inptr; \ continue; \ } \ \ if (__glibc_unlikely (st.mode == DRCS_SEL_1B)) \ { \ st.mode = NORMAL; \ if (ch == 0x70 || (ch >= 0x41 && ch <= 0x4f)) \ st.g[st.gidx] = ch | 0x80; \ else \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ ++ inptr; \ continue; \ } \ \ if (__glibc_unlikely (st.mode == DRCS_SEL_MB)) \ { \ st.mode = NORMAL; \ if (ch == 0x40) \ st.g[st.gidx] = ch | 0x80; \ else \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ ++ inptr; \ continue; \ } \ \ if (st.mode == MB_2ND) \ { \ int gidx; \ int i, len; \ uint32_t 
out[MAX_NEEDED_OUTPUT]; \ \ gidx = (st.ss) ? st.ss : (ch & 0x80) ? st.gr : st.gl; \ st.mode = NORMAL; \ st.ss = 0; \ if (__glibc_unlikely (!(ch & 0x60))) /* C0/C1 */ \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ if (__glibc_unlikely (st.ss > 0 && (ch & 0x80))) \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ if (__glibc_unlikely ((st.prev & 0x80) != (ch & 0x80))) \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ len = b24_char_conv(st.g[gidx], (st.prev & 0x7f), (ch & 0x7f), out); \ if (len == 0) \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ if (outptr + 4 * len > outend) \ { \ result = __GCONV_FULL_OUTPUT; \ break; \ } \ for (i = 0; i < len; i++) \ { \ if (irreversible \ && __builtin_expect (out[i] == __UNKNOWN_10646_CHAR, 0)) \ ++ *irreversible; \ put32 (outptr, out[i]); \ outptr += 4; \ } \ ++ inptr; \ continue; \ } \ \ if (st.mode == NORMAL) \ { \ int gidx, set; \ \ if (__glibc_unlikely (!(ch & 0x60))) /* C0/C1 */ \ { \ if (ch == ESC) \ st.mode = ESCAPE; \ else if (ch == SS2) \ st.ss = 2; \ else if (ch == SS3) \ st.ss = 3; \ else if (ch == LS0) \ { \ st.ss = 0; \ st.gl = 0; \ } \ else if (ch == LS1) \ { \ st.ss = 0; \ st.gl = 1; \ } \ else if (ch == BEL || ch == BS || ch == CR) \ { \ st.ss = 0; \ put32 (outptr, ch); \ outptr += 4; \ } \ else if (ch == 0x09 || ch == 0x0b || ch == 0x0c || ch == 0x18 \ || ch == 0x1e || ch == 0x1f || (ch >= 0x80 && ch <= 0x8a)\ || ch == 0x99 || ch == 0x9a) \ { \ /* do nothing. 
just skip */ \ } \ else if (ch == 0x16 || ch == 0x8b || ch == 0x91 || ch == 0x93 \ || ch == 0x94 || ch == 0x97 || ch == 0x98) \ { \ st.mode = CTRL_SEQ; \ st.skip = 1; \ } \ else if (ch == 0x1c) \ { \ st.mode = CTRL_SEQ; \ st.skip = 2; \ } \ else if (ch == COL || ch == CDC || ch == MACRO_CTRL \ || ch == CSI ||ch == TIME) \ { \ st.mode = CTRL_SEQ; \ st.skip = 0; \ st.prev = ch; \ } \ else \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ \ ++ inptr; \ continue; \ } \ \ if (__glibc_unlikely ((ch & 0x7f) == 0x20 || ch == 0x7f)) \ { \ st.ss = 0; \ put32 (outptr, ch); \ outptr += 4; \ ++ inptr; \ continue; \ } \ if (__glibc_unlikely (ch == 0xff)) \ { \ st.ss = 0; \ put32 (outptr, __UNKNOWN_10646_CHAR); \ if (irreversible) \ ++ *irreversible; \ outptr += 4; \ ++ inptr; \ continue; \ } \ \ if (__glibc_unlikely (st.ss > 0 && (ch & 0x80))) \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ \ gidx = (st.ss) ? st.ss : (ch & 0x80) ? st.gr : st.gl; \ set = st.g[gidx]; \ if (set == DRCS0_set || set == KANJI_set || set == JISX0213_1_set \ || set == JISX0213_2_set || set == EXTRA_SYMBOLS_set) \ { \ st.mode = MB_2ND; \ st.prev = ch; \ } \ else \ { \ uint32_t out; \ \ st.ss = 0; \ if (b24_char_conv(set, (ch & 0x7f), 0, &out) == 0) \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ if (out == __UNKNOWN_10646_CHAR && irreversible) \ ++ *irreversible; \ put32 (outptr, out); \ outptr += 4; \ } \ ++ inptr; \ continue; \ } \ } #include /* Next, define the other direction, from UCS-4 to ARIB-STD-B24. */ /* As MIN_INPUT is 4 (> 1), .cnt & .value must be put aside for skeleton.c. * To reduce the size of the state and fit into mbstate_t, * put constraints on G-set that can be locking-shift'ed to GL/GR. * GL is limited to invoke G0/G1, GR to G2/G3. i.e. LS2,LS3, LS1R are not used. * G0 is fixed to KANJI, G1 to ASCII. * G2 can be either HIRAGANA/JISX0213_{1,2}, * G3 can be either KATAKANA/JISX0201_KATA/EXTRA_SYMBOLS. * JISX0213_{1,2},EXTRA_SYMBOLS are invoked into GR by SS2/SS3 * if it is not already invoked to GR. 
* plus, charset is referenced by an index instead of its designation char. */

/* Compact identifiers for the graphic sets the encoder can designate.
   KANJI_idx is 0, which INIT_PARAMS below uses to recognise a state that
   has not been initialised yet.  */
enum gset_idx
{
  KANJI_idx,
  ASCII_idx,
  HIRAGANA_idx,
  KATAKANA_idx,
  JIS0201_KATA_idx,
  JISX0213_1_idx,
  JISX0213_2_idx,
  EXTRA_SYMBOLS_idx,
};

/* Encoder state.  The bit-fields add up to 32 bits (3+1+1+3+3+21).
   NOTE(review): meant to overlay __mbstate_t (__count, then __value) --
   confirm the layout on the target ABI.  */
struct state_to
{
  /* __count */
  uint32_t cnt:3;       /* for use in skeleton.c.*/
  uint32_t gl:1;        /* 0: GL<-G0, 1: GL<-G1 */
  uint32_t gr:1;        /* 0: GR<-G2, 1: GR<-G3 */
  uint32_t g2:3;        /* enum gset_idx of the set designated to G2 */
  uint32_t g3:3;        /* same for G3 */
  uint32_t prev:21;     /* previously input, combining char (for JISX0213) */
  /* __value */
  uint32_t __value;     /* used in skeleton.c */
} __attribute__((packed));

/* Initial encoder state: G0 invoked into GL, G2 into GR, HIRAGANA
   designated to G2 and KATAKANA to G3, nothing buffered.  */
static const struct state_to def_state_to =
{
  .cnt = 0,
  .gl = 0,
  .gr = 0,
  .g2 = HIRAGANA_idx,
  .g3 = KATAKANA_idx,
  .prev = 0,
  .__value = 0
};

#define EXTRA_LOOP_DECLS , __mbstate_t *statep
#define EXTRA_LOOP_ARGS , statep
/* Work on a local copy of the state.  g2 == 0 (KANJI_idx) is not a set
   G2 may hold per the constraints above, so it marks an all-zero,
   uninitialised state, which is replaced by the default.  The trailing
   backslash on the last line makes the empty line after it part of the
   macro; keep that line empty.  */
#define INIT_PARAMS \
  struct state_to st = *((struct state_to *) statep); \
  if (st.g2 == 0) \
    st = def_state_to; \

/* Write the local copy back.  */
#define UPDATE_PARAMS *statep = *((__mbstate_t * )&st)
/* Reload the local copy from *statep, with the same fallback as
   INIT_PARAMS.  */
#define REINIT_PARAMS \
  do \
    { \
      st = *((struct state_to *) statep); \
      if (st.g2 == 0) \
        st = def_state_to; \
    } \
  while (0)
#define LOOP_NEED_FLAGS

#define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT TO_LOOP

/* tables and functions used in BODY */

/* Composition tables for each of the relevant combining characters.
*/
/* Each entry pairs the code of a base character with the code of the
   composed character, as spelled out in the per-entry comments
   ("composed = base combining-char"); only the low 16 bits of the codes
   shown there are stored.  The entries are grouped by combining
   character, and every group is described by a COMP_TABLE_IDX_xxxx
   (index of its first entry) / COMP_TABLE_LEN_xxxx (number of entries)
   macro pair defined next to it.  */
static const struct
{
  uint16_t base;
  uint16_t composed;
} comp_table_data[] =
{
#define COMP_TABLE_IDX_02E5 0
#define COMP_TABLE_LEN_02E5 1
  { 0x2b64, 0x2b65 }, /* 0x12B65 = 0x12B64 U+02E5 */
#define COMP_TABLE_IDX_02E9 (COMP_TABLE_IDX_02E5 + COMP_TABLE_LEN_02E5)
#define COMP_TABLE_LEN_02E9 1
  { 0x2b60, 0x2b66 }, /* 0x12B66 = 0x12B60 U+02E9 */
#define COMP_TABLE_IDX_0300 (COMP_TABLE_IDX_02E9 + COMP_TABLE_LEN_02E9)
#define COMP_TABLE_LEN_0300 5
  { 0x295c, 0x2b44 }, /* 0x12B44 = 0x1295C U+0300 */
  { 0x2b38, 0x2b48 }, /* 0x12B48 = 0x12B38 U+0300 */
  { 0x2b37, 0x2b4a }, /* 0x12B4A = 0x12B37 U+0300 */
  { 0x2b30, 0x2b4c }, /* 0x12B4C = 0x12B30 U+0300 */
  { 0x2b43, 0x2b4e }, /* 0x12B4E = 0x12B43 U+0300 */
#define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
#define COMP_TABLE_LEN_0301 4
  { 0x2b38, 0x2b49 }, /* 0x12B49 = 0x12B38 U+0301 */
  { 0x2b37, 0x2b4b }, /* 0x12B4B = 0x12B37 U+0301 */
  { 0x2b30, 0x2b4d }, /* 0x12B4D = 0x12B30 U+0301 */
  { 0x2b43, 0x2b4f }, /* 0x12B4F = 0x12B43 U+0301 */
#define COMP_TABLE_IDX_309A (COMP_TABLE_IDX_0301 + COMP_TABLE_LEN_0301)
#define COMP_TABLE_LEN_309A 14
  { 0x242b, 0x2477 }, /* 0x12477 = 0x1242B U+309A */
  { 0x242d, 0x2478 }, /* 0x12478 = 0x1242D U+309A */
  { 0x242f, 0x2479 }, /* 0x12479 = 0x1242F U+309A */
  { 0x2431, 0x247a }, /* 0x1247A = 0x12431 U+309A */
  { 0x2433, 0x247b }, /* 0x1247B = 0x12433 U+309A */
  { 0x252b, 0x2577 }, /* 0x12577 = 0x1252B U+309A */
  { 0x252d, 0x2578 }, /* 0x12578 = 0x1252D U+309A */
  { 0x252f, 0x2579 }, /* 0x12579 = 0x1252F U+309A */
  { 0x2531, 0x257a }, /* 0x1257A = 0x12531 U+309A */
  { 0x2533, 0x257b }, /* 0x1257B = 0x12533 U+309A */
  { 0x253b, 0x257c }, /* 0x1257C = 0x1253B U+309A */
  { 0x2544, 0x257d }, /* 0x1257D = 0x12544 U+309A */
  { 0x2548, 0x257e }, /* 0x1257E = 0x12548 U+309A */
  { 0x2675, 0x2678 }, /* 0x12678 = 0x12675 U+309A */
};

/* { UCS-4 code, two-byte KANJI-set code } pairs for the combining marks:
   the reverse of the "non-JISX0213 modification" mappings applied in
   b24_char_conv (U+20DD and the nonspacing_symbol table).  */
static const uint32_t ucs4_to_nonsp_kanji[][2] =
{
  {0x20dd, 0x227e}, {0x0300, 0x212e}, {0x0301, 0x212d}, {0x0302, 0x2130},
  {0x0304, 0x2131}, {0x0308, 0x212f}, {0x0332,
0x2132} }; static const uint32_t ucs4_to_extsym[][2] = { {0x00b2, 0x7c55}, {0x00b3, 0x7c56}, {0x00bc, 0x7d54}, {0x00bd, 0x7d50}, {0x00be, 0x7d55}, {0x0fd6, 0x7b2d}, {0x203c, 0x7d6e}, {0x2049, 0x7d6f}, {0x2113, 0x7d47}, {0x2116, 0x7d2d}, {0x2121, 0x7d2e}, {0x213b, 0x7c7b}, {0x2150, 0x7d5c}, {0x2151, 0x7d5e}, {0x2152, 0x7d5f}, {0x2153, 0x7d52}, {0x2154, 0x7d53}, {0x2155, 0x7d56}, {0x2156, 0x7d57}, {0x2157, 0x7d58}, {0x2158, 0x7d59}, {0x2159, 0x7d5a}, {0x215a, 0x7d5b}, {0x215b, 0x7d5d}, {0x2160, 0x7e21}, {0x2161, 0x7e22}, {0x2162, 0x7e23}, {0x2163, 0x7e24}, {0x2164, 0x7e25}, {0x2165, 0x7e26}, {0x2166, 0x7e27}, {0x2167, 0x7e28}, {0x2168, 0x7e29}, {0x2169, 0x7e2a}, {0x216a, 0x7e2b}, {0x216b, 0x7e2c}, {0x2189, 0x7d51}, {0x2460, 0x7e61}, {0x2461, 0x7e62}, {0x2462, 0x7e63}, {0x2463, 0x7e64}, {0x2464, 0x7e65}, {0x2465, 0x7e66}, {0x2466, 0x7e67}, {0x2467, 0x7e68}, {0x2468, 0x7e69}, {0x2469, 0x7e6a}, {0x246a, 0x7e6b}, {0x246b, 0x7e6c}, {0x246c, 0x7e6d}, {0x246d, 0x7e6e}, {0x246e, 0x7e6f}, {0x246f, 0x7e70}, {0x2470, 0x7e2d}, {0x2471, 0x7e2e}, {0x2472, 0x7e2f}, {0x2473, 0x7e30}, {0x2474, 0x7e31}, {0x2475, 0x7e32}, {0x2476, 0x7e33}, {0x2477, 0x7e34}, {0x2478, 0x7e35}, {0x2479, 0x7e36}, {0x247a, 0x7e37}, {0x247b, 0x7e38}, {0x247c, 0x7e39}, {0x247d, 0x7e3a}, {0x247e, 0x7e3b}, {0x247f, 0x7e3c}, {0x2488, 0x7c31}, {0x2489, 0x7c32}, {0x248a, 0x7c33}, {0x248b, 0x7c34}, {0x248c, 0x7c35}, {0x248d, 0x7c36}, {0x248e, 0x7c37}, {0x248f, 0x7c38}, {0x2490, 0x7c39}, {0x2491, 0x7a4d}, {0x2492, 0x7a4e}, {0x2493, 0x7a4f}, {0x24b9, 0x7b3e}, {0x24c8, 0x7b3f}, {0x24eb, 0x7e7b}, {0x24ec, 0x7e7c}, {0x25b6, 0x7c50}, {0x25c0, 0x7c51}, {0x2600, 0x7d60}, {0x2601, 0x7d61}, {0x2602, 0x7d62}, {0x2603, 0x7d73}, {0x260e, 0x7b4b}, {0x260e, 0x7d7b}, {0x2613, 0x7b26}, {0x2614, 0x7d71}, {0x2616, 0x7d64}, {0x2617, 0x7d65}, {0x2660, 0x7d6b}, {0x2663, 0x7d6a}, {0x2665, 0x7d69}, {0x2666, 0x7d68}, {0x2668, 0x7b31}, {0x266c, 0x7d7a}, {0x2693, 0x7b35}, {0x269e, 0x7d78}, {0x269f, 0x7d79}, {0x26a1, 0x7d75}, {0x26be, 
0x7d30}, {0x26bf, 0x7a67}, {0x26c4, 0x7d63}, {0x26c5, 0x7d70}, {0x26c6, 0x7d72}, {0x26c7, 0x7d74}, {0x26c8, 0x7d76}, {0x26c9, 0x7d66}, {0x26ca, 0x7d67}, {0x26cb, 0x7d6c}, {0x26cc, 0x7a21}, {0x26cd, 0x7a22}, {0x26cf, 0x7a24}, {0x26d0, 0x7a25}, {0x26d1, 0x7a26}, {0x26d2, 0x7a28}, {0x26d3, 0x7a2a}, {0x26d4, 0x7a2b}, {0x26d5, 0x7a29}, {0x26d6, 0x7a34}, {0x26d7, 0x7a35}, {0x26d8, 0x7a36}, {0x26d9, 0x7a37}, {0x26da, 0x7a38}, {0x26db, 0x7a39}, {0x26dc, 0x7a3a}, {0x26dd, 0x7a3b}, {0x26de, 0x7a3c}, {0x26df, 0x7a3d}, {0x26e0, 0x7a3e}, {0x26e1, 0x7a3f}, {0x26e3, 0x7b21}, {0x26e8, 0x7b29}, {0x26e9, 0x7b2c}, {0x26ea, 0x7b2e}, {0x26eb, 0x7b2f}, {0x26ec, 0x7b30}, {0x26ed, 0x7b32}, {0x26ee, 0x7b33}, {0x26ef, 0x7b34}, {0x26f0, 0x7b37}, {0x26f1, 0x7b38}, {0x26f2, 0x7b39}, {0x26f3, 0x7b3a}, {0x26f4, 0x7b3b}, {0x26f5, 0x7b3c}, {0x26f6, 0x7b40}, {0x26f7, 0x7b46}, {0x26f8, 0x7b47}, {0x26f9, 0x7b48}, {0x26fa, 0x7b49}, {0x26fb, 0x7b4c}, {0x26fc, 0x7b4d}, {0x26fd, 0x7b4e}, {0x26fe, 0x7b4f}, {0x26ff, 0x7b51}, {0x2762, 0x7a23}, {0x2776, 0x7e71}, {0x2777, 0x7e72}, {0x2778, 0x7e73}, {0x2779, 0x7e74}, {0x277a, 0x7e75}, {0x277b, 0x7e76}, {0x277c, 0x7e77}, {0x277d, 0x7e78}, {0x277e, 0x7e79}, {0x277f, 0x7e7a}, {0x27a1, 0x7c21}, {0x27d0, 0x7c54}, {0x2a00, 0x7d6d}, {0x2b05, 0x7c22}, {0x2b06, 0x7c23}, {0x2b07, 0x7c24}, {0x2b1b, 0x7a60}, {0x2b24, 0x7a61}, {0x2b2e, 0x7c26}, {0x2b2f, 0x7c25}, {0x2b55, 0x7a40}, {0x2b56, 0x7b22}, {0x2b57, 0x7b23}, {0x2b58, 0x7b24}, {0x2b59, 0x7b25}, {0x3012, 0x7b28}, {0x3016, 0x7c52}, {0x3017, 0x7c53}, {0x3036, 0x7d2f}, {0x322a, 0x7d21}, {0x322b, 0x7d22}, {0x322c, 0x7d23}, {0x322d, 0x7d24}, {0x322e, 0x7d25}, {0x322f, 0x7d26}, {0x3230, 0x7d27}, {0x3231, 0x7c4d}, {0x3232, 0x7c4c}, {0x3233, 0x7c4a}, {0x3236, 0x7c4b}, {0x3237, 0x7d28}, {0x3239, 0x7c4e}, {0x3244, 0x7c4f}, {0x3245, 0x7b2b}, {0x3246, 0x7b2a}, {0x3247, 0x7c78}, {0x3248, 0x7a41}, {0x3249, 0x7a42}, {0x324a, 0x7a43}, {0x324b, 0x7a44}, {0x324c, 0x7a45}, {0x324d, 0x7a46}, {0x324e, 0x7a47}, {0x324f, 0x7a48}, {0x3251, 
0x7e3d}, {0x3252, 0x7e3e}, {0x3253, 0x7e3f}, {0x3254, 0x7e40}, {0x3255, 0x7e5b}, {0x3256, 0x7e5c}, {0x3257, 0x7e5d}, {0x3258, 0x7e5e}, {0x3259, 0x7e5f}, {0x325a, 0x7e60}, {0x325b, 0x7e7d}, {0x328b, 0x7b27}, {0x3299, 0x7a73}, {0x3371, 0x7d4d}, {0x337b, 0x7d2c}, {0x337c, 0x7d2b}, {0x337d, 0x7d2a}, {0x337e, 0x7d29}, {0x338f, 0x7d48}, {0x3390, 0x7d49}, {0x339d, 0x7c2d}, {0x339e, 0x7d4b}, {0x33a0, 0x7c2e}, {0x33a1, 0x7c2b}, {0x33a2, 0x7d4c}, {0x33a4, 0x7c2f}, {0x33a5, 0x7c2c}, {0x33ca, 0x7d4a}, {0x3402, 0x7521}, {0x351f, 0x752a}, {0x37e2, 0x7541}, {0x3eda, 0x7574}, {0x4093, 0x7578}, {0x4103, 0x757e}, {0x4264, 0x7626}, {0x4efd, 0x7523}, {0x4eff, 0x7524}, {0x4f9a, 0x7525}, {0x4fc9, 0x7526}, {0x509c, 0x7527}, {0x511e, 0x7528}, {0x5186, 0x7c2a}, {0x51bc, 0x7529}, {0x5307, 0x752b}, {0x5361, 0x752c}, {0x536c, 0x752d}, {0x544d, 0x7530}, {0x5496, 0x7531}, {0x549c, 0x7532}, {0x54a9, 0x7533}, {0x550e, 0x7534}, {0x554a, 0x7535}, {0x5672, 0x7536}, {0x56e4, 0x7537}, {0x5733, 0x7538}, {0x5734, 0x7539}, {0x5880, 0x753b}, {0x59e4, 0x753c}, {0x5a23, 0x753d}, {0x5a55, 0x753e}, {0x5bec, 0x753f}, {0x5e74, 0x7c27}, {0x5eac, 0x7542}, {0x5f34, 0x7543}, {0x5f45, 0x7544}, {0x5fb7, 0x7545}, {0x6017, 0x7546}, {0x6130, 0x7548}, {0x65e5, 0x7c29}, {0x6624, 0x7549}, {0x66c8, 0x754a}, {0x66d9, 0x754b}, {0x66fa, 0x754c}, {0x66fb, 0x754d}, {0x6708, 0x7c28}, {0x6852, 0x754e}, {0x6911, 0x7550}, {0x693b, 0x7551}, {0x6a45, 0x7552}, {0x6a91, 0x7553}, {0x6adb, 0x7554}, {0x6bf1, 0x7558}, {0x6ce0, 0x7559}, {0x6d2e, 0x755a}, {0x6dbf, 0x755c}, {0x6dca, 0x755d}, {0x6df8, 0x755e}, {0x6f5e, 0x7560}, {0x6ff9, 0x7561}, {0x7064, 0x7562}, {0x7147, 0x7565}, {0x71c1, 0x7566}, {0x7200, 0x7567}, {0x739f, 0x7568}, {0x73a8, 0x7569}, {0x73c9, 0x756a}, {0x73d6, 0x756b}, {0x741b, 0x756c}, {0x7421, 0x756d}, {0x7426, 0x756f}, {0x742a, 0x7570}, {0x742c, 0x7571}, {0x7439, 0x7572}, {0x744b, 0x7573}, {0x7575, 0x7575}, {0x7581, 0x7576}, {0x7772, 0x7577}, {0x78c8, 0x7579}, {0x78e0, 0x757a}, {0x7947, 0x757b}, {0x79ae, 0x757c}, {0x79da, 
0x7622}, {0x7a1e, 0x7623}, {0x7b7f, 0x7624}, {0x7c31, 0x7625}, {0x7d8b, 0x7627}, {0x7fa1, 0x7628}, {0x8118, 0x7629}, {0x813a, 0x762a}, {0x82ae, 0x762c}, {0x845b, 0x762d}, {0x84dc, 0x762e}, {0x84ec, 0x762f}, {0x8559, 0x7630}, {0x85ce, 0x7631}, {0x8755, 0x7632}, {0x87ec, 0x7633}, {0x880b, 0x7634}, {0x88f5, 0x7635}, {0x89d2, 0x7636}, {0x8a79, 0x752e}, {0x8af6, 0x7637}, {0x8dce, 0x7638}, {0x8fbb, 0x7639}, {0x8ff6, 0x763a}, {0x90dd, 0x763b}, {0x9127, 0x763c}, {0x912d, 0x763d}, {0x91b2, 0x763e}, {0x9233, 0x763f}, {0x9288, 0x7640}, {0x9321, 0x7641}, {0x9348, 0x7642}, {0x9592, 0x7643}, {0x96de, 0x7644}, {0x9903, 0x7645}, {0x9940, 0x7646}, {0x9ad9, 0x7647}, {0x9bd6, 0x7648}, {0x9dd7, 0x7649}, {0x9eb4, 0x764a}, {0x9eb5, 0x764b}, {0x9fc4, 0x754f}, {0x9fc5, 0x7621}, {0x9fc6, 0x757d}, {0xfa10, 0x753a}, {0xfa11, 0x7540}, {0xfa45, 0x755b}, {0xfa46, 0x755f}, {0xfa4a, 0x756e}, {0xfa6b, 0x7547}, {0xfa6c, 0x7563}, {0xfa6d, 0x762b}, {0x1f100, 0x7c30}, {0x1f101, 0x7c40}, {0x1f102, 0x7c41}, {0x1f103, 0x7c42}, {0x1f104, 0x7c43}, {0x1f105, 0x7c44}, {0x1f106, 0x7c45}, {0x1f107, 0x7c46}, {0x1f108, 0x7c47}, {0x1f109, 0x7c48}, {0x1f10a, 0x7c49}, {0x1f110, 0x7e41}, {0x1f111, 0x7e42}, {0x1f112, 0x7e43}, {0x1f113, 0x7e44}, {0x1f114, 0x7e45}, {0x1f115, 0x7e46}, {0x1f116, 0x7e47}, {0x1f117, 0x7e48}, {0x1f118, 0x7e49}, {0x1f119, 0x7e4a}, {0x1f11a, 0x7e4b}, {0x1f11b, 0x7e4c}, {0x1f11c, 0x7e4d}, {0x1f11d, 0x7e4e}, {0x1f11e, 0x7e4f}, {0x1f11f, 0x7e50}, {0x1f120, 0x7e51}, {0x1f121, 0x7e52}, {0x1f122, 0x7e53}, {0x1f123, 0x7e54}, {0x1f124, 0x7e55}, {0x1f125, 0x7e56}, {0x1f126, 0x7e57}, {0x1f127, 0x7e58}, {0x1f128, 0x7e59}, {0x1f129, 0x7e5a}, {0x1f12a, 0x7d3a}, {0x1f12b, 0x7c77}, {0x1f12c, 0x7c76}, {0x1f12d, 0x7c57}, {0x1f131, 0x7a5e}, {0x1f13d, 0x7a5f}, {0x1f13f, 0x7a52}, {0x1f142, 0x7a59}, {0x1f146, 0x7a53}, {0x1f14a, 0x7a50}, {0x1f14b, 0x7a54}, {0x1f14c, 0x7a51}, {0x1f14d, 0x7a5d}, {0x1f14e, 0x7a72}, {0x1f157, 0x7b3d}, {0x1f15f, 0x7b41}, {0x1f179, 0x7b45}, {0x1f17b, 0x7b4a}, {0x1f17c, 0x7b50}, 
{0x1f17f, 0x7a30}, {0x1f18a, 0x7a31}, {0x1f18b, 0x7b42}, {0x1f18c, 0x7b44}, {0x1f18d, 0x7b43}, {0x1f190, 0x7c79}, {0x1f200, 0x7a74}, {0x1f210, 0x7a55}, {0x1f211, 0x7a56}, {0x1f212, 0x7a57}, {0x1f213, 0x7a58}, {0x1f214, 0x7a5a}, {0x1f214, 0x7d3e}, {0x1f215, 0x7a5b}, {0x1f216, 0x7a5c}, {0x1f217, 0x7a62}, {0x1f218, 0x7a63}, {0x1f219, 0x7a64}, {0x1f21a, 0x7a65}, {0x1f21b, 0x7a66}, {0x1f21c, 0x7a68}, {0x1f21d, 0x7a69}, {0x1f21e, 0x7a6a}, {0x1f21f, 0x7a6b}, {0x1f220, 0x7a6c}, {0x1f221, 0x7a6d}, {0x1f222, 0x7a6e}, {0x1f223, 0x7a6f}, {0x1f224, 0x7a70}, {0x1f225, 0x7a71}, {0x1f226, 0x7c7a}, {0x1f227, 0x7d3b}, {0x1f228, 0x7d3c}, {0x1f229, 0x7d3d}, {0x1f22a, 0x7d3f}, {0x1f22b, 0x7d40}, {0x1f22c, 0x7d41}, {0x1f22d, 0x7d42}, {0x1f22e, 0x7d43}, {0x1f22f, 0x7d44}, {0x1f230, 0x7d45}, {0x1f231, 0x7d46}, {0x1f240, 0x7d31}, {0x1f241, 0x7d32}, {0x1f242, 0x7d33}, {0x1f243, 0x7d34}, {0x1f244, 0x7d35}, {0x1f245, 0x7d36}, {0x1f246, 0x7d37}, {0x1f247, 0x7d38}, {0x1f248, 0x7d39}, {0x1f6e7, 0x7b36}, {0x20158, 0x7522}, {0x20bb7, 0x752f}, {0x233cc, 0x7555}, {0x233fe, 0x7556}, {0x235c4, 0x7557}, {0x242ee, 0x7564} }; static int out_ascii (struct state_to *st, uint32_t ch, unsigned char **outptr, const unsigned char *outend) { size_t esc_seqs; unsigned char *op = *outptr; esc_seqs = 0; if ((ch & 0x60) && st->gl == 0 && ch != 0x20 && ch != 0x7f && ch != 0xa0) ++ esc_seqs; if (__glibc_unlikely (op + esc_seqs + 1 > outend)) return __GCONV_FULL_OUTPUT; if (esc_seqs > 0) { *op++ = LS1; st->gl = 1; } *op++ = ch & 0xff; if (ch == 0 || ch == LF) *st = def_state_to; *outptr = op; return __GCONV_OK; } static int out_jisx0201 (struct state_to *st, uint32_t ch, unsigned char **outptr, const unsigned char *outend) { size_t esc_seqs; unsigned char *op = *outptr; esc_seqs = 0; if (st->g3 != JIS0201_KATA_idx) esc_seqs += 3; if (st->gr == 0) /* need LS3R */ esc_seqs += 2; if (__glibc_unlikely (op + esc_seqs + 1 > outend)) return __GCONV_FULL_OUTPUT; if (esc_seqs >= 3) { /* need charset designation */ *op++ = ESC; 
*op++ = '\x2b'; /* designate single byte charset to G3 */ *op++ = JIS0201_KATA_set; st->g3 = JIS0201_KATA_idx; } if (esc_seqs == 2 || esc_seqs == 5) { *op++ = ESC; *op++ = LS3R; st->gr = 1; } *op++ = ch & 0xff; *outptr = op; return __GCONV_OK; } static int out_katakana (struct state_to *st, unsigned char ch, unsigned char **outptr, const unsigned char *outend) { size_t esc_seqs; unsigned char *op = *outptr; esc_seqs = 0; if (st->g3 != KATAKANA_idx) esc_seqs += 3; if (st->gr == 0) /* need LS3R */ esc_seqs += 2; if (__glibc_unlikely (op + esc_seqs + 1 > outend)) return __GCONV_FULL_OUTPUT; if (esc_seqs >= 3) { /* need charset designation */ *op++ = ESC; *op++ = '\x2b'; /* designate single byte charset to G3 */ *op++ = KATAKANA_set; st->g3 = KATAKANA_idx; } if (esc_seqs == 2 || esc_seqs == 5) { *op++ = ESC; *op++ = LS3R; st->gr = 1; } *op++ = ch | 0x80; *outptr = op; return __GCONV_OK; } static int out_hiragana (struct state_to *st, unsigned char ch, unsigned char **outptr, const unsigned char *outend) { size_t esc_seqs; unsigned char *op = *outptr; esc_seqs = 0; if (st->g2 != HIRAGANA_idx) esc_seqs += 3; if (st->gr == 1) /* need LS2R */ esc_seqs += 2; if (__glibc_unlikely (op + esc_seqs + 1 > outend)) return __GCONV_FULL_OUTPUT; if (esc_seqs >= 3) { /* need charset designation */ *op++ = ESC; *op++ = '\x2a'; /* designate single byte charset to G2 */ *op++ = HIRAGANA_set; st->g2 = HIRAGANA_idx; } if (esc_seqs == 2 || esc_seqs == 5) { *op++ = ESC; *op++ = LS2R; st->gr = 0; } *op++ = ch | 0x80; *outptr = op; return __GCONV_OK; } static int is_kana_punc (uint32_t ch) { int i; size_t len; len = NELEMS (hira_punc); for (i = 0; i < len; i++) if (ch == hira_punc[i]) return i; len = NELEMS (kata_punc); for (i = 0; i < len; i++) if (ch == kata_punc[i]) return i + NELEMS (hira_punc); return -1; } static int out_kana_punc (struct state_to *st, int idx, unsigned char **outptr, const unsigned char *outend) { size_t len = NELEMS (hira_punc); if (idx < len) return out_hiragana (st, 
0x77 + idx, outptr, outend); idx -= len; if (idx >= 2) { /* common punc. symbols shared by katakana/hiragana */ /* guess which is used currently */ if (st->gr == 0 && st->g2 == HIRAGANA_idx) return out_hiragana (st, 0x77 + idx, outptr, outend); else if (st->gr == 1 && st->g3 == KATAKANA_idx) return out_katakana (st, 0x77 + idx, outptr, outend); else if (st->g2 == HIRAGANA_idx && st->g3 != KATAKANA_idx) return out_hiragana (st, 0x77 + idx, outptr, outend); /* fall through */ } return out_katakana (st, 0x77 + idx, outptr, outend); } static int out_kanji (struct state_to *st, uint32_t ch, unsigned char **outptr, const unsigned char *outend) { size_t esc_seqs; unsigned char *op = *outptr; esc_seqs = 0; if (st->gl) ++ esc_seqs; if (__glibc_unlikely (op + esc_seqs + 2 > outend)) return __GCONV_FULL_OUTPUT; if (st->gl) { *op++ = LS0; st->gl = 0; } *op++ = (ch >> 8) & 0x7f; *op++ = ch & 0x7f; *outptr = op; return __GCONV_OK; } /* convert JISX0213_{1,2} to ARIB-STD-B24 */ /* assert(set_idx == JISX0213_1_idx || set_idx == JISX0213_2_idx); */ static int out_jisx0213 (struct state_to *st, uint32_t ch, int set_idx, unsigned char **outptr, const unsigned char *outend) { size_t esc_seqs; unsigned char *op = *outptr; esc_seqs = 0; if (st->g2 != set_idx) esc_seqs += 4; /* designate to G2 */ if (st->gr) /* if GR does not designate G2 */ esc_seqs ++; /* SS3 */ if (__glibc_unlikely (op + esc_seqs + 2 > outend)) return __GCONV_FULL_OUTPUT; if (esc_seqs >= 4) { /* need charset designation */ *op++ = ESC; *op++ = '\x24'; /* designate multibyte charset */ *op++ = '\x2a'; /* to G2 */ *op++ = (set_idx == JISX0213_1_idx) ? JISX0213_1_set : JISX0213_2_set; st->g2 = JISX0213_1_idx; } if (st->gr) *op++ = SS2; /* GR designates G3 now. 
insert SS2 */ else ch |= 0x8080; /* use GR(G2) */ *op++ = (ch >> 8) & 0xff; *op++ = ch & 0xff; *outptr = op; return __GCONV_OK; } static int out_extsym (struct state_to *st, uint32_t ch, unsigned char **outptr, const unsigned char *outend) { size_t esc_seqs; unsigned char *op = *outptr; esc_seqs = 0; if (st->g3 != EXTRA_SYMBOLS_idx) esc_seqs += 4; if (st->gr == 0) /* if GR designates G2, use SS3 */ ++ esc_seqs; if (__glibc_unlikely (op + esc_seqs + 2 > outend)) return __GCONV_FULL_OUTPUT; if (esc_seqs >= 4) { /* need charset designation */ *op++ = ESC; *op++ = '\x24'; /* designate multibyte charset */ *op++ = '\x2b'; /* to G3 */ *op++ = EXTRA_SYMBOLS_set; st->g3 = EXTRA_SYMBOLS_idx; } if (st->gr == 0) *op++ = SS3; else ch |= 0x8080; *op++ = (ch >> 8) & 0xff; *op++ = ch & 0xff; *outptr = op; return __GCONV_OK; } static int out_buffered (struct state_to *st, unsigned char **outptr, const unsigned char *outend) { int r; if (st->prev == 0) return __GCONV_OK; if (st->prev >> 16) r = out_jisx0213 (st, st->prev & 0x7f7f, JISX0213_1_idx, outptr, outend); else if ((st->prev & 0x7f00) == 0x2400) r = out_hiragana (st, st->prev, outptr, outend); else if ((st->prev & 0x7f00) == 0x2500) r = out_katakana (st, st->prev, outptr, outend); else /* should not be reached */ r = out_kanji (st, st->prev, outptr, outend); st->prev = 0; return r; } static int cmp_u32 (const void *a, const void *b) { return *(const uint32_t *)a - *(const uint32_t *)b; } static int find_extsym_idx (uint32_t ch) { const uint32_t (*p)[2]; p = bsearch (&ch, ucs4_to_extsym, NELEMS (ucs4_to_extsym), sizeof (ucs4_to_extsym[0]), cmp_u32); return p ? (p - ucs4_to_extsym) : -1; } #define BODY \ { \ uint32_t ch, jch; \ unsigned char buf[2]; \ int r; \ \ ch = get32 (inptr); \ if (st.prev != 0) \ { \ /* Attempt to combine the last character with this one. 
*/ \ unsigned int idx; \ unsigned int len; \ \ if (ch == 0x02e5) \ idx = COMP_TABLE_IDX_02E5, len = COMP_TABLE_LEN_02E5; \ else if (ch == 0x02e9) \ idx = COMP_TABLE_IDX_02E9, len = COMP_TABLE_LEN_02E9; \ else if (ch == 0x0300) \ idx = COMP_TABLE_IDX_0300, len = COMP_TABLE_LEN_0300; \ else if (ch == 0x0301) \ idx = COMP_TABLE_IDX_0301, len = COMP_TABLE_LEN_0301; \ else if (ch == 0x309a) \ idx = COMP_TABLE_IDX_309A, len = COMP_TABLE_LEN_309A; \ else \ idx = 0, len = 0; \ \ for (;len > 0; ++idx, --len) \ if (comp_table_data[idx].base == (st.prev & 0x7f7f)) \ break; \ \ if (len > 0) \ { \ /* Output the combined character. */ \ /* We know the combined character is in JISX0213 plane 1 */ \ r = out_jisx0213 (&st, comp_table_data[idx].composed, \ JISX0213_1_idx, &outptr, outend); \ st.prev = 0; \ goto next; \ } \ \ /* not a combining character */ \ /* Output the buffered character. */ \ /* We know it is in JISX0208(HIRA/KATA) or in JISX0213 plane 1. */ \ r = out_buffered (&st, &outptr, outend); \ if (r != __GCONV_OK) \ { \ result = r; \ break; \ } \ /* fall through & output the current character (ch). */ \ } \ \ /* ASCII or C0/C1 or NBSP */ \ if (ch <= 0xa0) \ { \ if ((ch & 0x60) || ch == 0 || ch == LF || ch == CR || ch == BS) \ r = out_ascii (&st, ch, &outptr, outend); \ else \ STANDARD_TO_LOOP_ERR_HANDLER (4); \ goto next; \ } \ \ /* half-width KATAKANA */ \ if (ucs4_to_jisx0201 (ch, buf) != __UNKNOWN_10646_CHAR) \ { \ if (__glibc_unlikely (buf[0] < 0x80)) /* yen sign or overline */ \ r = out_ascii (&st, buf[0], &outptr, outend); \ else \ r = out_jisx0201 (&st, buf[0], &outptr, outend); \ goto next; \ } \ \ /* check kana punct. 
symbols (prefer 1-Byte charset over KANJI_set) */ \ r = is_kana_punc (ch); \ if (r >= 0) \ { \ r = out_kana_punc (&st, r, &outptr, outend); \ goto next; \ } \ \ if (ch >= ucs4_to_nonsp_kanji[0][0] && \ ch <= ucs4_to_nonsp_kanji[NELEMS (ucs4_to_nonsp_kanji) - 1][0]) \ { \ int i; \ \ for (i = 0; i < NELEMS (ucs4_to_nonsp_kanji); i++) \ { \ if (ch < ucs4_to_nonsp_kanji[i][0]) \ break; \ else if (ch == ucs4_to_nonsp_kanji[i][0]) \ { \ r = out_kanji (&st, ucs4_to_nonsp_kanji[i][1], \ &outptr, outend); \ goto next; \ } \ } \ } \ \ jch = ucs4_to_jisx0213 (ch); \ \ if (ucs4_to_jisx0208 (ch, buf, 2) != __UNKNOWN_10646_CHAR) \ { \ if (jch & 0x0080) \ { \ /* A possible match in comp_table_data. Buffer it. */ \ \ /* We know it's a JISX 0213 plane 1 character. */ \ assert ((jch & 0x8000) == 0); \ \ st.prev = jch & 0x7f7f; \ r = __GCONV_OK; \ goto next; \ } \ /* check HIRAGANA/KATAKANA (prefer 1-Byte charset over KANJI_set) */ \ if (buf[0] == 0x24) \ r = out_hiragana (&st, buf[1], &outptr, outend); \ else if (buf[0] == 0x25) \ r = out_katakana (&st, buf[1], &outptr, outend); \ else if (jch == 0x227e || (jch >= 0x212d && jch <= 0x2132)) \ r = out_jisx0213 (&st, jch, JISX0213_1_idx, &outptr, outend); \ else \ r = out_kanji (&st, jch, &outptr, outend); \ goto next; \ } \ \ if (jch & 0x0080) \ { \ st.prev = (jch & 0x7f7f) | 0x10000; \ r = __GCONV_OK; \ goto next; \ } \ \ /* KANJI shares some chars with EXTRA_SYMBOLS, but prefer extra symbols*/ \ r = find_extsym_idx (ch); \ if (r >= 0) \ { \ ch = ucs4_to_extsym[r][1]; \ r = out_extsym (&st, ch, &outptr, outend); \ goto next; \ } \ \ if (jch != 0) \ { \ r = out_jisx0213 (&st, jch & 0x7f7f, \ (jch & 0x8000) ? JISX0213_2_idx : JISX0213_1_idx, \ &outptr, outend); \ goto next; \ } \ \ UNICODE_TAG_HANDLER (ch, 4); \ STANDARD_TO_LOOP_ERR_HANDLER (4); \ \ next: \ if (r != __GCONV_OK) \ { \ result = r; \ break; \ } \ inptr += 4; \ } #include /* Now define the toplevel functions. */ #include