/*
 * This is an implementation of wcwidth() and wcswidth() (defined in
 * IEEE Std 1003.1-2001) for Unicode.
 *
 * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
 * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
 *
 * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
 *
 * Permission to use, copy, modify, and distribute this software
 * for any purpose and without fee is hereby granted. The author
 * disclaims all warranties with regard to this software.
 *
 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
 */

/*
 * Changes made for mutt:
 * - Adapted for Mutt by Edmund Grimley Evans.
 * - Changed 'first'/'last' members of combined[] to wchar_t from unsigned short to fix compiler warnings, 2007-11-13, Rocco Rutte
 */
module wcwidth_cjk_compat.wcwidth;


/**
 * Switches that adjust the width calculation.
 *
 * Every member is used as a boolean flag (zero = off, non-zero = on):
 *
 * - is_apple: when zero, wcwidth_ucs() reports U+3099..U+309A as zero width.
 * - is_legacy_ja: when non-zero, wcwidth_cjk() reports a width of 2 for the
 *   code points in the legacy_ja table.
 * - is_emoji: when non-zero, wcwidth_cjk() reports a width of 2 for the
 *   code points in the emoji table.
 */
extern (C)
public struct wcwidth_cjk_option
{
	ubyte is_apple = 0;
	ubyte is_legacy_ja = 0;
	ubyte is_emoji = 1;
}

/**
 * Option set used when a caller does not supply one.
 * Holds the same values as a default-initialised wcwidth_cjk_option.
 */
extern (C)
public static immutable wcwidth_cjk_option default_wcwidth_cjk_option =
{
	is_apple: 0,
	is_legacy_ja: 0,
	is_emoji: 1,
};

/**
 * Address of default_wcwidth_cjk_option; this is the default argument of the
 * `option` parameter of the width functions in this module.
 */
extern (C)
public static immutable wcwidth_cjk_option* default_wcwidth_cjk_option_ptr = &.default_wcwidth_cjk_option;

/**
 * Closed range of code points: first <= ucs <= last.
 */
private struct interval
{
	uint first;
	uint last;
}

/**
 * Auxiliary function for binary search in an interval table.
 *
 * Params:
 *      ucs = code point to look up
 *      table = intervals sorted in ascending order that do not overlap
 *      max = index of the last table entry to consider (callers pass `table.length - 1`)
 *
 * Returns: true if ucs lies inside one of the intervals table[0] .. table[max], false otherwise (including when the table is empty or max is not a valid index)
 */
pure nothrow @safe @nogc @live
private bool bisearch(uint ucs, immutable ref .interval[] table, size_t max)
{
	/*
	 * Callers compute max as `table.length - 1`, which wraps around to
	 * size_t.max for an empty table. Reject that (and any other invalid
	 * index) instead of indexing out of bounds.
	 */
	if ((table.length == 0) || (max >= table.length)) {
		return false;
	}

	/* Nothing to search for when ucs is outside the span covered by the table. */
	if ((ucs < table[0].first) || (ucs > table[max].last)) {
		return false;
	}

	size_t low = 0;
	size_t high = max;

	while (high >= low) {
		/* Midpoint computed without the intermediate sum low + high. */
		immutable size_t mid = low + ((high - low) / 2);

		if (ucs > table[mid].last) {
			low = mid + 1;
		} else if (ucs < table[mid].first) {
			/*
			 * mid cannot be 0 here: ucs >= table[0].first was established
			 * above, so this subtraction never wraps around.
			 */
			high = mid - 1;
		} else {
			return true;
		}
	}

	return false;
}

/*
 * The following two functions define the
column width of an ISO 10646
 * character as follows:
 *
 *    - The null character (U+0000) has a column width of 0.
 *
 *    - Other C0/C1 control characters and DEL will lead to a return
 *      value of -1.
 *
 *    - Non-spacing and enclosing combining characters (general
 *      category code Mn or Me in the Unicode database) have a
 *      column width of 0.
 *
 *    - SOFT HYPHEN (U+00AD) has a column width of 1.
 *
 *    - Other format characters (general category code Cf in the Unicode
 *      database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
 *
 *    - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
 *      have a column width of 0.
 *
 *    - Spacing characters in the East Asian Wide (W) or East Asian
 *      Full-width (F) category as defined in Unicode Technical
 *      Report #11 have a column width of 2.
 *
 *    - All remaining characters (including all printable
 *      ISO 8859-1 and WGL4 characters, Unicode control characters,
 *      etc.) have a column width of 1.
 *
 * This implementation assumes that the uint values passed in as
 * characters are ISO 10646 code points (the C original took wchar_t).
*/

/* sorted list of non-overlapping intervals of non-spacing characters */
/* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
/* U+3099..U+309A are not listed here; they live in the separate not_apple table below. */
private static immutable .interval[] combining =
[
	{0x0300, 0x036F}, {0x0483, 0x0486}, {0x0488, 0x0489},
	{0x0591, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
	{0x05C4, 0x05C5}, {0x05C7, 0x05C7}, {0x0600, 0x0603},
	{0x0610, 0x0615}, {0x064B, 0x065E}, {0x0670, 0x0670},
	{0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
	{0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A},
	{0x07A6, 0x07B0}, {0x07EB, 0x07F3}, {0x0901, 0x0902},
	{0x093C, 0x093C}, {0x0941, 0x0948}, {0x094D, 0x094D},
	{0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0981},
	{0x09BC, 0x09BC}, {0x09C1, 0x09C4}, {0x09CD, 0x09CD},
	{0x09E2, 0x09E3}, {0x0A01, 0x0A02}, {0x0A3C, 0x0A3C},
	{0x0A41, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D},
	{0x0A70, 0x0A71}, {0x0A81, 0x0A82}, {0x0ABC, 0x0ABC},
	{0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD},
	{0x0AE2, 0x0AE3}, {0x0B01, 0x0B01}, {0x0B3C, 0x0B3C},
	{0x0B3F, 0x0B3F}, {0x0B41, 0x0B43}, {0x0B4D, 0x0B4D},
	{0x0B56, 0x0B56}, {0x0B82, 0x0B82}, {0x0BC0, 0x0BC0},
	{0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C48},
	{0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0CBC, 0x0CBC},
	{0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
	{0x0CE2, 0x0CE3}, {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D},
	{0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6},
	{0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
	{0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC},
	{0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35},
	{0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F71, 0x0F7E},
	{0x0F80, 0x0F84}, {0x0F86, 0x0F87}, {0x0F90, 0x0F97},
	{0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102D, 0x1030},
	{0x1032, 0x1032}, {0x1036, 0x1037}, {0x1039, 0x1039},
	{0x1058, 0x1059}, {0x1160, 0x11FF}, {0x135F, 0x135F},
	{0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753},
	{0x1772, 0x1773}, {0x17B4, 0x17B5}, {0x17B7, 0x17BD},
	{0x17C6, 0x17C6}, {0x17C9, 0x17D3}, {0x17DD, 0x17DD},
	{0x180B, 0x180D}, {0x18A9, 0x18A9}, {0x1920, 0x1922},
	{0x1927, 0x1928}, {0x1932, 0x1932}, {0x1939, 0x193B},
	{0x1A17, 0x1A18}, {0x1B00, 0x1B03}, {0x1B34, 0x1B34},
	{0x1B36, 0x1B3A}, {0x1B3C, 0x1B3C}, {0x1B42, 0x1B42},
	{0x1B6B, 0x1B73}, {0x1DC0, 0x1DCA}, {0x1DFE, 0x1DFF},
	{0x200B, 0x200F}, {0x202A, 0x202E}, {0x2060, 0x2063},
	{0x206A, 0x206F}, {0x20D0, 0x20EF}, {0x302A, 0x302F},
	{0xA806, 0xA806}, {0xA80B, 0xA80B},
	{0xA825, 0xA826}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F},
	{0xFE20, 0xFE23}, {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB},
	{0x010A01, 0x010A03}, {0x010A05, 0x010A06}, {0x010A0C, 0x010A0F},
	{0x010A38, 0x010A3A}, {0x010A3F, 0x010A3F}, {0x01D167, 0x01D169},
	{0x01D173, 0x01D182}, {0x01D185, 0x01D18B}, {0x01D1AA, 0x01D1AD},
	{0x01D242, 0x01D244}, {0x0E0001, 0x0E0001}, {0x0E0020, 0x0E007F},
	{0x0E0100, 0x0E01EF},
];

/* Intervals that wcwidth_ucs() reports as zero width only when option.is_apple is zero. */
private static immutable .interval[] not_apple =
[
	{0x3099, 0x309A},
];

/**
 * Column width of a single ISO 10646 code point.
 *
 * Params:
 *      ucs = code point to measure
 *      option = width options; must not be null
 *
 * Returns:
 *      -1 for C0/C1 control characters and DEL,
 *      0 for U+0000, for code points in the combining table, for
 *      U+3099..U+309A when option.is_apple is zero, and when option is null,
 *      2 for the wide ranges listed in the body,
 *      1 for everything else.
 */
extern (C)
pure nothrow @safe @nogc @live
public int wcwidth_ucs(uint ucs, scope const wcwidth_cjk_option* option = default_wcwidth_cjk_option_ptr)

	in
	{
		assert(option != null);
	}

	do
	{
		/* Contracts are not compiled into release builds, so check again at run time. */
		if (option == null) {
			return 0;
		}

		/* test for 8-bit control characters */
		if (ucs == 0) {
			return 0;
		}

		if ((ucs < 32) || ((ucs >= 0x7F) && (ucs < 0xA0))) {
			return -1;
		}

		if ((!option.is_apple) && (.bisearch(ucs, .not_apple, .not_apple.length - 1))) {
			return 0;
		}

		/* binary search in table of non-spacing characters */
		if (.bisearch(ucs, .combining, .combining.length - 1)) {
			return 0;
		}

		/* if we arrive here, ucs is not a combining or C0/C1 control character */

		/* fast test for majority of non-wide scripts */
		if (ucs < 0x1100) {
			return 1;
		}

		/* From here on ucs >= 0x1100 holds, so the ranges need no extra lower bound. */
		immutable bool is_wide =
			/* Hangul Jamo init. consonants */
			(ucs <= 0x115F) ||

			/* CJK ... Yi */
			(ucs == 0x2329) || (ucs == 0x232A) || ((ucs >= 0x2E80) && (ucs <= 0xA4CF) && (ucs != 0x303F)) ||

			/* Hangul Syllables */
			((ucs >= 0xAC00) && (ucs <= 0xD7A3)) ||

			/* CJK Compatibility Ideographs */
			((ucs >= 0xF900) && (ucs <= 0xFAFF)) ||

			/* Vertical forms */
			((ucs >= 0xFE10) && (ucs <= 0xFE19)) ||

			/* CJK Compatibility Forms */
			((ucs >= 0xFE30) && (ucs <= 0xFE6F)) ||

			/* Fullwidth Forms */
			((ucs >= 0xFF00) && (ucs <= 0xFF60)) ||

			((ucs >= 0xFFE0) && (ucs <= 0xFFE6)) || ((ucs >= 0x020000) && (ucs <= 0x02FFFD)) || ((ucs >= 0x030000) && (ucs <= 0x03FFFD));

		return (is_wide) ? (2) : (1);
	}

/**
 * Total column width of a sequence of code points, measured with wcwidth_ucs().
 *
 * At most n elements are examined; the scan stops early at the first
 * element equal to 0.
 *
 * Params:
 *      pwcs = pointer to the first code point; must not be null
 *      n = maximum number of elements to examine
 *      option = width options; must not be null
 *
 * Returns: the sum of the individual widths, -1 as soon as one element has a negative width, or 0 when pwcs or option is null
 */
extern (C)
pure nothrow @trusted @nogc @live
public int wcswidth_ucs(scope const uint* pwcs, size_t n, scope const wcwidth_cjk_option* option = default_wcwidth_cjk_option_ptr)

	in
	{
		assert(pwcs != null);
		assert(option != null);
	}

	do
	{
		/* Contracts are not compiled into release builds, so check again at run time. */
		if ((pwcs == null) || (option == null)) {
			return 0;
		}

		int width = 0;

		/*
		 * The index is compared with n BEFORE the element is read, so a
		 * buffer of exactly n elements without a terminating 0 is never
		 * read past its end, and nothing is read at all when n is 0.
		 */
		for (size_t i = 0; i < n; i++) {
			immutable uint ucs = pwcs[i];

			if (ucs == 0) {
				break;
			}

			immutable int w = .wcwidth_ucs(ucs, option);

			if (w < 0) {
				return -1;
			}

			width += w;
		}

		return width;
	}

/*
 * The following functions are the same as wcwidth_ucs() and
 * wcswidth_ucs(), except that spacing characters in the East Asian
 * Ambiguous (A) category as defined in Unicode Technical Report #11
 * have a column width of 2. This variant might be useful for users of
 * CJK legacy encodings who want to migrate to UCS without changing
 * the traditional terminal character-width behaviour. It is not
 * otherwise recommended for general use.
*/
/*
 * In addition to the explanation mentioned above,
 * several characters in the East Asian Narrow (Na) and Not East Asian
 * (Neutral) category as defined in Unicode Technical Report #11
 * actually have a column width of 2 in CJK legacy encodings.
 */

/* The preprocessor lines below are inactive; they are kept only as comments. */
//#define wcwidth_cjk wcwidth
//#define wcswidth_cjk wcswidth

/* For FreeBSD: wcwidth() is implemented as an inline function */
//#ifdef wcwidth
//#undef wcwidth
//#endif

/*
 * sorted list of non-overlapping intervals of East Asian Ambiguous
 * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c"
 *
 * wcwidth_cjk() searches this table first and reports a width of 2 for
 * every code point found in it.
 */
private static immutable .interval[] ambiguous =
[
	{0x00A1, 0x00A1}, {0x00A4, 0x00A4}, {0x00A7, 0x00A8},
	{0x00AA, 0x00AA}, {0x00AE, 0x00AE}, {0x00B0, 0x00B4},
	{0x00B6, 0x00BA}, {0x00BC, 0x00BF}, {0x00C6, 0x00C6},
	{0x00D0, 0x00D0}, {0x00D7, 0x00D8}, {0x00DE, 0x00E1},
	{0x00E6, 0x00E6}, {0x00E8, 0x00EA}, {0x00EC, 0x00ED},
	{0x00F0, 0x00F0}, {0x00F2, 0x00F3}, {0x00F7, 0x00FA},
	{0x00FC, 0x00FC}, {0x00FE, 0x00FE}, {0x0101, 0x0101},
	{0x0111, 0x0111}, {0x0113, 0x0113}, {0x011B, 0x011B},
	{0x0126, 0x0127}, {0x012B, 0x012B}, {0x0131, 0x0133},
	{0x0138, 0x0138}, {0x013F, 0x0142}, {0x0144, 0x0144},
	{0x0148, 0x014B}, {0x014D, 0x014D}, {0x0152, 0x0153},
	{0x0166, 0x0167}, {0x016B, 0x016B}, {0x01CE, 0x01CE},
	{0x01D0, 0x01D0}, {0x01D2, 0x01D2}, {0x01D4, 0x01D4},
	{0x01D6, 0x01D6}, {0x01D8, 0x01D8}, {0x01DA, 0x01DA},
	{0x01DC, 0x01DC}, {0x0251, 0x0251}, {0x0261, 0x0261},
	{0x02C4, 0x02C4}, {0x02C7, 0x02C7}, {0x02C9, 0x02CB},
	{0x02CD, 0x02CD}, {0x02D0, 0x02D0}, {0x02D8, 0x02DB},
	{0x02DD, 0x02DD}, {0x02DF, 0x02DF}, {0x0391, 0x03A1},
	{0x03A3, 0x03A9}, {0x03B1, 0x03C1}, {0x03C3, 0x03C9},
	{0x0401, 0x0401}, {0x0410, 0x044F}, {0x0451, 0x0451},
	{0x2010, 0x2010}, {0x2013, 0x2016}, {0x2018, 0x2019},
	{0x201C, 0x201D}, {0x2020, 0x2022}, {0x2024, 0x2027},
	{0x2030, 0x2030}, {0x2032, 0x2033}, {0x2035, 0x2035},
	{0x203B, 0x203B}, {0x203E, 0x203E}, {0x2074, 0x2074},
	{0x207F, 0x207F}, {0x2081, 0x2084}, {0x20AC, 0x20AC},
	{0x2103, 0x2103}, {0x2105, 0x2105}, {0x2109, 0x2109},
	{0x2113, 0x2113}, {0x2116, 0x2116}, {0x2121, 0x2122},
	{0x2126, 0x2126}, {0x212B, 0x212B}, {0x2153, 0x2154},
	{0x215B, 0x215E}, {0x2160, 0x216B}, {0x2170, 0x2179},
	{0x2190, 0x2199}, {0x21B8, 0x21B9}, {0x21D2, 0x21D2},
	{0x21D4, 0x21D4}, {0x21E7, 0x21E7}, {0x2200, 0x2200},
	{0x2202, 0x2203}, {0x2207, 0x2208}, {0x220B, 0x220B},
	{0x220F, 0x220F}, {0x2211, 0x2211}, {0x2215, 0x2215},
	{0x221A, 0x221A}, {0x221D, 0x2220}, {0x2223, 0x2223},
	{0x2225, 0x2225}, {0x2227, 0x222C}, {0x222E, 0x222E},
	{0x2234, 0x2237}, {0x223C, 0x223D}, {0x2248, 0x2248},
	{0x224C, 0x224C}, {0x2252, 0x2252}, {0x2260, 0x2261},
	{0x2264, 0x2267}, {0x226A, 0x226B}, {0x226E, 0x226F},
	{0x2282, 0x2283}, {0x2286, 0x2287}, {0x2295, 0x2295},
	{0x2299, 0x2299}, {0x22A5, 0x22A5}, {0x22BF, 0x22BF},
	{0x2312, 0x2312}, {0x2460, 0x24E9}, {0x24EB, 0x254B},
	{0x2550, 0x2573}, {0x2580, 0x258F}, {0x2592, 0x2595},
	{0x25A0, 0x25A1}, {0x25A3, 0x25A9}, {0x25B2, 0x25B3},
	{0x25B6, 0x25B7}, {0x25BC, 0x25BD}, {0x25C0, 0x25C1},
	{0x25C6, 0x25C8}, {0x25CB, 0x25CB}, {0x25CE, 0x25D1},
	{0x25E2, 0x25E5}, {0x25EF, 0x25EF}, {0x2605, 0x2606},
	{0x2609, 0x2609}, {0x260E, 0x260F}, {0x2614, 0x2615},
	{0x261C, 0x261C}, {0x261E, 0x261E}, {0x2640, 0x2640},
	{0x2642, 0x2642}, {0x2660, 0x2661}, {0x2663, 0x2665},
	{0x2667, 0x266A}, {0x266C, 0x266D}, {0x266F, 0x266F},
	{0x273D, 0x273D}, {0x2776, 0x277F}, {0xE000, 0xF8FF},
	{0xFFFD, 0xFFFD}, {0x0F0000, 0x0FFFFD}, {0x100000, 0x10FFFD},
];

/* For Japanese legacy encodings, the following characters are added.
*/
private static immutable .interval[] legacy_ja = [{0x00A2, 0x00A3}, {0x00A5, 0x00A6}, {0x00AC, 0x00AC}, {0x00AF, 0x00AF}, {0x2212, 0x2212}];

/* Intervals that wcwidth_cjk() reports as width 2 when option.is_emoji is non-zero. */
private static immutable .interval[] emoji =
[
	{0x2600, 0x27BF},
	{0x01F300, 0x01F64F},
	{0x01F680, 0x01F6FF},
	{0x01F900, 0x01F9FF},
];

/**
 * Column width of a single code point for CJK legacy environments.
 *
 * The tables are consulted in this order: ambiguous, then legacy_ja
 * (only when option.is_legacy_ja is non-zero), then emoji (only when
 * option.is_emoji is non-zero). A hit in any of them yields 2; otherwise
 * the result of wcwidth_ucs() is returned unchanged.
 *
 * Params:
 *      ucs = code point to measure
 *      option = width options; must not be null
 *
 * Returns: 2 for a table hit, otherwise whatever wcwidth_ucs() returns; 0 when option is null (the same value wcwidth_ucs() returns for a null option)
 */
extern (C)
pure nothrow @safe @nogc @live
public int wcwidth_cjk(uint ucs, scope const wcwidth_cjk_option* option = default_wcwidth_cjk_option_ptr)

	in
	{
		assert(option != null);
	}

	do
	{
		/*
		 * Contracts are not compiled into release builds; without this
		 * check the option.is_legacy_ja access below would dereference null.
		 */
		if (option == null) {
			return 0;
		}

		/* binary search in table of East Asian Ambiguous characters */
		if (.bisearch(ucs, .ambiguous, .ambiguous.length - 1)) {
			return 2;
		}

		if ((option.is_legacy_ja) && (.bisearch(ucs, .legacy_ja, .legacy_ja.length - 1))) {
			return 2;
		}

		if ((option.is_emoji) && (.bisearch(ucs, .emoji, .emoji.length - 1))) {
			return 2;
		}

		return .wcwidth_ucs(ucs, option);
	}

/**
 * Total column width of a sequence of code points, measured with wcwidth_cjk().
 *
 * At most n elements are examined; the scan stops early at the first
 * element equal to 0.
 *
 * Params:
 *      pwcs = pointer to the first code point; must not be null
 *      n = maximum number of elements to examine
 *      option = width options; must not be null
 *
 * Returns: the sum of the individual widths, -1 as soon as one element has a negative width, or 0 when pwcs or option is null
 */
extern (C)
pure nothrow @trusted @nogc @live
public int wcswidth_cjk(scope const uint* pwcs, size_t n, scope const wcwidth_cjk_option* option = default_wcwidth_cjk_option_ptr)

	in
	{
		assert(pwcs != null);
		assert(option != null);
	}

	do
	{
		/* Contracts are not compiled into release builds, so check again at run time. */
		if ((pwcs == null) || (option == null)) {
			return 0;
		}

		int width = 0;

		/*
		 * The index is compared with n BEFORE the element is read, so a
		 * buffer of exactly n elements without a terminating 0 is never
		 * read past its end, and nothing is read at all when n is 0.
		 */
		for (size_t i = 0; i < n; i++) {
			immutable uint ucs = pwcs[i];

			if (ucs == 0) {
				break;
			}

			immutable int w = .wcwidth_cjk(ucs, option);

			if (w < 0) {
				return -1;
			}

			width += w;
		}

		return width;
	}