Ticket #7423: terminal-utf8-spaces-v3.patch
File terminal-utf8-spaces-v3.patch, 3.3 KB (added 13 years ago)
---|
-
src/apps/terminal/UTF8Char.h
7 7 8 8 #include <ctype.h> 9 9 #include <string.h> 10 #include <stdio.h> 10 11 11 12 12 struct UTF8Char { 13 13 char bytes[4]; 14 14 15 15 16 UTF8Char() 16 17 { 17 18 } 19 18 20 19 21 UTF8Char(char c) 20 22 { 21 23 bytes[0] = c; 22 24 } 25 26 27 UTF8Char(const char* c) 28 { 29 SetTo(c, ByteCount(*c)); 30 } 31 23 32 24 33 UTF8Char(const char* c, int32 count) 25 34 { 26 35 SetTo(c, count); 27 36 } 28 37 38 29 39 void SetTo(const char* c, int32 count) 30 40 { 31 41 bytes[0] = c[0]; … … 39 49 } 40 50 } 41 51 52 42 53 static int32 ByteCount(char firstChar) 43 54 { 44 55 // Note, this does not recognize invalid chars … … 50 61 return c < 0xf0 ? 3 : 4; 51 62 } 52 63 64 53 65 int32 ByteCount() const 54 66 { 55 67 return ByteCount(bytes[0]); 56 68 } 57 69 70 58 71 bool IsFullWidth() const 59 72 { 60 73 // TODO: Implement! 61 74 return false; 62 75 } 63 76 77 64 78 bool IsSpace() const 65 79 { 66 // TODO: Support multi-byte chars! 67 return ByteCount() == 1 ? isspace(bytes[0]) : false; 80 if (ByteCount() == 1) { 81 return isspace(bytes[0]); 82 } else if (ByteCount() == 2) { 83 uchar b0 = bytes[0]; 84 uchar b1 = bytes[1]; 85 86 // U+00A0 (no-break space), U+0085 (next line) 87 if (b0 == 0xc2) 88 return (b1 == 0xa0 || b1 == 0x85); 89 90 return false; 91 } else if (ByteCount() == 3) { 92 uchar b0 = bytes[0]; 93 uchar b1 = bytes[1]; 94 uchar b2 = bytes[2]; 95 96 // most multibyte space chars are all in one range (0x2000-0x200B) 97 if (b0 == 0xe2 && b1 == 0x80) 98 return (b2 >= 0x80 && b2 <= 0x8f); 99 100 // U+1680 (ogham space mark) 101 if (b0 == 0xe1 && b1 == 0x9a) 102 return (b2 == 0x80); 103 104 // U+2028 (line separator), U+2029 (paragraph separator), 105 // U+202F (narrow no-break space) 106 if (b0 == 0xe2 && b1 == 0x80) 107 return (b2 == 0xa8 || b2 == 0xa9 || b2 == 0xaf); 108 109 // U+205F (medium mathematical space) 110 if (b0 == 0xe2 && b1 == 0x81) 111 return (b2 == 0x9f); 112 113 // U+3000 (ideographic space) 114 if (b0 == 0xe3 && b1 == 0x80) 115 return (b2 == 0x80); 
116 117 // U+FEFF (zero-width non-breaking space) 118 if (b0 == 0xef && b1 == 0xbb) 119 return (b2 == 0xbf); 120 121 return false; 122 } else { 123 return false; 124 } 68 125 } 69 126 70 127 UTF8Char ToLower() const … … 76 133 return UTF8Char((char)tolower(bytes[0])); 77 134 } 78 135 136 79 137 bool operator==(const UTF8Char& other) const 80 138 { 81 139 int32 byteCount = ByteCount(); … … 91 149 return equals; 92 150 } 93 151 152 94 153 bool operator!=(const UTF8Char& other) const 95 154 { 96 155 return !(*this == other); -
src/apps/terminal/TermView.cpp
405 405 406 406 virtual int Classify(const char* character) 407 407 { 408 // TODO: Deal correctly with non-ASCII chars. 408 UTF8Char u8c(character); 409 410 if (u8c.IsSpace()) 411 return CHAR_TYPE_SPACE; 412 409 413 char c = *character; 410 if ( UTF8Char::ByteCount(c) > 1)414 if (u8c.ByteCount() > 1 || isalnum(c) || strchr(fSpecialWordChars, c) != NULL) 411 415 return CHAR_TYPE_WORD_CHAR; 412 416 413 if (isspace(c))414 return CHAR_TYPE_SPACE;415 if (isalnum(c) || strchr(fSpecialWordChars, c) != NULL)416 return CHAR_TYPE_WORD_CHAR;417 418 417 return CHAR_TYPE_WORD_DELIMITER; 419 418 } 420 419