1 #include "mf_encoding.h" 2 3 #if MF_ENCODING == MF_ENCODING_UTF8 4 mf_getchar(mf_str * str)5mf_char mf_getchar(mf_str *str) 6 { 7 uint8_t c; 8 uint8_t tmp, seqlen; 9 uint16_t result; 10 11 c = **str; 12 if (!c) 13 return 0; 14 15 (*str)++; 16 17 if ((c & 0x80) == 0) 18 { 19 /* Just normal ASCII character. */ 20 return c; 21 } 22 else if ((c & 0xC0) == 0x80) 23 { 24 /* Dangling piece of corrupted multibyte sequence. 25 * Did you cut the string in the wrong place? 26 */ 27 return c; 28 } 29 else if ((**str & 0xC0) == 0xC0) 30 { 31 /* Start of multibyte sequence without any following bytes. 32 * Silly. Maybe you are using the wrong encoding. 33 */ 34 return c; 35 } 36 else 37 { 38 /* Beginning of a multi-byte sequence. 39 * Find out how many characters and combine them. 40 */ 41 seqlen = 2; 42 tmp = 0x20; 43 result = 0; 44 while ((c & tmp) && (seqlen < 5)) 45 { 46 seqlen++; 47 tmp >>= 1; 48 49 result = (result << 6) | (**str & 0x3F); 50 (*str)++; 51 } 52 53 result = (result << 6) | (**str & 0x3F); 54 (*str)++; 55 56 result |= (c & (tmp - 1)) << ((seqlen - 1) * 6); 57 return result; 58 } 59 } 60 mf_rewind(mf_str * str)61void mf_rewind(mf_str *str) 62 { 63 (*str)--; 64 65 while ((**str & 0x80) != 0x00 && (**str & 0xC0) != 0xC0) 66 (*str)--; 67 } 68 69 #else 70 mf_getchar(mf_str * str)71mf_char mf_getchar(mf_str *str) 72 { 73 if (!(**str)) 74 return 0; 75 else 76 return *(*str)++; 77 } 78 mf_rewind(mf_str * str)79void mf_rewind(mf_str *str) 80 { 81 (*str)--; 82 } 83 84 #endif 85