1 /* SPDX-License-Identifier: GPL-2.0+ */ 2 /* 3 * charset conversion utils 4 * 5 * Copyright (c) 2017 Rob Clark 6 */ 7 8 #ifndef __CHARSET_H_ 9 #define __CHARSET_H_ 10 11 #include <linux/kernel.h> 12 #include <linux/types.h> 13 14 #define MAX_UTF8_PER_UTF16 3 15 16 /* 17 * codepage_437 - Unicode to codepage 437 translation table 18 */ 19 extern const u16 codepage_437[160]; 20 21 /** 22 * console_read_unicode() - read Unicode code point from console 23 * 24 * @code: pointer to store Unicode code point 25 * Return: 0 = success 26 */ 27 int console_read_unicode(s32 *code); 28 29 /** 30 * utf8_get() - get next UTF-8 code point from buffer 31 * 32 * @src: pointer to current byte, updated to point to next byte 33 * Return: code point, or 0 for end of string, or -1 if no legal 34 * code point is found. In case of an error src points to 35 * the incorrect byte. 36 */ 37 s32 utf8_get(const char **src); 38 39 /** 40 * utf8_put() - write UTF-8 code point to buffer 41 * 42 * @code: code point 43 * @dst: pointer to destination buffer, updated to next position 44 * Return: -1 if the input parameters are invalid 45 */ 46 int utf8_put(s32 code, char **dst); 47 48 /** 49 * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion 50 * to utf-16 51 * 52 * @src: utf-8 string 53 * @count: maximum number of code points to convert 54 * Return: length in u16 after conversion to utf-16 without the 55 * trailing \0. If an invalid UTF-8 sequence is hit one 56 * u16 will be reserved for a replacement character. 57 */ 58 size_t utf8_utf16_strnlen(const char *src, size_t count); 59 60 /** 61 * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16 62 * 63 * @a: utf-8 string 64 * Return: length in u16 after conversion to utf-16 without the 65 * trailing \0. If an invalid UTF-8 sequence is hit one 66 * u16 will be reserved for a replacement character. 67 */ 68 #define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX) 69 70 /** 71 * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string 72 * 73 * @dst: destination buffer 74 * @src: source buffer 75 * @count: maximum number of code points to copy 76 * Return: -1 if the input parameters are invalid 77 */ 78 int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count); 79 80 /** 81 * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string 82 * 83 * @d: destination buffer 84 * @s: source buffer 85 * Return: -1 if the input parameters are invalid 86 */ 87 #define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX) 88 89 /** 90 * utf16_get() - get next UTF-16 code point from buffer 91 * 92 * @src: pointer to current word, updated to point to next word 93 * Return: code point, or 0 for end of string, or -1 if no legal 94 * code point is found. In case of an error src points to 95 * the incorrect word. 96 */ 97 s32 utf16_get(const u16 **src); 98 99 /** 100 * utf16_put() - write UTF-16 code point to buffer 101 * 102 * @code: code point 103 * @dst: pointer to destination buffer, updated to next position 104 * Return: -1 if the input parameters are invalid 105 */ 106 int utf16_put(s32 code, u16 **dst); 107 108 /** 109 * utf16_strnlen() - length of a truncated utf-16 string 110 * 111 * @src: utf-16 string 112 * @count: maximum number of code points to convert 113 * Return: length in code points. If an invalid UTF-16 sequence is 114 * hit one position will be reserved for a replacement 115 * character. 116 */ 117 size_t utf16_strnlen(const u16 *src, size_t count); 118 119 /** 120 * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion 121 * to utf-8 122 * 123 * @src: utf-16 string 124 * @count: maximum number of code points to convert 125 * Return: length in bytes after conversion to utf-8 without the 126 * trailing \0. If an invalid UTF-16 sequence is hit one 127 * byte will be reserved for a replacement character. 128 */ 129 size_t utf16_utf8_strnlen(const u16 *src, size_t count); 130 131 /** 132 * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8 133 * 134 * @a: utf-16 string 135 * Return: length in bytes after conversion to utf-8 without the 136 * trailing \0. If an invalid UTF-16 sequence is hit one 137 * byte will be reserved for a replacement character. 138 */ 139 #define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX) 140 141 /** 142 * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string 143 * 144 * @dst: destination buffer 145 * @src: source buffer 146 * @count: maximum number of code points to copy 147 * Return: -1 if the input parameters are invalid 148 */ 149 int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count); 150 151 /** 152 * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string 153 * 154 * @d: destination buffer 155 * @s: source buffer 156 * Return: -1 if the input parameters are invalid 157 */ 158 #define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX) 159 160 /** 161 * utf_to_lower() - convert a Unicode letter to lower case 162 * 163 * @code: letter to convert 164 * Return: lower case letter or unchanged letter 165 */ 166 s32 utf_to_lower(const s32 code); 167 168 /** 169 * utf_to_upper() - convert a Unicode letter to upper case 170 * 171 * @code: letter to convert 172 * Return: upper case letter or unchanged letter 173 */ 174 s32 utf_to_upper(const s32 code); 175 176 /** 177 * u16_strcasecmp() - compare two u16 strings case insensitively 178 * 179 * @s1: first string to compare 180 * @s2: second string to compare 181 * Return: 0 if the first n u16 are the same in s1 and s2 182 * < 0 if the first different u16 in s1 is less than the 183 * corresponding u16 in s2 184 * > 0 if the first different u16 in s1 is greater than the 185 */ 186 int u16_strcasecmp(const u16 *s1, const u16 *s2); 187 188 /** 189 * u16_strncmp() - compare two u16 string 190 * 191 * @s1: first string to compare 192 * @s2: second string to compare 193 * @n: maximum number of u16 to compare 194 * Return: 0 if the first n u16 are the same in s1 and s2 195 * < 0 if the first different u16 in s1 is less than the 196 * corresponding u16 in s2 197 * > 0 if the first different u16 in s1 is greater than the 198 * corresponding u16 in s2 199 */ 200 int u16_strncmp(const u16 *s1, const u16 *s2, size_t n); 201 202 /** 203 * u16_strcmp() - compare two u16 string 204 * 205 * @s1: first string to compare 206 * @s2: second string to compare 207 * Return: 0 if the first n u16 are the same in s1 and s2 208 * < 0 if the first different u16 in s1 is less than the 209 * corresponding u16 in s2 210 * > 0 if the first different u16 in s1 is greater than the 211 * corresponding u16 in s2 212 */ 213 #define u16_strcmp(s1, s2) u16_strncmp((s1), (s2), SIZE_MAX) 214 215 /** 216 * u16_strsize() - count size of u16 string in bytes including the null 217 * character 218 * 219 * Counts the number of bytes occupied by a u16 string 220 * 221 * @in: null terminated u16 string 222 * Return: bytes in a u16 string 223 */ 224 size_t u16_strsize(const void *in); 225 226 /** 227 * u16_strnlen() - count non-zero words 228 * 229 * This function matches wscnlen_s() if the -fshort-wchar compiler flag is set. 230 * In the EFI context we explicitly need a function handling u16 strings. 231 * 232 * @in: null terminated u16 string 233 * @count: maximum number of words to count 234 * Return: number of non-zero words. 235 * This is not the number of utf-16 letters! 236 */ 237 size_t u16_strnlen(const u16 *in, size_t count); 238 239 /** 240 * u16_strlen - count non-zero words 241 * 242 * This function matches wsclen() if the -fshort-wchar compiler flag is set. 243 * In the EFI context we explicitly need a function handling u16 strings. 244 * 245 * @in: null terminated u16 string 246 * Return: number of non-zero words. 247 * This is not the number of utf-16 letters! 248 */ 249 size_t u16_strlen(const void *in); 250 251 #define u16_strlen(in) u16_strnlen(in, SIZE_MAX) 252 253 /** 254 * u16_strcpy() - copy u16 string 255 * 256 * Copy u16 string pointed to by src, including terminating null word, to 257 * the buffer pointed to by dest. 258 * 259 * @dest: destination buffer 260 * @src: source buffer (null terminated) 261 * Return: 'dest' address 262 */ 263 u16 *u16_strcpy(u16 *dest, const u16 *src); 264 265 /** 266 * u16_strdup() - duplicate u16 string 267 * 268 * Copy u16 string pointed to by src, including terminating null word, to a 269 * newly allocated buffer. 270 * 271 * @src: source buffer (null terminated) 272 * Return: allocated new buffer on success, NULL on failure 273 */ 274 u16 *u16_strdup(const void *src); 275 276 /** 277 * u16_strlcat() - Append a length-limited, %NUL-terminated string to another 278 * 279 * Append the source string @src to the destination string @dest, overwriting 280 * null word at the end of @dest adding a terminating null word. 281 * 282 * @dest: zero terminated u16 destination string 283 * @src: zero terminated u16 source string 284 * @count: size of buffer in u16 words including taling 0x0000 285 * Return: required size including trailing 0x0000 in u16 words 286 * If return value >= count, truncation occurred. 287 */ 288 size_t u16_strlcat(u16 *dest, const u16 *src, size_t count); 289 290 /** 291 * utf16_to_utf8() - Convert an utf16 string to utf8 292 * 293 * Converts 'size' characters of the utf16 string 'src' to utf8 294 * written to the 'dest' buffer. 295 * 296 * NOTE that a single utf16 character can generate up to 3 utf8 297 * characters. See MAX_UTF8_PER_UTF16. 298 * 299 * @dest: the destination buffer to write the utf8 characters 300 * @src: the source utf16 string 301 * @size: the number of utf16 characters to convert 302 * Return: the pointer to the first unwritten byte in 'dest' 303 */ 304 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); 305 306 /** 307 * utf_to_cp() - translate Unicode code point to 8bit codepage 308 * 309 * Codepoints that do not exist in the codepage are rendered as question mark. 310 * 311 * @c: pointer to Unicode code point to be translated 312 * @codepage: Unicode to codepage translation table 313 * Return: 0 on success, -ENOENT if codepoint cannot be translated 314 */ 315 int utf_to_cp(s32 *c, const u16 *codepage); 316 317 /** 318 * utf8_to_cp437_stream() - convert UTF-8 stream to codepage 437 319 * 320 * @c: next UTF-8 character to convert 321 * @buffer: buffer, at least 5 characters 322 * Return: next codepage 437 character or 0 323 */ 324 int utf8_to_cp437_stream(u8 c, char *buffer); 325 326 /** 327 * utf8_to_utf32_stream() - convert UTF-8 byte stream to Unicode code points 328 * 329 * The function is called for each byte @c in a UTF-8 stream. The byte is 330 * appended to the temporary storage @buffer until the UTF-8 stream in 331 * @buffer describes a Unicode code point. 332 * 333 * When a new code point has been decoded it is returned and buffer[0] is 334 * set to '\0', otherwise the return value is 0. 335 * 336 * The buffer must be at least 5 characters long. Before the first function 337 * invocation buffer[0] must be set to '\0'." 338 * 339 * @c: next UTF-8 character to convert 340 * @buffer: buffer, at least 5 characters 341 * Return: Unicode code point or 0 342 */ 343 int utf8_to_utf32_stream(u8 c, char *buffer); 344 345 #endif /* __CHARSET_H_ */ 346