1 /* LibTomCrypt, modular cryptographic library -- Tom St Denis */
2 /* SPDX-License-Identifier: Unlicense */
3 #include "tomcrypt_private.h"
4 
5 /**
6   @file der_decode_utf8_string.c
  ASN.1 DER, decode a UTF8 STRING, Tom St Denis
8 */
9 
10 
11 #ifdef LTC_DER
12 
13 /**
14   Decode a UTF8 STRING and recover an array of unicode characters.
15   @param in      The DER encoded UTF8 STRING
16   @param inlen   The size of the DER UTF8 STRING
17   @param out     [out] The array of unicode characters (wchar_t*)
18   @param outlen  [in/out] The number of unicode characters in the array
19   @return CRYPT_OK if successful
20 */
der_decode_utf8_string(const unsigned char * in,unsigned long inlen,wchar_t * out,unsigned long * outlen)21 int der_decode_utf8_string(const unsigned char *in,  unsigned long inlen,
22                                        wchar_t *out, unsigned long *outlen)
23 {
24    wchar_t       tmp;
25    unsigned long x, y, z, len;
26    int err;
27 
28    LTC_ARGCHK(in     != NULL);
29    LTC_ARGCHK(out    != NULL);
30    LTC_ARGCHK(outlen != NULL);
31 
32    /* must have header at least */
33    if (inlen < 2) {
34       return CRYPT_INVALID_PACKET;
35    }
36 
37    /* check for 0x0C */
38    if ((in[0] & 0x1F) != 0x0C) {
39       return CRYPT_INVALID_PACKET;
40    }
41    x = 1;
42 
43    /* get the length of the data */
44    y = inlen - x;
45    if ((err = der_decode_asn1_length(in + x, &y, &len)) != CRYPT_OK) {
46       return err;
47    }
48    x += y;
49 
50    if (len > (inlen - x)) {
51       return CRYPT_INVALID_PACKET;
52    }
53 
54    /* proceed to recover unicode characters from utf8 data.
55       for reference see Section 3 of RFC 3629:
56 
57         https://tools.ietf.org/html/rfc3629#section-3
58     */
59    for (y = 0; x < inlen; ) {
60       /* read first byte */
61       tmp = in[x++];
62 
63       /* a unicode character is recovered from a sequence of 1 to 4 utf8 bytes.
64          the form of those bytes must match a row in the following table:
65 
66            0xxxxxxx
67            110xxxxx 10xxxxxx
68            1110xxxx 10xxxxxx 10xxxxxx
69            11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
70 
71          the number of leading ones in the first byte (0,2,3,4) determines the
72          number of remaining bytes to read (0,1,2,3)
73        */
74 
75       /* determine z, the number of leading ones.
76          this is done by left-shifting tmp, which clears the ms-bits */
77       for (z = 0; (tmp & 0x80) && (z <= 4); z++, tmp = (tmp << 1) & 0xFF);
78 
79       /* z should be in {0,2,3,4} */
80       if (z == 1 || z > 4) {
81          return CRYPT_INVALID_PACKET;
82       }
83 
84       /* right-shift tmp to restore least-sig bits */
85       tmp >>= z;
86 
87       /* now update z so it equals the number of additional bytes to read */
88       if (z > 0) { --z; }
89 
90       if (x + z > inlen) {
91          return CRYPT_INVALID_PACKET;
92       }
93 
94       /* read remaining bytes */
95       while (z-- != 0) {
96          if ((in[x] & 0xC0) != 0x80) {
97             return CRYPT_INVALID_PACKET;
98          }
99          tmp = (tmp << 6) | ((wchar_t)in[x++] & 0x3F);
100       }
101 
102       if (y < *outlen) {
103          out[y] = tmp;
104       }
105       y++;
106    }
107    if (y > *outlen) {
108       err = CRYPT_BUFFER_OVERFLOW;
109    } else {
110       err = CRYPT_OK;
111    }
112    *outlen = y;
113 
114    return err;
115 }
116 
117 #endif
118