// std::codecvt implementation details, generic version -*- C++ -*-

// Copyright (C) 2002-2019 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

//
// ISO C++ 14882: 22.2.1.5 - Template class codecvt
//

// Written by Benjamin Kosnik <bkoz@redhat.com>

#include <locale>
#include <cstdlib>  // For MB_CUR_MAX
#include <climits>  // For MB_LEN_MAX
#include <cstring>

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  // Specializations.
#ifdef _GLIBCXX_USE_WCHAR_T
  // Converts the wide characters in [__from, __from_end) to multibyte
  // characters stored in [__to, __to_end), one character at a time with
  // wcrtomb.
  //
  // On return __from_next and __to_next point one past the last input
  // character consumed and the last output byte produced.  __state is
  // only advanced after a character has been converted and stored, so
  // it always corresponds to __from_next.
  //
  // Returns ok when all input was converted, partial when the output
  // range was too small for the remaining input, and error when
  // wcrtomb rejected a character.
  codecvt_base::result
  codecvt<wchar_t, char, mbstate_t>::
  do_out(state_type& __state, const intern_type* __from,
         const intern_type* __from_end, const intern_type*& __from_next,
         extern_type* __to, extern_type* __to_end,
         extern_type*& __to_next) const
  {
    result __ret = ok;
    state_type __tmp_state(__state);

    // The conversion must be done by calling wcrtomb in a loop rather
    // than using wcsrtombs because wcsrtombs assumes that the input is
    // zero-terminated.

    // wcrtomb cannot be told the size of its destination.  Either the
    // output range is large enough for the worst case (MB_CUR_MAX bytes
    // for every remaining input character) and it is safe to write
    // straight into it, or each character has to be converted into a
    // temporary buffer first and copied over only when it fits.
    //
    // The test is written as a division on unsigned operands: MB_CUR_MAX
    // has type size_t, so the subtraction-based form
    // "MB_CUR_MAX * count - avail <= 0" is evaluated in unsigned
    // arithmetic and holds only when both sides are exactly equal,
    // and the multiplication could wrap for very large ranges.
    const size_t __mb_max = MB_CUR_MAX;
    const bool __fits = __from <= __from_end && __to <= __to_end
      && (static_cast<size_t>(__to_end - __to) / __mb_max
          >= static_cast<size_t>(__from_end - __from));

    if (__fits)
      while (__from < __from_end)
        {
          const size_t __conv = wcrtomb(__to, *__from, &__tmp_state);
          if (__conv == static_cast<size_t>(-1))
            {
              __ret = error;
              break;
            }
          __state = __tmp_state;
          __to += __conv;
          __from++;
        }
    else
      {
        extern_type __buf[MB_LEN_MAX];
        while (__from < __from_end && __to < __to_end)
          {
            const size_t __conv = wcrtomb(__buf, *__from, &__tmp_state);
            if (__conv == static_cast<size_t>(-1))
              {
                __ret = error;
                break;
              }
            else if (__conv > static_cast<size_t>(__to_end - __to))
              {
                // The character does not fit; leave it unconsumed and
                // do not commit the state change.
                __ret = partial;
                break;
              }

            memcpy(__to, __buf, __conv);
            __state = __tmp_state;
            __to += __conv;
            __from++;
          }
      }

    // Running out of output space before the input is exhausted.
    if (__ret == ok && __from < __from_end)
      __ret = partial;

    __from_next = __from;
    __to_next = __to;
    return __ret;
  }

  // Converts the multibyte characters in [__from, __from_end) to wide
  // characters stored in [__to, __to_end), one character at a time with
  // mbrtowc.
  //
  // On return __from_next and __to_next point one past the last input
  // byte consumed and the last wide character produced.  Returns ok
  // when all input was converted, partial when the input ends in an
  // incomplete multibyte character or the output range is full, and
  // error when mbrtowc rejected the input.
  codecvt_base::result
  codecvt<wchar_t, char, mbstate_t>::
  do_in(state_type& __state, const extern_type* __from,
        const extern_type* __from_end, const extern_type*& __from_next,
        intern_type* __to, intern_type* __to_end,
        intern_type*& __to_next) const
  {
    result __ret = ok;
    // This temporary state object is necessary so __state won't be modified
    // if [__from, __from_end) is a partial multibyte character.
    state_type __tmp_state(__state);

    // Conversion must be done by calling mbrtowc in a loop rather than
    // by calling mbsrtowcs because mbsrtowcs assumes that the input
    // sequence is zero-terminated.
    while (__from < __from_end && __to < __to_end)
      {
        size_t __conv = mbrtowc(__to, __from, __from_end - __from,
                                &__tmp_state);
        if (__conv == static_cast<size_t>(-1))
          {
            __ret = error;
            break;
          }
        else if (__conv == static_cast<size_t>(-2))
          {
            // It is unclear what to return in this case (see DR 382).
            __ret = partial;
            break;
          }
        else if (__conv == 0)
          {
            // mbrtowc reports a converted null character as 0 rather
            // than as a byte count, so one input byte is assumed.
            // XXX Probably wrong for stateful encodings
            __conv = 1;
            *__to = L'\0';
          }

        __state = __tmp_state;
        __to++;
        __from += __conv;
      }

    // It is not clear that __from < __from_end implies __ret != ok
    // (see DR 382).
    if (__ret == ok && __from < __from_end)
      __ret = partial;

    __from_next = __from;
    __to_next = __to;
    return __ret;
  }

  // Returns 1 when MB_CUR_MAX is 1, i.e. every external character is a
  // single byte, and 0 otherwise.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_encoding() const throw()
  {
    // XXX This implementation assumes that the encoding is
    // stateless and is either single-byte or variable-width.
    int __ret = 0;
    if (MB_CUR_MAX == 1)
      __ret = 1;
    return __ret;
  }

  // Returns MB_CUR_MAX, the largest number of external bytes that one
  // internal character can require.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_max_length() const throw()
  {
    // XXX Probably wrong for stateful encodings.
    return static_cast<int>(MB_CUR_MAX);
  }

  // Returns the number of bytes at the start of [__from, __end) that
  // convert to at most __max wide characters.  Counting stops early at
  // the first invalid or incomplete multibyte character.  __state is
  // advanced past every character that was counted.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_length(state_type& __state, const extern_type* __from,
            const extern_type* __end, size_t __max) const
  {
    int __ret = 0;
    state_type __tmp_state(__state);

    while (__from < __end && __max)
      {
        size_t __conv = mbrtowc(0, __from, __end - __from, &__tmp_state);
        if (__conv == static_cast<size_t>(-1))
          {
            // Invalid source character
            break;
          }
        else if (__conv == static_cast<size_t>(-2))
          {
            // Remainder of input does not form a complete destination
            // character.
            break;
          }
        else if (__conv == 0)
          {
            // A null character was converted; assume it was one byte.
            // XXX Probably wrong for stateful encodings
            __conv = 1;
          }

        __state = __tmp_state;
        __from += __conv;
        __ret += static_cast<int>(__conv);
        __max--;
      }

    return __ret;
  }
#endif

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace