1 // std::codecvt implementation details, GNU version -*- C++ -*- 2 3 // Copyright (C) 2002-2015 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 // 26 // ISO C++ 14882: 22.2.1.5 - Template class codecvt 27 // 28 29 // Written by Benjamin Kosnik <bkoz@redhat.com> 30 31 #include <locale> 32 #include <cstdlib> // For MB_CUR_MAX 33 #include <climits> // For MB_LEN_MAX 34 #include <bits/c++locale_internal.h> 35 36 namespace std _GLIBCXX_VISIBILITY(default) 37 { 38 _GLIBCXX_BEGIN_NAMESPACE_VERSION 39 40 // Specializations. 41 #ifdef _GLIBCXX_USE_WCHAR_T 42 codecvt_base::result 43 codecvt<wchar_t, char, mbstate_t>:: do_out(state_type & __state,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const44 do_out(state_type& __state, const intern_type* __from, 45 const intern_type* __from_end, const intern_type*& __from_next, 46 extern_type* __to, extern_type* __to_end, 47 extern_type*& __to_next) const 48 { 49 result __ret = ok; 50 state_type __tmp_state(__state); 51 52 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 53 __c_locale __old = __uselocale(_M_c_locale_codecvt); 54 #endif 55 56 // wcsnrtombs is *very* fast but stops if encounters NUL characters: 57 // in case we fall back to wcrtomb and then continue, in a loop. 58 // NB: wcsnrtombs is a GNU extension 59 for (__from_next = __from, __to_next = __to; 60 __from_next < __from_end && __to_next < __to_end 61 && __ret == ok;) 62 { 63 const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0', 64 __from_end - __from_next); 65 if (!__from_chunk_end) 66 __from_chunk_end = __from_end; 67 68 __from = __from_next; 69 const size_t __conv = wcsnrtombs(__to_next, &__from_next, 70 __from_chunk_end - __from_next, 71 __to_end - __to_next, &__state); 72 if (__conv == static_cast<size_t>(-1)) 73 { 74 // In case of error, in order to stop at the exact place we 75 // have to start again from the beginning with a series of 76 // wcrtomb. 77 for (; __from < __from_next; ++__from) 78 __to_next += wcrtomb(__to_next, *__from, &__tmp_state); 79 __state = __tmp_state; 80 __ret = error; 81 } 82 else if (__from_next && __from_next < __from_chunk_end) 83 { 84 __to_next += __conv; 85 __ret = partial; 86 } 87 else 88 { 89 __from_next = __from_chunk_end; 90 __to_next += __conv; 91 } 92 93 if (__from_next < __from_end && __ret == ok) 94 { 95 extern_type __buf[MB_LEN_MAX]; 96 __tmp_state = __state; 97 const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state); 98 if (__conv2 > static_cast<size_t>(__to_end - __to_next)) 99 __ret = partial; 100 else 101 { 102 memcpy(__to_next, __buf, __conv2); 103 __state = __tmp_state; 104 __to_next += __conv2; 105 ++__from_next; 106 } 107 } 108 } 109 110 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 111 __uselocale(__old); 112 #endif 113 114 return __ret; 115 } 116 117 codecvt_base::result 118 codecvt<wchar_t, char, mbstate_t>:: do_in(state_type & __state,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const119 do_in(state_type& __state, const extern_type* __from, 120 const extern_type* __from_end, const extern_type*& __from_next, 121 intern_type* __to, intern_type* __to_end, 122 intern_type*& __to_next) const 123 { 124 result __ret = ok; 125 state_type __tmp_state(__state); 126 127 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 128 __c_locale __old = __uselocale(_M_c_locale_codecvt); 129 #endif 130 131 // mbsnrtowcs is *very* fast but stops if encounters NUL characters: 132 // in case we store a L'\0' and then continue, in a loop. 133 // NB: mbsnrtowcs is a GNU extension 134 for (__from_next = __from, __to_next = __to; 135 __from_next < __from_end && __to_next < __to_end 136 && __ret == ok;) 137 { 138 const extern_type* __from_chunk_end; 139 __from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0', 140 __from_end 141 - __from_next)); 142 if (!__from_chunk_end) 143 __from_chunk_end = __from_end; 144 145 __from = __from_next; 146 size_t __conv = mbsnrtowcs(__to_next, &__from_next, 147 __from_chunk_end - __from_next, 148 __to_end - __to_next, &__state); 149 if (__conv == static_cast<size_t>(-1)) 150 { 151 // In case of error, in order to stop at the exact place we 152 // have to start again from the beginning with a series of 153 // mbrtowc. 154 for (;; ++__to_next, __from += __conv) 155 { 156 __conv = mbrtowc(__to_next, __from, __from_end - __from, 157 &__tmp_state); 158 if (__conv == static_cast<size_t>(-1) 159 || __conv == static_cast<size_t>(-2)) 160 break; 161 } 162 __from_next = __from; 163 __state = __tmp_state; 164 __ret = error; 165 } 166 else if (__from_next && __from_next < __from_chunk_end) 167 { 168 // It is unclear what to return in this case (see DR 382). 169 __to_next += __conv; 170 __ret = partial; 171 } 172 else 173 { 174 __from_next = __from_chunk_end; 175 __to_next += __conv; 176 } 177 178 if (__from_next < __from_end && __ret == ok) 179 { 180 if (__to_next < __to_end) 181 { 182 // XXX Probably wrong for stateful encodings 183 __tmp_state = __state; 184 ++__from_next; 185 *__to_next++ = L'\0'; 186 } 187 else 188 __ret = partial; 189 } 190 } 191 192 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 193 __uselocale(__old); 194 #endif 195 196 return __ret; 197 } 198 199 int 200 codecvt<wchar_t, char, mbstate_t>:: do_encoding() const201 do_encoding() const throw() 202 { 203 // XXX This implementation assumes that the encoding is 204 // stateless and is either single-byte or variable-width. 205 int __ret = 0; 206 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 207 __c_locale __old = __uselocale(_M_c_locale_codecvt); 208 #endif 209 if (MB_CUR_MAX == 1) 210 __ret = 1; 211 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 212 __uselocale(__old); 213 #endif 214 return __ret; 215 } 216 217 int 218 codecvt<wchar_t, char, mbstate_t>:: do_max_length() const219 do_max_length() const throw() 220 { 221 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 222 __c_locale __old = __uselocale(_M_c_locale_codecvt); 223 #endif 224 // XXX Probably wrong for stateful encodings. 225 int __ret = MB_CUR_MAX; 226 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 227 __uselocale(__old); 228 #endif 229 return __ret; 230 } 231 232 int 233 codecvt<wchar_t, char, mbstate_t>:: do_length(state_type & __state,const extern_type * __from,const extern_type * __end,size_t __max) const234 do_length(state_type& __state, const extern_type* __from, 235 const extern_type* __end, size_t __max) const 236 { 237 int __ret = 0; 238 state_type __tmp_state(__state); 239 240 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 241 __c_locale __old = __uselocale(_M_c_locale_codecvt); 242 #endif 243 244 // mbsnrtowcs is *very* fast but stops if encounters NUL characters: 245 // in case we advance past it and then continue, in a loop. 246 // NB: mbsnrtowcs is a GNU extension 247 248 // A dummy internal buffer is needed in order for mbsnrtocws to consider 249 // its fourth parameter (it wouldn't with NULL as first parameter). 250 wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t) 251 * __max)); 252 while (__from < __end && __max) 253 { 254 const extern_type* __from_chunk_end; 255 __from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0', 256 __end 257 - __from)); 258 if (!__from_chunk_end) 259 __from_chunk_end = __end; 260 261 const extern_type* __tmp_from = __from; 262 size_t __conv = mbsnrtowcs(__to, &__from, 263 __from_chunk_end - __from, 264 __max, &__state); 265 if (__conv == static_cast<size_t>(-1)) 266 { 267 // In case of error, in order to stop at the exact place we 268 // have to start again from the beginning with a series of 269 // mbrtowc. 270 for (__from = __tmp_from;; __from += __conv) 271 { 272 __conv = mbrtowc(0, __from, __end - __from, 273 &__tmp_state); 274 if (__conv == static_cast<size_t>(-1) 275 || __conv == static_cast<size_t>(-2)) 276 break; 277 } 278 __state = __tmp_state; 279 __ret += __from - __tmp_from; 280 break; 281 } 282 if (!__from) 283 __from = __from_chunk_end; 284 285 __ret += __from - __tmp_from; 286 __max -= __conv; 287 288 if (__from < __end && __max) 289 { 290 // XXX Probably wrong for stateful encodings 291 __tmp_state = __state; 292 ++__from; 293 ++__ret; 294 --__max; 295 } 296 } 297 298 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 299 __uselocale(__old); 300 #endif 301 302 return __ret; 303 } 304 #endif 305 306 _GLIBCXX_END_NAMESPACE_VERSION 307 } // namespace 308