1 // std::codecvt implementation details, GNU version -*- C++ -*-
2 
3 // Copyright (C) 2002-2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 //
26 // ISO C++ 14882: 22.2.1.5 - Template class codecvt
27 //
28 
29 // Written by Benjamin Kosnik <bkoz@redhat.com>
30 
31 #include <locale>
32 #include <cstdlib>  // For MB_CUR_MAX
33 #include <climits>  // For MB_LEN_MAX
34 #include <bits/c++locale_internal.h>
35 
36 namespace std _GLIBCXX_VISIBILITY(default)
37 {
38 _GLIBCXX_BEGIN_NAMESPACE_VERSION
39 
40   // Specializations.
41 #ifdef _GLIBCXX_USE_WCHAR_T
42   codecvt_base::result
43   codecvt<wchar_t, char, mbstate_t>::
do_out(state_type & __state,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const44   do_out(state_type& __state, const intern_type* __from,
45 	 const intern_type* __from_end, const intern_type*& __from_next,
46 	 extern_type* __to, extern_type* __to_end,
47 	 extern_type*& __to_next) const
48   {
49     result __ret = ok;
50     state_type __tmp_state(__state);
51 
52 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
53     __c_locale __old = __uselocale(_M_c_locale_codecvt);
54 #endif
55 
56     // wcsnrtombs is *very* fast but stops if encounters NUL characters:
57     // in case we fall back to wcrtomb and then continue, in a loop.
58     // NB: wcsnrtombs is a GNU extension
59     for (__from_next = __from, __to_next = __to;
60 	 __from_next < __from_end && __to_next < __to_end
61 	 && __ret == ok;)
62       {
63 	const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
64 						      __from_end - __from_next);
65 	if (!__from_chunk_end)
66 	  __from_chunk_end = __from_end;
67 
68 	__from = __from_next;
69 	const size_t __conv = wcsnrtombs(__to_next, &__from_next,
70 					 __from_chunk_end - __from_next,
71 					 __to_end - __to_next, &__state);
72 	if (__conv == static_cast<size_t>(-1))
73 	  {
74 	    // In case of error, in order to stop at the exact place we
75 	    // have to start again from the beginning with a series of
76 	    // wcrtomb.
77 	    for (; __from < __from_next; ++__from)
78 	      __to_next += wcrtomb(__to_next, *__from, &__tmp_state);
79 	    __state = __tmp_state;
80 	    __ret = error;
81 	  }
82 	else if (__from_next && __from_next < __from_chunk_end)
83 	  {
84 	    __to_next += __conv;
85 	    __ret = partial;
86 	  }
87 	else
88 	  {
89 	    __from_next = __from_chunk_end;
90 	    __to_next += __conv;
91 	  }
92 
93 	if (__from_next < __from_end && __ret == ok)
94 	  {
95 	    extern_type __buf[MB_LEN_MAX];
96 	    __tmp_state = __state;
97 	    const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state);
98 	    if (__conv2 > static_cast<size_t>(__to_end - __to_next))
99 	      __ret = partial;
100 	    else
101 	      {
102 		memcpy(__to_next, __buf, __conv2);
103 		__state = __tmp_state;
104 		__to_next += __conv2;
105 		++__from_next;
106 	      }
107 	  }
108       }
109 
110 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
111     __uselocale(__old);
112 #endif
113 
114     return __ret;
115   }
116 
117   codecvt_base::result
118   codecvt<wchar_t, char, mbstate_t>::
do_in(state_type & __state,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const119   do_in(state_type& __state, const extern_type* __from,
120 	const extern_type* __from_end, const extern_type*& __from_next,
121 	intern_type* __to, intern_type* __to_end,
122 	intern_type*& __to_next) const
123   {
124     result __ret = ok;
125     state_type __tmp_state(__state);
126 
127 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
128     __c_locale __old = __uselocale(_M_c_locale_codecvt);
129 #endif
130 
131     // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
132     // in case we store a L'\0' and then continue, in a loop.
133     // NB: mbsnrtowcs is a GNU extension
134     for (__from_next = __from, __to_next = __to;
135 	 __from_next < __from_end && __to_next < __to_end
136 	 && __ret == ok;)
137       {
138 	const extern_type* __from_chunk_end;
139 	__from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0',
140 								  __from_end
141 								  - __from_next));
142 	if (!__from_chunk_end)
143 	  __from_chunk_end = __from_end;
144 
145 	__from = __from_next;
146 	size_t __conv = mbsnrtowcs(__to_next, &__from_next,
147 				   __from_chunk_end - __from_next,
148 				   __to_end - __to_next, &__state);
149 	if (__conv == static_cast<size_t>(-1))
150 	  {
151 	    // In case of error, in order to stop at the exact place we
152 	    // have to start again from the beginning with a series of
153 	    // mbrtowc.
154 	    for (;; ++__to_next, __from += __conv)
155 	      {
156 		__conv = mbrtowc(__to_next, __from, __from_end - __from,
157 				 &__tmp_state);
158 		if (__conv == static_cast<size_t>(-1)
159 		    || __conv == static_cast<size_t>(-2))
160 		  break;
161 	      }
162 	    __from_next = __from;
163 	    __state = __tmp_state;
164 	    __ret = error;
165 	  }
166 	else if (__from_next && __from_next < __from_chunk_end)
167 	  {
168 	    // It is unclear what to return in this case (see DR 382).
169 	    __to_next += __conv;
170 	    __ret = partial;
171 	  }
172 	else
173 	  {
174 	    __from_next = __from_chunk_end;
175 	    __to_next += __conv;
176 	  }
177 
178 	if (__from_next < __from_end && __ret == ok)
179 	  {
180 	    if (__to_next < __to_end)
181 	      {
182 		// XXX Probably wrong for stateful encodings
183 		__tmp_state = __state;
184 		++__from_next;
185 		*__to_next++ = L'\0';
186 	      }
187 	    else
188 	      __ret = partial;
189 	  }
190       }
191 
192 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
193     __uselocale(__old);
194 #endif
195 
196     return __ret;
197   }
198 
199   int
200   codecvt<wchar_t, char, mbstate_t>::
do_encoding() const201   do_encoding() const throw()
202   {
203     // XXX This implementation assumes that the encoding is
204     // stateless and is either single-byte or variable-width.
205     int __ret = 0;
206 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
207     __c_locale __old = __uselocale(_M_c_locale_codecvt);
208 #endif
209     if (MB_CUR_MAX == 1)
210       __ret = 1;
211 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
212     __uselocale(__old);
213 #endif
214     return __ret;
215   }
216 
217   int
218   codecvt<wchar_t, char, mbstate_t>::
do_max_length() const219   do_max_length() const throw()
220   {
221 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
222     __c_locale __old = __uselocale(_M_c_locale_codecvt);
223 #endif
224     // XXX Probably wrong for stateful encodings.
225     int __ret = MB_CUR_MAX;
226 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
227     __uselocale(__old);
228 #endif
229     return __ret;
230   }
231 
232   int
233   codecvt<wchar_t, char, mbstate_t>::
do_length(state_type & __state,const extern_type * __from,const extern_type * __end,size_t __max) const234   do_length(state_type& __state, const extern_type* __from,
235 	    const extern_type* __end, size_t __max) const
236   {
237     int __ret = 0;
238     state_type __tmp_state(__state);
239 
240 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
241     __c_locale __old = __uselocale(_M_c_locale_codecvt);
242 #endif
243 
244     // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
245     // in case we advance past it and then continue, in a loop.
246     // NB: mbsnrtowcs is a GNU extension
247 
248     // A dummy internal buffer is needed in order for mbsnrtocws to consider
249     // its fourth parameter (it wouldn't with NULL as first parameter).
250     wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
251 							   * __max));
252     while (__from < __end && __max)
253       {
254 	const extern_type* __from_chunk_end;
255 	__from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0',
256 								  __end
257 								  - __from));
258 	if (!__from_chunk_end)
259 	  __from_chunk_end = __end;
260 
261 	const extern_type* __tmp_from = __from;
262 	size_t __conv = mbsnrtowcs(__to, &__from,
263 				   __from_chunk_end - __from,
264 				   __max, &__state);
265 	if (__conv == static_cast<size_t>(-1))
266 	  {
267 	    // In case of error, in order to stop at the exact place we
268 	    // have to start again from the beginning with a series of
269 	    // mbrtowc.
270 	    for (__from = __tmp_from;; __from += __conv)
271 	      {
272 		__conv = mbrtowc(0, __from, __end - __from,
273 				 &__tmp_state);
274 		if (__conv == static_cast<size_t>(-1)
275 		    || __conv == static_cast<size_t>(-2))
276 		  break;
277 	      }
278 	    __state = __tmp_state;
279 	    __ret += __from - __tmp_from;
280 	    break;
281 	  }
282 	if (!__from)
283 	  __from = __from_chunk_end;
284 
285 	__ret += __from - __tmp_from;
286 	__max -= __conv;
287 
288 	if (__from < __end && __max)
289 	  {
290 	    // XXX Probably wrong for stateful encodings
291 	    __tmp_state = __state;
292 	    ++__from;
293 	    ++__ret;
294 	    --__max;
295 	  }
296       }
297 
298 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
299     __uselocale(__old);
300 #endif
301 
302     return __ret;
303   }
304 #endif
305 
306 _GLIBCXX_END_NAMESPACE_VERSION
307 } // namespace
308