1 /*  Copyright (C) 2002     Manuel Novoa III
2  *
3  *  This library is free software; you can redistribute it and/or
4  *  modify it under the terms of the GNU Library General Public
5  *  License as published by the Free Software Foundation; either
6  *  version 2 of the License, or (at your option) any later version.
7  *
8  *  This library is distributed in the hope that it will be useful,
9  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  *  Library General Public License for more details.
12  *
13  *  You should have received a copy of the GNU Library General Public
14  *  License along with this library; if not, see
15  *  <http://www.gnu.org/licenses/>.
16  */
17 
18 /* Nov. 1, 2002
19  * Reworked setlocale() return values and locale arg processing to
20  *   be more like glibc.  Applications expecting to be able to
21  *   query locale settings should now work... at the cost of almost
22  *   doubling the size of the setlocale object code.
23  * Fixed a bug in the internal fixed-size-string locale specifier code.
24  *
25  * Dec 20, 2002
26  * Added in collation support and updated stub nl_langinfo.
27  *
28  * Aug 1, 2003
29  * Added glibc-like extended locale stuff (newlocale, duplocale, etc).
30  *
31  * Aug 18, 2003
32  * Bug in duplocale... collation data wasn't copied.
33  * Bug in newlocale... translate 1<<LC_ALL to LC_ALL_MASK.
34  * Bug in _wchar_utf8sntowcs... fix cut-n-paste error.
35  *
36  * Aug 31, 2003
37  * Hack around bg_BG bug; grouping specified but no thousands separator.
38  * Also, disable the locale link_warnings for now, as they generate a
39  * lot of noise when using libstd++.
40  */
41 
42 
43 /*  TODO:
44  *  Implement the shared mmap code so non-mmu platforms can use this.
45  *  Add some basic collate functionality similar to what the previous
46  *    locale support had (8-bit codesets only).
47  */
48 
49 #define __CTYPE_HAS_8_BIT_LOCALES 1
50 
51 #include <string.h>
52 #include <stdlib.h>
53 #include <stddef.h>
54 #include <limits.h>
55 #include <stdint.h>
56 #include <assert.h>
57 #include <errno.h>
58 #include <ctype.h>
59 #include <stdio.h>
60 
61 #ifdef __UCLIBC_MJN3_ONLY__
62 #ifdef L_setlocale
63 #warning TODO: Make the link_warning()s a config option?
64 #endif
65 #endif
66 #undef link_warning
67 #define link_warning(A,B)
68 
69 #undef __LOCALE_C_ONLY
70 #ifndef __UCLIBC_HAS_LOCALE__
71 #define __LOCALE_C_ONLY
72 #endif /* __UCLIBC_HAS_LOCALE__ */
73 
74 
75 #ifdef __LOCALE_C_ONLY
76 
77 #include <locale.h>
78 
79 #else  /* __LOCALE_C_ONLY */
80 
81 #ifdef __UCLIBC_MJN3_ONLY__
82 #ifdef L_setlocale
83 #warning TODO: Fix the __CTYPE_HAS_8_BIT_LOCALES define at the top of the file.
84 #warning TODO: Fix __WCHAR_ENABLED.
85 #endif
86 #endif
87 
88 /* Need to include this before locale.h! */
89 #include <bits/uClibc_locale.h>
90 
91 #undef CODESET_LIST
92 #define CODESET_LIST			(__locale_mmap->codeset_list)
93 
94 #ifdef __UCLIBC_HAS_XLOCALE__
95 #include <locale.h>
96 #else /* __UCLIBC_HAS_XLOCALE__ */
97 /* We need this internally... */
98 #define __UCLIBC_HAS_XLOCALE__ 1
99 #include <locale.h>
100 #undef __UCLIBC_HAS_XLOCALE__
101 #endif /* __UCLIBC_HAS_XLOCALE__ */
102 
103 #include <wchar.h>
104 
105 #define LOCALE_NAMES			(__locale_mmap->locale_names5)
106 #define LOCALES					(__locale_mmap->locales)
107 #define LOCALE_AT_MODIFIERS		(__locale_mmap->locale_at_modifiers)
108 #define CATEGORY_NAMES			(__locale_mmap->lc_names)
109 
110 #ifdef __UCLIBC_MJN3_ONLY__
111 #warning REMINDER: redo the MAX_LOCALE_STR stuff...
112 #endif
113 #define MAX_LOCALE_STR			256 /* TODO: Only sufficient for current case. */
114 #define MAX_LOCALE_CATEGORY_STR	32 /* TODO: Only sufficient for current case. */
115 /* Note: Best if MAX_LOCALE_CATEGORY_STR is a power of 2. */
116 
117 extern int _locale_set_l(const unsigned char *p, __locale_t base) attribute_hidden;
118 extern void _locale_init_l(__locale_t base) attribute_hidden;
119 
120 #endif /* __LOCALE_C_ONLY */
121 
122 #undef LOCALE_STRING_SIZE
123 #define LOCALE_SELECTOR_SIZE (2 * __LC_ALL + 2)
124 
125 #ifdef __UCLIBC_MJN3_ONLY__
126 #ifdef L_setlocale
127 #warning TODO: Create a C locale selector string.
128 #endif
129 #endif
130 #define C_LOCALE_SELECTOR "\x23\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"
131 
132 
133 #include <langinfo.h>
134 #include <nl_types.h>
135 
136 /**********************************************************************/
137 #ifdef L_setlocale
138 
139 #ifdef __LOCALE_C_ONLY
140 
141 link_warning(setlocale,"REMINDER: The 'setlocale' function supports only C|POSIX locales.")
142 
static const char C_string[] = "C";

/* setlocale() for builds that support only the C/POSIX locale.
 *
 * category: one of the LC_* constants; anything outside 0..LC_ALL fails.
 * locale:   NULL (query), "" (implementation default), "C" or "POSIX".
 *
 * Returns a pointer to the string "C" for every accepted request, or
 * NULL if the category is out of range or any other locale is named.
 */
char *setlocale(int category, register const char *locale)
{
	/* The unsigned cast makes negative categories fail the range test. */
	if (((unsigned int) category) > LC_ALL) {
		return NULL;
	}

	if ((locale == NULL)			/* Request for locale category string. */
		|| (locale[0] == '\0')		/* Implementation-defined default is C. */
		|| ((locale[0] == 'C') && (locale[1] == '\0'))
		|| (strcmp(locale, "POSIX") == 0)
		) {
		return (char *) C_string;	/* Always in C/POSIX locale. */
	}

	return NULL;
}
155 
156 #else /* ---------------------------------------------- __LOCALE_C_ONLY */
157 
158 #ifdef __UCLIBC_HAS_THREADS__
159 link_warning(setlocale,"REMINDER: The 'setlocale' function is _not_ threadsafe except for simple queries.")
160 #endif
161 
162 #if !defined(__LOCALE_DATA_NUM_LOCALES) || (__LOCALE_DATA_NUM_LOCALES <= 1)
163 #error locales enabled, but not data other than for C locale!
164 #endif
165 
166 #ifdef __UCLIBC_MJN3_ONLY__
167 #warning TODO: Move posix and utf8 strings.
168 #endif
169 static const char posix[] = "POSIX";
170 static const char utf8[] = "UTF-8";
171 
172 #ifdef __UCLIBC_MJN3_ONLY__
173 #warning TODO: Fix dimensions of hr_locale.
174 #endif
/* Individual category strings start at
 * hr_locale + category * MAX_LOCALE_CATEGORY_STR.
 * This holds for LC_ALL as well.
 */
178 static char hr_locale[(MAX_LOCALE_CATEGORY_STR * LC_ALL) + MAX_LOCALE_STR];
179 
180 
update_hr_locale(const unsigned char * spec)181 static void update_hr_locale(const unsigned char *spec)
182 {
183 	const unsigned char *loc;
184 	const unsigned char *s;
185 	char *n;
186 	int i, category, done;
187 
188 	done = category = 0;
189 	do {
190 		s = spec + 1;
191 		n = hr_locale + category * MAX_LOCALE_CATEGORY_STR;
192 
193 		if (category == LC_ALL) {
194 			done = 1;
195 			for (i = 0 ; i < LC_ALL-1 ; i += 2) {
196 				if ((s[i] != s[i+2]) || (s[i+1] != s[i+3])) {
197 					goto SKIP;
198 				}
199 			}
200 			/* All categories the same, so simplify string by using a single
201 			 * category. */
202 			category = LC_CTYPE;
203 		}
204 
205 	SKIP:
206 		i = (category == LC_ALL) ? 0 : category;
207 		s += 2*i;
208 
209 		do {
210 			if ((*s != 0xff) || (s[1] != 0xff)) {
211 				loc = LOCALES
212 					+ __LOCALE_DATA_WIDTH_LOCALES * ((((int)(*s & 0x7f)) << 7)
213 													 + (s[1] & 0x7f));
214 				if (category == LC_ALL) {
215 					/* CATEGORY_NAMES is unsigned char* */
216 					n = stpcpy(n, (char*) CATEGORY_NAMES + (int) CATEGORY_NAMES[i]);
217 					*n++ = '=';
218 				}
219 				if (*loc == 0) {
220 					*n++ = 'C';
221 					*n = 0;
222 				} else {
223 					char at = 0;
224 					memcpy(n, LOCALE_NAMES + 5*((*loc)-1), 5);
225 					if (n[2] != '_') {
226 						at = n[2];
227 						n[2] = '_';
228 					}
229 					n += 5;
230 					*n++ = '.';
231 					if (loc[2] == 2) {
232 						n = stpcpy(n, utf8);
233 					} else if (loc[2] >= 3) {
234 						n = stpcpy(n, (char*) CODESET_LIST + (int)(CODESET_LIST[loc[2] - 3]));
235 					}
236 					if (at) {
237 						const char *q;
238 						*n++ = '@';
239 						q = (char*) LOCALE_AT_MODIFIERS;
240 						do {
241 							if (q[1] == at) {
242 								n = stpcpy(n, q+2);
243 								break;
244 							}
245 							q += 2 + *q;
246 						} while (*q);
247 					}
248 				}
249 				*n++ = ';';
250 			}
251 			s += 2;
252 		} while (++i < category);
253 		*--n = 0;		/* Remove trailing ';' and nul-terminate. */
254 
255 		++category;
256 	} while (!done);
257 }
258 
setlocale(int category,const char * locale)259 char *setlocale(int category, const char *locale)
260 {
261 	if (((unsigned int)(category)) > LC_ALL) {
262 #if 0
263 		__set_errno(EINVAL);	/* glibc sets errno -- SUSv3 doesn't say. */
264 #endif
265 		return NULL;			/* Illegal/unsupported category. */
266 	}
267 
268 	if (locale != NULL) {		/* Not just a query... */
269 		if (!newlocale((1 << category), locale, __global_locale)) {
270 			return NULL;		/* Failed! */
271 		}
272 		update_hr_locale(__global_locale->cur_locale);
273 	}
274 
275 	/* Either a query or a successful set, so return current locale string. */
276 	return hr_locale + (category * MAX_LOCALE_CATEGORY_STR);
277 }
278 
279 #endif /* __LOCALE_C_ONLY */
280 
281 #endif
282 /**********************************************************************/
283 #ifdef L_localeconv
284 
/* Note: We assume here that the compiler does the sane thing regarding
 * placement of the fields in the struct.  If necessary, we could ensure
 * this using an array of offsets, but at some size cost. */
288 
289 
290 #ifdef __LOCALE_C_ONLY
291 
292 link_warning(localeconv,"REMINDER: The 'localeconv' function is hardwired for C/POSIX locale only.")
293 
static struct lconv the_lconv;

static const char decpt[] = ".";

/* localeconv() hardwired for the C/POSIX locale.
 *
 * Fills the static lconv object on every call: decimal_point is ".",
 * every other string member up to and including negative_sign is the
 * empty string, and every char member from int_frac_digits through
 * int_n_sign_posn is CHAR_MAX.
 *
 * This walks the struct as an array of pointers followed by an array of
 * chars, so it relies on the members being laid out contiguously in
 * declaration order.
 */
struct lconv *localeconv(void)
{
	char **str_field = (char **) &the_lconv;
	char **const last_str_field = (char **) &the_lconv.negative_sign;
	char *num_field;

	/* First string member is the decimal point... */
	*str_field = (char *) decpt;

	/* ...and all remaining ones point at the terminating nul of decpt,
	 * i.e. at an empty string. */
	while (str_field < last_str_field) {
		*++str_field = (char *) (decpt + 1);
	}

	for (num_field = &the_lconv.int_frac_digits;
		 num_field <= &the_lconv.int_n_sign_posn;
		 ++num_field) {
		*num_field = CHAR_MAX;
	}

	return &the_lconv;
}
316 
317 #else /* __LOCALE_C_ONLY */
318 
319 static struct lconv the_lconv;
320 
localeconv(void)321 struct lconv *localeconv(void)
322 {
323 	register char *p = (char *) &the_lconv;
324 	register char **q = (char **) &(__UCLIBC_CURLOCALE->decimal_point);
325 
326 	do {
327 		*((char **)p) = *q;
328 		p += sizeof(char **);
329 		++q;
330 	} while (p < &the_lconv.int_frac_digits);
331 
332 	do {
333 		*p = **q;
334 		++p;
335 		++q;
336 	} while (p <= &the_lconv.int_n_sign_posn);
337 
338 	return &the_lconv;
339 }
340 
341 #endif /* __LOCALE_C_ONLY */
342 
343 libc_hidden_def(localeconv)
344 
345 #endif
346 /**********************************************************************/
347 #if defined(L__locale_init) && !defined(__LOCALE_C_ONLY)
348 
349 struct __uclibc_locale_struct __global_locale_data;
350 
351 __locale_t __global_locale = &__global_locale_data;
352 
353 #ifdef __UCLIBC_HAS_XLOCALE__
354 __locale_t __curlocale_var = &__global_locale_data;
355 #endif
356 
357 /*----------------------------------------------------------------------*/
358 #ifdef __UCLIBC_MJN3_ONLY__
359 #warning TODO: Move utf8 and ascii strings.
360 #endif
361 static const char utf8[] = "UTF-8";
362 static const char ascii[] = "ASCII";
363 
/* Header at the very start of the collation data table; init_cur_collate()
 * casts __locale_mmap->collate_data to this type.  The counts below are used
 * there to locate the regions that follow the header.  Region lengths are
 * added to an index into a uint16_t array, so they are in uint16_t units. */
typedef struct {
	uint16_t num_base;			/* number of coldata_base_t records after the header */
	uint16_t num_der;			/* number of coldata_der_t records after the base records */
	uint16_t MAX_WEIGHTS;		/* copied verbatim into the active collate state */
	uint16_t num_index2weight;	/* length of the index2weight region */
/* The index2ruleidx region has the same length as index2weight. */
#define num_index2ruleidx num_index2weight
	uint16_t num_weightstr;		/* length of the weight-string region */
	uint16_t num_multistart;	/* length of the multistart region */
	uint16_t num_override;		/* length of the overrides region */
	uint16_t num_ruletable;		/* length of the rule table region */
} coldata_header_t;
375 
/* Per-base collation record stored after the header in the collation data.
 * init_cur_collate() memcpy()s every field before index2weight_offset
 * straight into the __collate_t, so that leading part presumably mirrors
 * the start of __collate_t (NOTE(review): confirm against its definition). */
typedef struct {
	uint16_t num_weights;
	uint16_t num_starters;
	uint16_t ii_shift;			/* ii_mask is derived as (1 << ii_shift) - 1 */
	uint16_t ti_shift;			/* ti_mask is derived as (1 << ti_shift) - 1 */
	uint16_t ii_len;
	uint16_t ti_len;
	uint16_t max_weight;
	uint16_t num_col_base;		/* entries copied from the base index tables */
	uint16_t max_col_index;		/* sizes the per-locale index2weight/index2ruleidx arrays */
	uint16_t undefined_idx;		/* replaced by the derived record's value */
	uint16_t range_low;
	uint16_t range_count;
	uint16_t range_base_weight;
	uint16_t range_rule_offset;

	/* Offsets of this base's data within the shared regions; these are
	 * not copied into __collate_t. */
	uint16_t index2weight_offset;
	uint16_t index2ruleidx_offset;
	uint16_t multistart_offset;
	uint16_t wcs2colidt_offset_low;	/* low 16 bits of the wcs2colidt offset */
	uint16_t wcs2colidt_offset_hi;	/* high 16 bits of the wcs2colidt offset */
} coldata_base_t;
398 
/* Derived collation record: names a base record and supplies the values
 * that init_cur_collate() takes from the derived record instead. */
typedef struct {
	uint16_t base_idx;			/* index of the coldata_base_t this record derives from */
	uint16_t undefined_idx;		/* overrides the base record's undefined_idx */
	uint16_t overrides_offset;	/* offset into the overrides region */
	uint16_t multistart_offset;	/* offset into the multistart region */
} coldata_der_t;
405 
/* Fill in *cur_collate for the collation definition numbered der_num.
 *
 * der_num:     0 selects the C locale (only num_weights is set, to 0);
 *              otherwise it is a 1-based index of a coldata_der_t record
 *              in __locale_mmap->collate_data.
 * cur_collate: state to initialize.  On success for a non-C locale,
 *              cur_collate->index2weight points to a calloc()ed block
 *              that also holds index2ruleidx.
 *
 * Returns 1 on success and 0 if the calloc() fails.
 */
static int init_cur_collate(int der_num, __collate_t *cur_collate)
{
	const uint16_t *__locale_collate_tbl = __locale_mmap->collate_data;
	coldata_header_t *cdh;
	coldata_base_t *cdb;
	coldata_der_t *cdd;
	const uint16_t *p;
	size_t n;
	uint16_t i, w;

#ifdef __UCLIBC_MJN3_ONLY__
#warning kill of x86-specific asserts
#endif
#if 0
	assert(sizeof(coldata_base_t) == 19*2);
	assert(sizeof(coldata_der_t) == 4*2);
	assert(sizeof(coldata_header_t) == 8*2);
#endif

	if (!der_num) { 			/* C locale... special */
		cur_collate->num_weights = 0;
		return 1;
	}

	/* Convert to a 0-based index into the derived records. */
	--der_num;

	cdh = (coldata_header_t *) __locale_collate_tbl;

#ifdef __UCLIBC_MJN3_ONLY__
#warning CONSIDER: Should we assert here?
#endif
#if 0
	if (der_num >= cdh->num_der) {
		return 0;
	}
#else
	assert((der_num < cdh->num_der));
#endif

	/* The byte offsets are divided by 2 because the table pointer is a
	 * uint16_t pointer.  The derived records follow all the base records. */
	cdd = (coldata_der_t *)(__locale_collate_tbl
							+ (sizeof(coldata_header_t)
							   + cdh->num_base * sizeof(coldata_base_t)
							   + der_num * sizeof(coldata_der_t)
							   )/2 );

	/* Base record named by the derived record. */
	cdb = (coldata_base_t *)(__locale_collate_tbl
							 + (sizeof(coldata_header_t)
								+ cdd->base_idx * sizeof(coldata_base_t)
								)/2 );

	/* Copy the leading fields of the base record (everything before the
	 * offset fields), then apply the derived record's undefined_idx. */
	memcpy(cur_collate, cdb, offsetof(coldata_base_t,index2weight_offset));
	cur_collate->undefined_idx = cdd->undefined_idx;

	cur_collate->ti_mask = (1 << cur_collate->ti_shift)-1;
	cur_collate->ii_mask = (1 << cur_collate->ii_shift)-1;

/* 	fflush(stdout); */
/* 	fprintf(stderr,"base=%d  num_col_base: %d  %d\n", cdd->base_idx ,cur_collate->num_col_base, cdb->num_col_base); */

	/* n = index (in uint16_t units) of the first region after the header,
	 * base records and derived records.  The regions then follow in this
	 * order: index2weight, index2ruleidx, multistart, overrides, ruletable,
	 * weightstr, wcs2colidt. */
	n = (sizeof(coldata_header_t) + cdh->num_base * sizeof(coldata_base_t)
		 + cdh->num_der * sizeof(coldata_der_t))/2;

/* 	fprintf(stderr,"n   = %d\n", n); */
	cur_collate->index2weight_tbl = __locale_collate_tbl + n + cdb->index2weight_offset;
/* 	fprintf(stderr,"i2w = %d\n", n + cdb->index2weight_offset); */
	n += cdh->num_index2weight;
	cur_collate->index2ruleidx_tbl = __locale_collate_tbl + n + cdb->index2ruleidx_offset;
/* 	fprintf(stderr,"i2r = %d\n", n + cdb->index2ruleidx_offset); */
	n += cdh->num_index2ruleidx;
	/* multistart and overrides are located using the derived record's
	 * offsets, not the base record's. */
	cur_collate->multistart_tbl = __locale_collate_tbl + n + cdd->multistart_offset;
/* 	fprintf(stderr,"mts = %d\n", n + cdb->multistart_offset); */
	n += cdh->num_multistart;
	cur_collate->overrides_tbl = __locale_collate_tbl + n + cdd->overrides_offset;
/* 	fprintf(stderr,"ovr = %d\n", n + cdd->overrides_offset); */
	n += cdh->num_override;
	cur_collate->ruletable = __locale_collate_tbl + n;
/* 	fprintf(stderr, "rtb = %d\n", n); */
	n += cdh->num_ruletable;
	cur_collate->weightstr = __locale_collate_tbl + n;
/* 	fprintf(stderr,"wts = %d\n", n); */
	n += cdh->num_weightstr;
	/* The wcs2colidt offset is stored as two 16-bit halves. */
	cur_collate->wcs2colidt_tbl = __locale_collate_tbl + n
		+ (((unsigned long)(cdb->wcs2colidt_offset_hi)) << 16)
		+ cdb->wcs2colidt_offset_low;
/* 	fprintf(stderr,"wcs = %lu\n", n	+ (((unsigned long)(cdb->wcs2colidt_offset_hi)) << 16) */
/* 			+ cdb->wcs2colidt_offset_low); */

	cur_collate->MAX_WEIGHTS = cdh->MAX_WEIGHTS;

#ifdef __UCLIBC_MJN3_ONLY__
#warning CONSIDER: Fix the +1 by increasing max_col_index?
#warning CONSIDER: Since this collate info is dependent only on LC_COLLATE ll_cc and not on codeset, we could just globally allocate this for each in a table
#endif

	/* One zeroed allocation holds both per-locale arrays, each with
	 * max_col_index + 1 entries: index2weight first, index2ruleidx after. */
	cur_collate->index2weight = calloc(2*cur_collate->max_col_index+2,
									   sizeof(uint16_t));
	if (!cur_collate->index2weight) {
		return 0;
	}
	cur_collate->index2ruleidx = cur_collate->index2weight
		+ cur_collate->max_col_index + 1;

	/* Start from the base tables... */
	memcpy(cur_collate->index2weight, cur_collate->index2weight_tbl,
		   cur_collate->num_col_base * sizeof(uint16_t));
	memcpy(cur_collate->index2ruleidx, cur_collate->index2ruleidx_tbl,
		   cur_collate->num_col_base * sizeof(uint16_t));

	/* now do the overrides */
	/* First section: groups of (count > 1, starting weight) followed by
	 * 'count' pairs of (1-based index, rule index); the weight increases
	 * by one for each pair in the group. */
	p = cur_collate->overrides_tbl;
	while (*p > 1) {
/* 		fprintf(stderr, "processing override -- count = %d\n", *p); */
		n = *p++;
		w = *p++;
		do {
			i = *p++;
/* 			fprintf(stderr, "	i=%d (%#x) w=%d *p=%d\n", i, i, w, *p); */
			cur_collate->index2weight[i-1] = w++;
			cur_collate->index2ruleidx[i-1] = *p++;
		} while (--n);
	}
	/* A value of 1 separates the two sections.  Second section: triples of
	 * (1-based index, weight, rule index), terminated by a 0 index. */
	assert(*p == 1);
	while (*++p) {
		i = *p;
/* 		fprintf(stderr, "	i=%d (%#x) w=%d *p=%d\n", i, i, p[1], p[2]); */
		cur_collate->index2weight[i-1] = *++p;
		cur_collate->index2ruleidx[i-1] = *++p;
	}


	/* Walk the multistart table entries.  Nothing is stored by this loop;
	 * NOTE(review): it looks like a leftover debug/validation traversal
	 * (all of its output statements are commented out) -- confirm before
	 * removing. */
	for (i=0 ; i < cur_collate->multistart_tbl[0] ; i++) {
		p = cur_collate->multistart_tbl;
/* 		fprintf(stderr, "%2d of %2d: %d ", i,  cur_collate->multistart_tbl[0], p[i]); */
		p += p[i];

		do {
			n = *p++;
			do {
				if (!*p) {		/* found it */
/* 					fprintf(stderr, "found: n=%d (%#lx) |%.*ls|\n", n, (int) *cs->s, n, cs->s); */
/* 					fprintf(stderr, ": %d - single\n", n); */
					goto FOUND;
 				}
				/* the lookup check here is safe since we're assured that *p is a valid colidex */
/* 				fprintf(stderr, "lookup(%lc)==%d  *p==%d\n", cs->s[n], lookup(cs->s[n]), (int) *p); */
/* 				fprintf(stderr, ": %d - ", n); */
				do {
/* 					fprintf(stderr, "%d|",  *p); */
				} while (*p++);
				break;
			} while (1);
		} while (1);
	FOUND:
		continue;
	}

	return 1;
}
563 
_locale_set_l(const unsigned char * p,__locale_t base)564 int attribute_hidden _locale_set_l(const unsigned char *p, __locale_t base)
565 {
566 	const char **x;
567 	unsigned char *s = base->cur_locale + 1;
568 	const size_t *stp;
569 	const unsigned char *r;
570 	const uint16_t *io;
571 	const uint16_t *ii;
572 	const unsigned char *d;
573 	int row;					/* locale row */
574 	int crow;					/* category row */
575 	int len;
576 	int c;
577 	int i = 0;
578 	__collate_t newcol;
579 
580 	++p;
581 
582 	newcol.index2weight = NULL;
583 	if ((p[2*LC_COLLATE] != s[2*LC_COLLATE])
584 		|| (p[2*LC_COLLATE + 1] != s[2*LC_COLLATE + 1])
585 		) {
586 		row = (((int)(*p & 0x7f)) << 7) + (p[1] & 0x7f);
587 		assert(row < __LOCALE_DATA_NUM_LOCALES);
588 		if (!init_cur_collate(__locale_mmap->locales[ __LOCALE_DATA_WIDTH_LOCALES
589 													  * row + 3 + LC_COLLATE ],
590 							  &newcol)
591 			) {
592 			return 0;			/* calloc failed. */
593 		}
594 		free(base->collate.index2weight);
595 		memcpy(&base->collate, &newcol, sizeof(__collate_t));
596 	}
597 
598 	do {
599 		if ((*p != *s) || (p[1] != s[1])) {
600 			row = (((int)(*p & 0x7f)) << 7) + (p[1] & 0x7f);
601 			assert(row < __LOCALE_DATA_NUM_LOCALES);
602 
603 			*s = *p;
604 			s[1] = p[1];
605 
606 			if ((i != LC_COLLATE)
607 				&& ((len = __locale_mmap->lc_common_item_offsets_LEN[i]) != 0)
608 				) {
609 				crow = __locale_mmap->locales[ __LOCALE_DATA_WIDTH_LOCALES * row
610 											   + 3 + i ]
611 					* len;
612 
613 				x = (const char **)(((char *) base)
614                                     + base->category_offsets[i]);
615 
616 				stp = __locale_mmap->lc_common_tbl_offsets + 4*i;
617 				r = (const unsigned char *)( ((char *)__locale_mmap) + *stp );
618 				io = (const uint16_t *)( ((char *)__locale_mmap) + *++stp );
619 				ii = (const uint16_t *)( ((char *)__locale_mmap) + *++stp );
620 				d = (const unsigned char *)( ((char *)__locale_mmap) + *++stp );
621 				for (c = 0; c < len; c++) {
622 					x[c] = (char*)(d + ii[r[crow + c] + io[c]]);
623 				}
624 			}
625 			if (i == LC_CTYPE) {
626 				c = __locale_mmap->locales[ __LOCALE_DATA_WIDTH_LOCALES * row
627 											+ 2 ]; /* codeset */
628 				if (c <= 2) {
629 					if (c == 2) {
630 						base->codeset = utf8;
631 						base->encoding = __ctype_encoding_utf8;
632 						/* TODO - fix for bcc */
633 						base->mb_cur_max = 6;
634 					} else {
635 						assert(c == 1);
636 						base->codeset = ascii;
637 						base->encoding = __ctype_encoding_7_bit;
638 						base->mb_cur_max = 1;
639 					}
640 				} else {
641 					const __codeset_8_bit_t *c8b;
642 					r = CODESET_LIST;
643 					c -= 3;
644 					base->codeset = (char *) (r + r[c]);
645 					base->encoding = __ctype_encoding_8_bit;
646 #ifdef __UCLIBC_MJN3_ONLY__
647 #warning REMINDER: update 8 bit mb_cur_max when translit implemented!
648 #endif
649 					/* TODO - update when translit implemented! */
650 					base->mb_cur_max = 1;
651 					c8b = __locale_mmap->codeset_8_bit + c;
652 #ifdef __CTYPE_HAS_8_BIT_LOCALES
653 					base->idx8ctype = c8b->idx8ctype;
654 					base->idx8uplow = c8b->idx8uplow;
655 #ifdef __UCLIBC_HAS_WCHAR__
656 					base->idx8c2wc = c8b->idx8c2wc;
657 					base->idx8wc2c = c8b->idx8wc2c;
658 					/* translit  */
659 #endif /* __UCLIBC_HAS_WCHAR__ */
660 
661 					/* What follows is fairly bloated, but it is just a hack
662 					 * to get the 8-bit codeset ctype stuff functioning.
663 					 * All of this will be replaced in the next generation
664 					 * of locale support anyway... */
665 
666 					memcpy(base->__ctype_b_data,
667 						   __C_ctype_b - __UCLIBC_CTYPE_B_TBL_OFFSET,
668 						   (256 + __UCLIBC_CTYPE_B_TBL_OFFSET)
669 						   * sizeof(__ctype_mask_t));
670 					memcpy(base->__ctype_tolower_data,
671 						   __C_ctype_tolower - __UCLIBC_CTYPE_TO_TBL_OFFSET,
672 						   (256 + __UCLIBC_CTYPE_TO_TBL_OFFSET)
673 						   * sizeof(__ctype_touplow_t));
674 					memcpy(base->__ctype_toupper_data,
675 						   __C_ctype_toupper - __UCLIBC_CTYPE_TO_TBL_OFFSET,
676 						   (256 + __UCLIBC_CTYPE_TO_TBL_OFFSET)
677 						   * sizeof(__ctype_touplow_t));
678 
679 #define Cctype_TBL_MASK		((1 << __LOCALE_DATA_Cctype_IDX_SHIFT) - 1)
680 #define Cctype_IDX_OFFSET	(128 >> __LOCALE_DATA_Cctype_IDX_SHIFT)
681 
682 					{
683 						int u;
684 						__ctype_mask_t m;
685 
686 						for (u=0 ; u < 128 ; u++) {
687 #ifdef __LOCALE_DATA_Cctype_PACKED
688 							c = base->tbl8ctype
689 								[ ((int)(c8b->idx8ctype
690 										 [(u >> __LOCALE_DATA_Cctype_IDX_SHIFT) ])
691 								   << (__LOCALE_DATA_Cctype_IDX_SHIFT - 1))
692 								  + ((u & Cctype_TBL_MASK) >> 1)];
693 							c = (u & 1) ? (c >> 4) : (c & 0xf);
694 #else
695 							c = base->tbl8ctype
696 								[ ((int)(c8b->idx8ctype
697 										 [(u >> __LOCALE_DATA_Cctype_IDX_SHIFT) ])
698 								   << __LOCALE_DATA_Cctype_IDX_SHIFT)
699 								  + (u & Cctype_TBL_MASK) ];
700 #endif
701 
702 							m = base->code2flag[c];
703 
704 							base->__ctype_b_data
705 								[128 + __UCLIBC_CTYPE_B_TBL_OFFSET + u]
706 								= m;
707 
708 #ifdef __UCLIBC_HAS_CTYPE_SIGNED__
709 							if (((signed char)(128 + u)) != -1) {
710 								base->__ctype_b_data[__UCLIBC_CTYPE_B_TBL_OFFSET
711 													 + ((signed char)(128 + u))]
712 									= m;
713 							}
714 #endif
715 
716 							base->__ctype_tolower_data
717 								[128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
718 								= 128 + u;
719 							base->__ctype_toupper_data
720 								[128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
721 								= 128 + u;
722 
723 							if (m & (_ISlower|_ISupper)) {
724 								c = base->tbl8uplow
725 									[ ((int)(c8b->idx8uplow
726 											 [u >> __LOCALE_DATA_Cuplow_IDX_SHIFT])
727 									   << __LOCALE_DATA_Cuplow_IDX_SHIFT)
728 									  + ((128 + u)
729 										 & ((1 << __LOCALE_DATA_Cuplow_IDX_SHIFT)
730 											- 1)) ];
731 								if (m & _ISlower) {
732 									base->__ctype_toupper_data
733 										[128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
734 										= (unsigned char)(128 + u + c);
735 #ifdef __UCLIBC_HAS_CTYPE_SIGNED__
736 									if (((signed char)(128 + u)) != -1) {
737 										base->__ctype_toupper_data
738 											[__UCLIBC_CTYPE_TO_TBL_OFFSET
739 											 + ((signed char)(128 + u))]
740 											= (unsigned char)(128 + u + c);
741 									}
742 #endif
743 								} else {
744 									base->__ctype_tolower_data
745 										[128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
746 										= (unsigned char)(128 + u - c);
747 #ifdef __UCLIBC_HAS_CTYPE_SIGNED__
748 									if (((signed char)(128 + u)) != -1) {
749 										base->__ctype_tolower_data
750 											[__UCLIBC_CTYPE_TO_TBL_OFFSET
751 											 + ((signed char)(128 + u))]
752 											= (unsigned char)(128 + u - c);
753 									}
754 #endif
755 								}
756 							}
757 						}
758 					}
759 
760 #ifdef __UCLIBC_HAS_XLOCALE__
761 					base->__ctype_b = base->__ctype_b_data
762 						+ __UCLIBC_CTYPE_B_TBL_OFFSET;
763 					base->__ctype_tolower = base->__ctype_tolower_data
764 						+ __UCLIBC_CTYPE_TO_TBL_OFFSET;
765 					base->__ctype_toupper = base->__ctype_toupper_data
766 						+ __UCLIBC_CTYPE_TO_TBL_OFFSET;
767 #else /* __UCLIBC_HAS_XLOCALE__ */
768 					__ctype_b = base->__ctype_b_data
769 						+ __UCLIBC_CTYPE_B_TBL_OFFSET;
770 					__ctype_tolower = base->__ctype_tolower_data
771 						+ __UCLIBC_CTYPE_TO_TBL_OFFSET;
772 					__ctype_toupper = base->__ctype_toupper_data
773 						+ __UCLIBC_CTYPE_TO_TBL_OFFSET;
774 #endif /* __UCLIBC_HAS_XLOCALE__ */
775 
776 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
777 				}
778 #ifdef __UCLIBC_MJN3_ONLY__
779 #warning TODO: Put the outdigit string length in the locale_mmap object.
780 #endif
781 				d = base->outdigit_length;
782 				x = &base->outdigit0_mb;
783 				for (c = 0 ; c < 10 ; c++) {
784 					((unsigned char *)d)[c] = strlen(x[c]);
785 					assert(d[c] > 0);
786 				}
787 			} else if (i == LC_NUMERIC) {
788 				assert(LC_NUMERIC > LC_CTYPE); /* Need ctype initialized. */
789 
790 				base->decimal_point_len
791 					= __locale_mbrtowc_l(&base->decimal_point_wc,
792 											base->decimal_point, base);
793 				assert(base->decimal_point_len > 0);
794 				assert(base->decimal_point[base->decimal_point_len] == 0);
795 
796 				if (*base->grouping) {
797 					base->thousands_sep_len
798 						= __locale_mbrtowc_l(&base->thousands_sep_wc,
799 											 base->thousands_sep, base);
800 #if 1
801 #ifdef __UCLIBC_MJN3_ONLY__
802 #warning TODO: Remove hack involving grouping without a thousep char (bg_BG).
803 #endif
804 					assert(base->thousands_sep_len >= 0);
805 					if (base->thousands_sep_len == 0) {
806 						base->grouping = base->thousands_sep; /* empty string */
807 					}
808 					assert(base->thousands_sep[base->thousands_sep_len] == 0);
809 #else
810 					assert(base->thousands_sep_len > 0);
811 					assert(base->thousands_sep[base->thousands_sep_len] == 0);
812 #endif
813 				}
814 
815 /* 			} else if (i == LC_COLLATE) { */
816 /* 				init_cur_collate(__locale_mmap->locales[ __LOCALE_DATA_WIDTH_LOCALES */
817 /* 														 * row + 3 + i ], */
818 /* 								 &base->collate); */
819 			}
820 		}
821 		++i;
822 		p += 2;
823 		s += 2;
824 	} while (i < LC_ALL);
825 
826 	return 1;
827 }
828 
/* Translation from the character class codes stored in the 8-bit codeset
 * ctype tables to <ctype.h> flag masks.  _locale_init_l() installs this
 * table as base->code2flag.  Code 15 is unused and maps to no flags. */
static const uint16_t __code2flag[16] = {
	[0]  = 0,													/* unclassified */
	[1]  = _ISalpha|_ISalnum|_ISgraph|_ISprint,					/* alpha_nonupper_nonlower */
	[2]  = _ISlower|_ISalpha|_ISalnum|_ISgraph|_ISprint,			/* alpha_lower */
	[3]  = _ISupper|_ISlower|_ISalpha|_ISalnum|_ISgraph|_ISprint,	/* alpha_upper_lower */
	[4]  = _ISupper|_ISalpha|_ISalnum|_ISgraph|_ISprint,			/* alpha_upper */
	[5]  = _ISdigit|_ISalnum|_ISgraph|_ISprint,					/* digit */
	[6]  = _ISpunct|_ISgraph|_ISprint,							/* punct */
	[7]  = _ISgraph|_ISprint,									/* graph */
	[8]  = _ISspace|_ISprint,									/* print_space_nonblank */
	[9]  = _ISblank|_ISspace|_ISprint,							/* print_space_blank */
	[10] = _ISspace,												/* space_nonblank_noncntrl */
	[11] = _ISblank|_ISspace,									/* space_blank_noncntrl */
	[12] = _ISspace|_IScntrl,									/* cntrl_space_nonblank */
	[13] = _ISblank|_ISspace|_IScntrl,							/* cntrl_space_blank */
	[14] = _IScntrl,												/* cntrl_nonspace */
	[15] = 0
};
846 
_locale_init_l(__locale_t base)847 void attribute_hidden _locale_init_l(__locale_t base)
848 {
849 	memset(base->cur_locale, 0, LOCALE_SELECTOR_SIZE);
850 	base->cur_locale[0] = '#';
851 
852 	memcpy(base->category_item_count,
853 		   __locale_mmap->lc_common_item_offsets_LEN,
854 		   LC_ALL);
855 
856 	++base->category_item_count[0]; /* Increment for codeset entry. */
857 	base->category_offsets[0] = offsetof(struct __uclibc_locale_struct, outdigit0_mb);
858 	base->category_offsets[1] = offsetof(struct __uclibc_locale_struct, decimal_point);
859 	base->category_offsets[2] = offsetof(struct __uclibc_locale_struct, int_curr_symbol);
860 	base->category_offsets[3] = offsetof(struct __uclibc_locale_struct, abday_1);
861 /*  	base->category_offsets[4] = offsetof(struct __uclibc_locale_struct, collate???); */
862 	base->category_offsets[5] = offsetof(struct __uclibc_locale_struct, yesexpr);
863 
864 #ifdef __CTYPE_HAS_8_BIT_LOCALES
865 	base->tbl8ctype
866 		= (const unsigned char *) &__locale_mmap->tbl8ctype;
867 	base->tbl8uplow
868 		= (const unsigned char *) &__locale_mmap->tbl8uplow;
869 #ifdef __UCLIBC_HAS_WCHAR__
870 	base->tbl8c2wc
871 		= (const uint16_t *) &__locale_mmap->tbl8c2wc;
872 	base->tbl8wc2c
873 		= (const unsigned char *) &__locale_mmap->tbl8wc2c;
874 	/* translit  */
875 #endif /* __UCLIBC_HAS_WCHAR__ */
876 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
877 #ifdef __UCLIBC_HAS_WCHAR__
878 	base->tblwctype
879 		= (const unsigned char *) &__locale_mmap->tblwctype;
880 	base->tblwuplow
881 		= (const unsigned char *) &__locale_mmap->tblwuplow;
882 	base->tblwuplow_diff
883 		= (const int16_t *) &__locale_mmap->tblwuplow_diff;
884 /* 	base->tblwcomb */
885 /* 		= (const unsigned char *) &__locale_mmap->tblwcomb; */
886 	/* width?? */
887 #endif /* __UCLIBC_HAS_WCHAR__ */
888 
889 	/* Initially, set things up to use the global C ctype tables.
890 	 * This is correct for C (ASCII) and UTF-8 based locales (except tr_TR). */
891 #ifdef __UCLIBC_HAS_XLOCALE__
892 	base->__ctype_b = __C_ctype_b;
893 	base->__ctype_tolower = __C_ctype_tolower;
894 	base->__ctype_toupper = __C_ctype_toupper;
895 #else /* __UCLIBC_HAS_XLOCALE__ */
896 	__ctype_b = __C_ctype_b;
897 	__ctype_tolower = __C_ctype_tolower;
898 	__ctype_toupper = __C_ctype_toupper;
899 #endif /* __UCLIBC_HAS_XLOCALE__ */
900 
901 #ifdef __UCLIBC_MJN3_ONLY__
902 #warning TODO: Initialize code2flag correctly based on locale_mmap.
903 #endif
904 	base->code2flag = __code2flag;
905 
906 	_locale_set_l((unsigned char*) C_LOCALE_SELECTOR, base);
907 }
908 
/* Initialize the global locale object (__global_locale) by running
 * _locale_init_l() on it. */
void _locale_init(void)
{
	/* TODO: mmap the locale file  */

	/* TODO - ??? */
	_locale_init_l(__global_locale);
}
916 
917 #endif
918 /**********************************************************************/
919 #if defined(L_nl_langinfo) || defined(L_nl_langinfo_l)
920 
921 #ifdef __LOCALE_C_ONLY
922 
923 /* We need to index 320 bytes of data, so you might initially think we
924  * need to store the offsets in shorts.  But since the offset of the
925  * 64th item is 182, we'll store "offset - 2*64" for all items >= 64
926  * and always calculate the data offset as "offset[i] + 2*(i & 64)".
927  * This allows us to pack the data offsets in an unsigned char while
928  * also avoiding an "if".
929  *
930  * Note: Category order is assumed to be:
931  *   ctype, numeric, monetary, time, collate, messages, all
932  */
933 
934 #define C_LC_ALL 6
935 
/* Combine the data to avoid the size penalty for separate char arrays when
 * the compiler aligns objects.  The original code is left in as documentation. */
938 #define cat_start nl_data
939 #define C_locale_data (nl_data + C_LC_ALL + 1 + 90)
940 
941 static const unsigned char nl_data[C_LC_ALL + 1 + 90 + 320] = {
942 /* static const char cat_start[LC_ALL + 1] = { */
943 	'\x00', '\x0b', '\x0e', '\x24', '\x56', '\x56', '\x5a',
944 /* }; */
945 /* static const char item_offset[90] = { */
946 	'\x00', '\x02', '\x04', '\x06', '\x08', '\x0a', '\x0c', '\x0e',
947 	'\x10', '\x12', '\x14', '\x1a', '\x1b', '\x1b', '\x1b', '\x1b',
948 	'\x1b', '\x1b', '\x1b', '\x1b', '\x1b', '\x1c', '\x1c', '\x1c',
949 	'\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c',
950 	'\x1c', '\x1c', '\x1c', '\x1e', '\x20', '\x24', '\x28', '\x2c',
951 	'\x30', '\x34', '\x38', '\x3c', '\x43', '\x4a', '\x52', '\x5c',
952 	'\x65', '\x6c', '\x75', '\x79', '\x7d', '\x81', '\x85', '\x89',
953 	'\x8d', '\x91', '\x95', '\x99', '\x9d', '\xa1', '\xa5', '\xad',
954 	'\x36', '\x3c', '\x42', '\x46', '\x4b', '\x50', '\x57', '\x61',
955 	'\x69', '\x72', '\x7b', '\x7e', '\x81', '\x96', '\x9f', '\xa8',
956 	'\xb3', '\xb3', '\xb3', '\xb3', '\xb3', '\xb3', '\xb4', '\xba',
957 	'\xbf', '\xbf',
958 /* }; */
959 /* static const char C_locale_data[320] = { */
960 	   '0', '\x00',    '1', '\x00',    '2', '\x00',    '3', '\x00',
961 	   '4', '\x00',    '5', '\x00',    '6', '\x00',    '7', '\x00',
962 	   '8', '\x00',    '9', '\x00',    'A',    'S',    'C',    'I',
963 	   'I', '\x00',    '.', '\x00', '\x7f', '\x00',    '-', '\x00',
964 	   'S',    'u',    'n', '\x00',    'M',    'o',    'n', '\x00',
965 	   'T',    'u',    'e', '\x00',    'W',    'e',    'd', '\x00',
966 	   'T',    'h',    'u', '\x00',    'F',    'r',    'i', '\x00',
967 	   'S',    'a',    't', '\x00',    'S',    'u',    'n',    'd',
968 	   'a',    'y', '\x00',    'M',    'o',    'n',    'd',    'a',
969 	   'y', '\x00',    'T',    'u',    'e',    's',    'd',    'a',
970 	   'y', '\x00',    'W',    'e',    'd',    'n',    'e',    's',
971 	   'd',    'a',    'y', '\x00',    'T',    'h',    'u',    'r',
972 	   's',    'd',    'a',    'y', '\x00',    'F',    'r',    'i',
973 	   'd',    'a',    'y', '\x00',    'S',    'a',    't',    'u',
974 	   'r',    'd',    'a',    'y', '\x00',    'J',    'a',    'n',
975 	'\x00',    'F',    'e',    'b', '\x00',    'M',    'a',    'r',
976 	'\x00',    'A',    'p',    'r', '\x00',    'M',    'a',    'y',
977 	'\x00',    'J',    'u',    'n', '\x00',    'J',    'u',    'l',
978 	'\x00',    'A',    'u',    'g', '\x00',    'S',    'e',    'p',
979 	'\x00',    'O',    'c',    't', '\x00',    'N',    'o',    'v',
980 	'\x00',    'D',    'e',    'c', '\x00',    'J',    'a',    'n',
981 	   'u',    'a',    'r',    'y', '\x00',    'F',    'e',    'b',
982 	   'r',    'u',    'a',    'r',    'y', '\x00',    'M',    'a',
983 	   'r',    'c',    'h', '\x00',    'A',    'p',    'r',    'i',
984 	   'l', '\x00',    'M',    'a',    'y', '\x00',    'J',    'u',
985 	   'n',    'e', '\x00',    'J',    'u',    'l',    'y', '\x00',
986 	   'A',    'u',    'g',    'u',    's',    't', '\x00',    'S',
987 	   'e',    'p',    't',    'e',    'm',    'b',    'e',    'r',
988 	'\x00',    'O',    'c',    't',    'o',    'b',    'e',    'r',
989 	'\x00',    'N',    'o',    'v',    'e',    'm',    'b',    'e',
990 	   'r', '\x00',    'D',    'e',    'c',    'e',    'm',    'b',
991 	   'e',    'r', '\x00',    'A',    'M', '\x00',    'P',    'M',
992 	'\x00',    '%',    'a',    ' ',    '%',    'b',    ' ',    '%',
993 	   'e',    ' ',    '%',    'H',    ':',    '%',    'M',    ':',
994 	   '%',    'S',    ' ',    '%',    'Y', '\x00',    '%',    'm',
995 	   '/',    '%',    'd',    '/',    '%',    'y', '\x00',    '%',
996 	   'H',    ':',    '%',    'M',    ':',    '%',    'S', '\x00',
997 	   '%',    'I',    ':',    '%',    'M',    ':',    '%',    'S',
998 	   ' ',    '%',    'p', '\x00',    '^',    '[',    'y',    'Y',
999 	   ']', '\x00',    '^',    '[',    'n',    'N',    ']', '\x00',
1000 };
1001 
nl_langinfo(nl_item item)1002 char *nl_langinfo(nl_item item)
1003 {
1004 	unsigned int c;
1005 	unsigned int i;
1006 
1007 	if ((c = _NL_ITEM_CATEGORY(item)) < C_LC_ALL) {
1008 		if ((i = cat_start[c] + _NL_ITEM_INDEX(item)) < cat_start[c+1]) {
1009 /*  			return (char *) C_locale_data + item_offset[i] + (i & 64); */
1010 			return (char *) C_locale_data + nl_data[C_LC_ALL+1+i] + 2*(i & 64);
1011 		}
1012 	}
1013 	return (char *) cat_start;	/* Conveniently, this is the empty string. */
1014 }
1015 libc_hidden_def(nl_langinfo)
1016 
1017 #else /* __LOCALE_C_ONLY */
1018 
1019 #if defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE)
1020 
1021 
1022 
/* Non-_l entry point when extended locale support is enabled: forwards
 * to nl_langinfo_l() using the current locale (__UCLIBC_CURLOCALE). */
char *nl_langinfo(nl_item item)
{
	return nl_langinfo_l(item, __UCLIBC_CURLOCALE);
}
1027 libc_hidden_def(nl_langinfo)
1028 
1029 #else /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */
1030 
1031 libc_hidden_proto(__XL_NPP(nl_langinfo))
1032 
1033 static const char empty[] = "";
1034 
1035 char *__XL_NPP(nl_langinfo)(nl_item item __LOCALE_PARAM )
1036 {
1037 	unsigned int c = _NL_ITEM_CATEGORY(item);
1038 	unsigned int i = _NL_ITEM_INDEX(item);
1039 
1040 	if ((c < LC_ALL) && (i < __LOCALE_PTR->category_item_count[c])) {
1041 		return ((char **)(((char *) __LOCALE_PTR)
1042 						  + __LOCALE_PTR->category_offsets[c]))[i];
1043 	}
1044 
1045 	return (char *) empty;
1046 }
1047 libc_hidden_def(__XL_NPP(nl_langinfo))
1048 
1049 #endif /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */
1050 
1051 #endif /* __LOCALE_C_ONLY */
1052 
1053 #endif
1054 /**********************************************************************/
1055 #ifdef L_newlocale
1056 
1057 #warning mask defines for extra locale categories
1058 
1059 #ifdef __UCLIBC_MJN3_ONLY__
1060 #warning TODO: Move posix and utf8 strings.
1061 #endif
/* "POSIX" is accepted by find_locale() as an alias for the C locale and is
 * the final fallback name in newlocale()'s environment lookup. */
static const char posix[] = "POSIX";
/* Codeset name compared case-insensitively against the ".codeset" suffix
 * of a locale name in find_locale(). */
static const char utf8[] = "UTF-8";
1064 
find_locale(int category_mask,const char * p,unsigned char * new_locale)1065 static int find_locale(int category_mask, const char *p,
1066 					   unsigned char *new_locale)
1067 {
1068 	int i;
1069 	const unsigned char *s;
1070 	uint16_t n;
1071 	unsigned char lang_cult, codeset;
1072 
1073 #if defined(__LOCALE_DATA_AT_MODIFIERS_LENGTH) && 1
1074 	/* Support standard locale handling for @-modifiers. */
1075 
1076 #ifdef __UCLIBC_MJN3_ONLY__
1077 #warning REMINDER: Fix buf size in find_locale.
1078 #endif
1079 	char buf[18];	/* TODO: 7+{max codeset name length} */
1080 	const char *q;
1081 
1082 	if ((q = strchr(p,'@')) != NULL) {
1083 		if ((((size_t)((q-p)-5)) > (sizeof(buf) - 5)) || (p[2] != '_')) {
1084 			return 0;
1085 		}
1086 		/* locale name at least 5 chars long and 3rd char is '_' */
1087 		s = LOCALE_AT_MODIFIERS;
1088 		do {
1089 			if (!strcmp((char*) (s + 2), q + 1)) {
1090 				break;
1091 			}
1092 			s += 2 + *s;		/* TODO - fix this throughout */
1093 		} while (*s);
1094 		if (!*s) {
1095 			return 0;
1096 		}
1097 		assert(q - p < sizeof(buf));
1098 		memcpy(buf, p, q-p);
1099 		buf[q-p] = 0;
1100 		buf[2] = s[1];
1101 		p = buf;
1102 	}
1103 #endif
1104 
1105 	lang_cult = codeset = 0;	/* Assume C and default codeset.  */
1106 	if (((*p == 'C') && !p[1]) || !strcmp(p, posix)) {
1107 		goto FIND_LOCALE;
1108 	}
1109 
1110 	if ((strlen(p) > 5) && (p[5] == '.')) {	/* Codeset in locale name? */
1111 		/* TODO: maybe CODESET_LIST + *s ??? */
1112 		/* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
1113 		codeset = 2;
1114 		if (strcasecmp(utf8, p + 6) != 0) {/* TODO - fix! */
1115 			s = CODESET_LIST;
1116 			do {
1117 				++codeset;		/* Increment codeset first. */
1118 				if (!strcmp((char*) CODESET_LIST + *s, p + 6)) {
1119 					goto FIND_LANG_CULT;
1120 				}
1121 			} while (*++s);
1122 			return 0;			/* No matching codeset! */
1123 		}
1124 	}
1125 
1126  FIND_LANG_CULT:				/* Find language_culture number. */
1127 	s = LOCALE_NAMES;
1128 	do {						/* TODO -- do a binary search? */
1129 		/* TODO -- fix gen_mmap!*/
1130 		++lang_cult;			/* Increment first since C/POSIX is 0. */
1131 		if (!strncmp((char*) s, p, 5)) { /* Found a matching locale name; */
1132 			goto FIND_LOCALE;
1133 		}
1134 		s += 5;
1135 	} while (lang_cult < __LOCALE_DATA_NUM_LOCALE_NAMES);
1136 	return 0;					/* No matching language_culture! */
1137 
1138  FIND_LOCALE:					/* Find locale row matching name and codeset */
1139 	s = LOCALES;
1140 	n = 0;
1141 	do {						/* TODO -- do a binary search? */
1142 		if ((lang_cult == *s) && ((codeset == s[1]) || (codeset == s[2]))) {
1143 			i = 1;
1144 			s = new_locale + 1;
1145 			do {
1146 				if (category_mask & i) {
1147 					/* Encode current locale row number. */
1148 					((unsigned char *) s)[0] = (n >> 7) | 0x80;
1149 					((unsigned char *) s)[1] = (n & 0x7f) | 0x80;
1150 				}
1151 				s += 2;
1152 				i += i;
1153 			} while (i < (1 << LC_ALL));
1154 
1155 			return i;			/* Return non-zero */
1156 		}
1157 		s += __LOCALE_DATA_WIDTH_LOCALES;
1158 		++n;
1159 	} while (n <= __LOCALE_DATA_NUM_LOCALES); /* We started at 1!!! */
1160 
1161 	return 0;					/* Unsupported locale. */
1162 }
1163 
composite_locale(int category_mask,const char * locale,unsigned char * new_locale)1164 static unsigned char *composite_locale(int category_mask, const char *locale,
1165 									   unsigned char *new_locale)
1166 {
1167 	char buf[MAX_LOCALE_STR];
1168 	char *t;
1169 	char *e;
1170 	int c;
1171 	int component_mask;
1172 
1173 	if (!strchr(locale,'=')) {
1174 		if (!find_locale(category_mask, locale, new_locale)) {
1175 			return NULL;
1176 		}
1177 		return new_locale;
1178 	}
1179 
1180 	if (strlen(locale) >= sizeof(buf)) {
1181 		return NULL;
1182 	}
1183 	stpcpy(buf, locale);
1184 
1185 	component_mask = 0;
1186 	t = strtok_r(buf, "=", &e);	/* This can't fail because of strchr test above. */
1187 	do {
1188 		c = 0;
1189 		/* CATEGORY_NAMES is unsigned char* */
1190 		while (strcmp((char*) CATEGORY_NAMES + (int) CATEGORY_NAMES[c], t)) {
1191 			if (++c == LC_ALL) { /* Unknown category name! */
1192 				return NULL;
1193 			}
1194 		}
1195 		t = strtok_r(NULL, ";", &e);
1196 		c = (1 << c);
1197 		if (component_mask & c) { /* Multiple components for one category. */
1198 			return NULL;
1199 		}
1200 		component_mask |= c;
1201 		if ((category_mask & c) && (!t || !find_locale(c, t, new_locale))) {
1202 			return NULL;
1203 		}
1204 	} while ((t = strtok_r(NULL, "=", &e)) != NULL);
1205 
1206 	if (category_mask & ~component_mask) { /* Category component(s) missing. */
1207 		return NULL;
1208 	}
1209 
1210 	return new_locale;
1211 }
1212 
/* Create a locale object, or modify base, according to locale.
 *
 * category_mask  Categories to set; (1 << LC_ALL) is accepted as a
 *                synonym for LC_ALL_MASK.
 * locale         Locale specification.  "" means: consult the environment
 *                (LC_ALL, then the category's own variable, then LANG,
 *                falling back to "POSIX") separately for each category.
 * base           Existing locale object to update, or NULL to allocate a
 *                fresh one that starts out as the C locale.
 *
 * Returns the resulting locale object.  Returns NULL with errno set to
 * EINVAL for a NULL locale, an out-of-range mask, or an unsupported
 * locale; returns NULL if the allocation fails.
 */
__locale_t newlocale(int category_mask, const char *locale, __locale_t base)
{
	unsigned char new_selector[LOCALE_SELECTOR_SIZE];

	if (category_mask == (1 << LC_ALL)) {
		category_mask = LC_ALL_MASK;
	}

	if ((locale == NULL) || ((unsigned)(category_mask) > LC_ALL_MASK)) {
		goto INVALID;	/* No locale or illegal/unsupported category. */
	}

#ifdef __UCLIBC_MJN3_ONLY__
#warning TODO: Rename cur_locale to locale_selector.
#endif
	/* Start from the selector of base (or of the C locale) so that
	 * categories outside category_mask keep their current setting. */
	strcpy((char *) new_selector,
		   (base ? (char *) base->cur_locale : C_LOCALE_SELECTOR));

	if (locale[0] == '\0') {	/* locale == "", so check environment. */
		const char *envname[3];
		int cat;
		int bit;

		envname[0] = "LC_ALL";
		envname[1] = NULL;		/* Filled in per category below. */
		envname[2] = "LANG";

		for (cat = 0, bit = 1; cat < LC_ALL; ++cat, bit += bit) {
			const char *name;
			int j;

			if (!(category_mask & bit)) {
				continue;
			}

			/* Note: SUSv3 doesn't define a fallback mechanism here.
			 * So, if LC_ALL is invalid, we do _not_ continue trying
			 * the other environment vars. */
			envname[1] = (char*) CATEGORY_NAMES + CATEGORY_NAMES[cat];

			name = posix;		/* Used when no variable is set. */
			for (j = 0; j < 3; ++j) {
				const char *value = getenv(envname[j]);

				if (value && value[0]) {
					name = value;
					break;
				}
			}

			/* The user set something... is it valid? */
			/* Note: Since we don't support user-supplied locales and
			 * alternate paths, we don't need to worry about special
			 * handling for suid/sgid apps. */
			if (!find_locale(bit, name, new_selector)) {
				goto INVALID;
			}
		}
	} else if (!composite_locale(category_mask, locale, new_selector)) {
		goto INVALID;
	}

#ifdef __UCLIBC_MJN3_ONLY__
#warning TODO: Do a compatible codeset check!
#endif

	/* If we get here, the new selector corresponds to a valid locale. */

#ifdef __UCLIBC_MJN3_ONLY__
#warning CONSIDER: Probably want a _locale_new func to allow for caching of locales.
#endif
	/* (A _locale_new(new_selector) constructor was sketched here for the
	 * base == NULL case; until it exists, allocate and initialize inline.) */
	if (base == NULL) {
		base = calloc(1, sizeof(struct __uclibc_locale_struct));
		if (base == NULL) {
			return base;
		}
		_locale_init_l(base);
	}

	_locale_set_l(new_selector, base);

	return base;

 INVALID:
	__set_errno(EINVAL);
	return NULL;
}
1303 #ifdef __UCLIBC_HAS_XLOCALE__
libc_hidden_def(newlocale)1304 libc_hidden_def(newlocale)
1305 #endif
1306 
1307 #endif
1308 /**********************************************************************/
1309 #ifdef L_duplocale
1310 
1311 
1312 #ifdef __UCLIBC_MJN3_ONLY__
1313 #warning REMINDER: When we allocate ctype tables, remember to dup them.
1314 #endif
1315 
1316 __locale_t duplocale(__locale_t dataset)
1317 {
1318 	__locale_t r;
1319 	uint16_t * i2w;
1320 	size_t n;
1321 
1322 	assert(dataset != LC_GLOBAL_LOCALE);
1323 
1324 	r = malloc(sizeof(struct __uclibc_locale_struct));
1325 	if (r != NULL) {
1326 		n = 2 * dataset->collate.max_col_index + 2;
1327 		i2w = calloc(n, sizeof(uint16_t));
1328 		if (i2w != NULL) {
1329 			memcpy(r, dataset, sizeof(struct __uclibc_locale_struct));
1330 			r->collate.index2weight = i2w;
1331 			memcpy(i2w, dataset->collate.index2weight, n * sizeof(uint16_t));
1332 		} else {
1333 			free(r);
1334 			r = NULL;
1335 		}
1336 	}
1337 	return r;
1338 }
1339 
1340 #endif
1341 /**********************************************************************/
1342 #ifdef L_freelocale
1343 
1344 #ifdef __UCLIBC_MJN3_ONLY__
1345 #warning REMINDER: When we allocate ctype tables, remember to free them.
1346 #endif
1347 
/* Release a locale object: frees its collation weight table
 * (collate.index2weight) and then the locale structure itself.
 * The global locale must not be passed, either as __global_locale or as
 * the LC_GLOBAL_LOCALE handle; both are rejected by assert. */
void freelocale(__locale_t dataset)
{
	assert(dataset != __global_locale);
	assert(dataset != LC_GLOBAL_LOCALE);

	free(dataset->collate.index2weight); /* Free collation data. */
	free(dataset);				/* Free locale */
}
1356 
1357 #endif
1358 /**********************************************************************/
1359 #ifdef L_uselocale
1360 
/* Query or replace the current locale.
 *
 * dataset  NULL to only query; LC_GLOBAL_LOCALE to switch to the global
 *          locale; otherwise the locale object to install.
 *
 * Returns the locale that was current before the call, reported as
 * LC_GLOBAL_LOCALE when that was the global locale.
 */
__locale_t uselocale(__locale_t dataset)
{
	__locale_t previous;

	if (dataset == NULL) {
		previous = __UCLIBC_CURLOCALE;
	} else {
		/* The handle LC_GLOBAL_LOCALE stands for the real global object. */
		__locale_t target =
			(dataset == LC_GLOBAL_LOCALE) ? __global_locale : dataset;

#ifdef __UCLIBC_HAS_THREADS__
		previous = __curlocale_set(target);
#else
		previous = __curlocale_var;
		__curlocale_var = target;
#endif
	}

	return (previous == __global_locale) ? LC_GLOBAL_LOCALE : previous;
}
libc_hidden_def(uselocale)1384 libc_hidden_def(uselocale)
1385 
1386 #endif
1387 /**********************************************************************/
1388 #ifdef L___curlocale
1389 
1390 #ifdef __UCLIBC_HAS_THREADS__
1391 
/* Default (weak) accessor for the current locale: returns __curlocale_var. */
__locale_t weak_const_function __curlocale(void)
{
	return __curlocale_var; /* This is overridden by the thread version. */
}
libc_hidden_weak(__curlocale)1396 libc_hidden_weak(__curlocale)
1397 
1398 __locale_t weak_function __curlocale_set(__locale_t newloc)
1399 {
1400 	__locale_t oldloc = __curlocale_var;
1401 	assert(newloc != LC_GLOBAL_LOCALE);
1402 	__curlocale_var = newloc;
1403 	return oldloc;
1404 }
1405 libc_hidden_weak(__curlocale_set)
1406 
1407 #endif
1408 
1409 #endif
1410 /**********************************************************************/
1411 #ifdef L___locale_mbrtowc_l
1412 
1413 /* NOTE: This returns an int... not size_t.  Also, it is not a general
1414  * routine.  It is actually a very stripped-down version of mbrtowc
1415  * that takes a __locale_t arg.  This is used by strcoll and strxfrm.
1416  * It is also used above to generate wchar_t versions of the decimal point
 * and thousands separator. */
1418 
1419 
1420 #ifndef __CTYPE_HAS_UTF_8_LOCALES
1421 #warning __CTYPE_HAS_UTF_8_LOCALES not set!
1422 #endif
1423 #ifndef __CTYPE_HAS_8_BIT_LOCALES
1424 #warning __CTYPE_HAS_8_BIT_LOCALES not set!
1425 #endif
1426 
1427 #define Cc2wc_IDX_SHIFT		__LOCALE_DATA_Cc2wc_IDX_SHIFT
1428 #define Cc2wc_ROW_LEN		__LOCALE_DATA_Cc2wc_ROW_LEN
1429 
1430 extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
1431 						 const char **__restrict src, size_t n,
1432 						 mbstate_t *ps, int allow_continuation) attribute_hidden;
1433 
__locale_mbrtowc_l(wchar_t * __restrict dst,const char * __restrict src,__locale_t loc)1434 int attribute_hidden __locale_mbrtowc_l(wchar_t *__restrict dst,
1435 					   const char *__restrict src,
1436 					   __locale_t loc )
1437 {
1438 #ifdef __CTYPE_HAS_UTF_8_LOCALES
1439 	if (loc->encoding == __ctype_encoding_utf8) {
1440 		mbstate_t ps;
1441 		const char *p = src;
1442 		size_t r;
1443 		ps.__mask = 0;
1444 		r = _wchar_utf8sntowcs(dst, 1, &p, SIZE_MAX, &ps, 1);
1445 		return (r == 1) ? (p-src) : r; /* Need to return 0 if nul char. */
1446 	}
1447 #endif
1448 
1449 #ifdef __CTYPE_HAS_8_BIT_LOCALES
1450 	assert((loc->encoding == __ctype_encoding_7_bit) || (loc->encoding == __ctype_encoding_8_bit));
1451 #else
1452 	assert(loc->encoding == __ctype_encoding_7_bit);
1453 #endif
1454 
1455 	if ((*dst = ((unsigned char)(*src))) < 0x80) {	/* ASCII... */
1456 		return (*src != 0);
1457 	}
1458 
1459 #ifdef __CTYPE_HAS_8_BIT_LOCALES
1460 	if (loc->encoding == __ctype_encoding_8_bit) {
1461 		wchar_t wc = *dst - 0x80;
1462 		*dst = loc->tbl8c2wc[
1463 						(loc->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
1464 						 << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
1465 		if (*dst) {
1466 			return 1;
1467 		}
1468 	}
1469 #endif
1470 
1471 	return -1;
1472 }
1473 
1474 #endif
1475 /**********************************************************************/
1476