1 /*
2 * Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 #include <windows.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <malloc.h>
14
15 #if defined(CP_UTF8)
16
17 static UINT saved_cp;
18 static int newargc;
19 static char **newargv;
20
cleanup(void)21 static void cleanup(void)
22 {
23 int i;
24
25 SetConsoleOutputCP(saved_cp);
26
27 for (i = 0; i < newargc; i++)
28 free(newargv[i]);
29
30 free(newargv);
31 }
32
33 /*
34 * Incrementally [re]allocate newargv and keep it NULL-terminated.
35 */
validate_argv(int argc)36 static int validate_argv(int argc)
37 {
38 static int size = 0;
39
40 if (argc >= size) {
41 char **ptr;
42
43 while (argc >= size)
44 size += 64;
45
46 ptr = realloc(newargv, size * sizeof(newargv[0]));
47 if (ptr == NULL)
48 return 0;
49
50 (newargv = ptr)[argc] = NULL;
51 } else {
52 newargv[argc] = NULL;
53 }
54
55 return 1;
56 }
57
/*
 * Expand a wildcard argument and append the matching names to newargv.
 *
 * |wstr| holds the |wlen| wide characters of the argument. It does not have
 * to be NUL-terminated, but wstr[wlen] must be accessible: it is temporarily
 * overwritten with L'\0' for the FindFirstFileW() call and restored right
 * afterwards.
 *
 * Returns 0 if the file name part contains no '*' or '?', or if
 * FindFirstFileW() fails; nothing is appended in that case and the caller
 * passes the argument through unchanged. Returns 1 once the search has been
 * started, even if it had to be cut short because an allocation failed (the
 * names collected up to that point stay in newargv).
 */
static int process_glob(WCHAR *wstr, int wlen)
{
    int i, slash, udlen;
    WCHAR saved_char;
    WIN32_FIND_DATAW data;
    HANDLE h;

    /*
     * Note that we support wildcard characters only in filename part
     * of the path, and not in directories. Windows users are used to
     * this, that's why recursive glob processing is not implemented.
     */
    /*
     * Start by looking for last slash or backslash, ...
     */
    for (slash = 0, i = 0; i < wlen; i++)
        if (wstr[i] == L'/' || wstr[i] == L'\\')
            slash = i + 1;
    /*
     * ... then look for asterisk or question mark in the file name.
     */
    for (i = slash; i < wlen; i++)
        if (wstr[i] == L'*' || wstr[i] == L'?')
            break;

    if (i == wlen)
        return 0;   /* definitely not a glob */

    saved_char = wstr[wlen];
    wstr[wlen] = L'\0';
    h = FindFirstFileW(wstr, &data);
    wstr[wlen] = saved_char;
    if (h == INVALID_HANDLE_VALUE)
        return 0;   /* not a valid glob, just pass... */

    /*
     * |udlen| is the UTF-8 length of the directory prefix (everything up to
     * and including the last slash), which is prepended to every match.
     */
    if (slash)
        udlen = WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
                                    NULL, 0, NULL, NULL);
    else
        udlen = 0;

    do {
        int uflen;
        char *arg;

        /*
         * skip over . and ..
         */
        if (data.cFileName[0] == L'.') {
            if ((data.cFileName[1] == L'\0') ||
                (data.cFileName[1] == L'.' && data.cFileName[2] == L'\0'))
                continue;
        }

        if (!validate_argv(newargc + 1))
            break;

        /*
         * -1 below means "scan for trailing '\0' *and* count it",
         * so that |uflen| covers even trailing '\0'.
         */
        uflen = WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
                                    NULL, 0, NULL, NULL);
        /*
         * A failed conversion reports 0; skip the entry rather than store
         * a string that was never written or terminated.
         */
        if (uflen <= 0)
            continue;

        arg = malloc(udlen + uflen);
        if (arg == NULL)
            break;

        if (udlen)
            WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
                                arg, udlen, NULL, NULL);

        WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
                            arg + udlen, uflen, NULL, NULL);

        newargv[newargc++] = arg;
    } while (FindNextFileW(h, &data));

    /* Search handles must be released with FindClose(), not CloseHandle(). */
    FindClose(h);

    return 1;
}
140
win32_utf8argv(int * argc,char ** argv[])141 void win32_utf8argv(int *argc, char **argv[])
142 {
143 const WCHAR *wcmdline;
144 WCHAR *warg, *wend, *p;
145 int wlen, ulen, valid = 1;
146 char *arg;
147
148 if (GetEnvironmentVariableW(L"OPENSSL_WIN32_UTF8", NULL, 0) == 0)
149 return;
150
151 newargc = 0;
152 newargv = NULL;
153 if (!validate_argv(newargc))
154 return;
155
156 wcmdline = GetCommandLineW();
157 if (wcmdline == NULL) return;
158
159 /*
160 * make a copy of the command line, since we might have to modify it...
161 */
162 wlen = (int)wcslen(wcmdline);
163 p = _alloca((wlen + 1) * sizeof(WCHAR));
164 memcpy(p, wcmdline, (wlen + 1) * sizeof(WCHAR));
165
166 while (*p != L'\0') {
167 int in_quote = 0;
168
169 if (*p == L' ' || *p == L'\t') {
170 p++; /* skip over whitespace */
171 continue;
172 }
173
174 /*
175 * Note: because we may need to fiddle with the number of backslashes,
176 * the argument string is copied into itself. This is safe because
177 * the number of characters will never expand.
178 */
179 warg = wend = p;
180 while (*p != L'\0'
181 && (in_quote || (*p != L' ' && *p != L'\t'))) {
182 switch (*p) {
183 case L'\\':
184 /*
185 * Microsoft documentation on how backslashes are treated
186 * is:
187 *
188 * + Backslashes are interpreted literally, unless they
189 * immediately precede a double quotation mark.
190 * + If an even number of backslashes is followed by a double
191 * quotation mark, one backslash is placed in the argv array
192 * for every pair of backslashes, and the double quotation
193 * mark is interpreted as a string delimiter.
194 * + If an odd number of backslashes is followed by a double
195 * quotation mark, one backslash is placed in the argv array
196 * for every pair of backslashes, and the double quotation
197 * mark is "escaped" by the remaining backslash, causing a
198 * literal double quotation mark (") to be placed in argv.
199 *
200 * Ref: https://msdn.microsoft.com/en-us/library/17w5ykft.aspx
201 *
202 * Though referred page doesn't mention it, multiple qouble
203 * quotes are also special. Pair of double quotes in quoted
204 * string is counted as single double quote.
205 */
206 {
207 const WCHAR *q = p;
208 size_t i;
209
210 while (*p == L'\\')
211 p++;
212
213 if (*p == L'"') {
214 for (i = (p - q) / 2; i > 0; i--)
215 *wend++ = L'\\';
216
217 /*
218 * if odd amount of backslashes before the quote,
219 * said quote is part of the argument, not a delimiter
220 */
221 if ((p - q) % 2 == 1)
222 *wend++ = *p++;
223 } else {
224 for (i = p - q; i > 0; i--)
225 *wend++ = L'\\';
226 }
227 }
228 break;
229 case L'"':
230 /*
231 * Without the preceding backslash (or when preceded with an
232 * even number of backslashes), the double quote is a simple
233 * string delimiter and just slightly change the parsing state
234 */
235 if (in_quote && p[1] == L'"')
236 *wend++ = *p++;
237 else
238 in_quote = !in_quote;
239 p++;
240 break;
241 default:
242 /*
243 * Any other non-delimiter character is just taken verbatim
244 */
245 *wend++ = *p++;
246 }
247 }
248
249 wlen = (int)(wend - warg);
250
251 if (wlen == 0 || !process_glob(warg, wlen)) {
252 if (!validate_argv(newargc + 1)) {
253 valid = 0;
254 break;
255 }
256
257 ulen = 0;
258 if (wlen > 0) {
259 ulen = WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
260 NULL, 0, NULL, NULL);
261 if (ulen <= 0)
262 continue;
263 }
264
265 arg = malloc(ulen + 1);
266 if (arg == NULL) {
267 valid = 0;
268 break;
269 }
270
271 if (wlen > 0)
272 WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
273 arg, ulen, NULL, NULL);
274 arg[ulen] = '\0';
275
276 newargv[newargc++] = arg;
277 }
278 }
279
280 if (valid) {
281 saved_cp = GetConsoleOutputCP();
282 SetConsoleOutputCP(CP_UTF8);
283
284 *argc = newargc;
285 *argv = newargv;
286
287 atexit(cleanup);
288 } else if (newargv != NULL) {
289 int i;
290
291 for (i = 0; i < newargc; i++)
292 free(newargv[i]);
293
294 free(newargv);
295
296 newargc = 0;
297 newargv = NULL;
298 }
299
300 return;
301 }
302 #else
/*
 * Variant used when CP_UTF8 is not defined: does nothing and leaves
 * |*argc| and |*argv| exactly as the caller passed them.
 */
void win32_utf8argv(int *argc, char **argv[])
{
    (void)argc;
    (void)argv;
}
305 #endif
306