ReactOS 0.4.16-dev-852-gcfcc8d8
argv_parsing.cpp
Go to the documentation of this file.
1/***
2*stdargv.c - standard & wildcard _setargv routine
3*
4* Copyright (c) Microsoft Corporation. All rights reserved.
5*
6*Purpose:
7* processes program command line, with or without wildcard expansion
8*
9*******************************************************************************/
10
11#include <corecrt_internal.h>
13#include <limits.h>
14#include <mbstring.h>
15#include <stdlib.h>
16
17
18
19// In the function below, we need to ensure that we've initialized the mbc table
20// before we start performing character transformations.
22static void do_locale_initialization(wchar_t) throw() { /* no-op */ }
23
24static char* get_command_line(char) throw() { return _acmdln; }
25static wchar_t* get_command_line(wchar_t) throw() { return _wcmdln; }
26
27static char**& get_argv(char) throw() { return __argv; }
28static wchar_t**& get_argv(wchar_t) throw() { return __wargv; }
29
31 _In_z_ char** const argv,
32 _Out_ _Deref_post_z_ char*** const expanded_argv) throw()
33{
34 return __acrt_expand_narrow_argv_wildcards(argv, expanded_argv);
35}
36
38 _In_z_ wchar_t** const argv,
39 _Out_ _Deref_post_z_ wchar_t*** const expanded_argv) throw()
40{
41 return __acrt_expand_wide_argv_wildcards(argv, expanded_argv);
42}
43
44
45
46/***
47*static void parse_command_line(cmdstart, argv, args, argument_count, character_count)
48*
49*Purpose:
50* Parses the command line and sets up the argv[] array.
51* On entry, cmdstart should point to the command line,
52* argv should point to memory for the argv array, args
53* points to memory to place the text of the arguments.
54* If these are nullptr, then no storing (only counting)
55* is done. On exit, *argument_count has the number of
56* arguments (plus one for a final nullptr argument),
57* and *character_count has the number of characters (not bytes) used in
58* the buffer pointed to by args.
59*
60*Entry:
61* Character *cmdstart - pointer to command line of the form
62* <progname><nul><args><nul>
63* Character **argv - where to build argv array; nullptr means don't
64* build array
65* Character *args - where to place argument text; nullptr means don't
66* store text
67*
68*Exit:
69* no return value
70* int *argument_count - returns number of argv entries created
71* int *character_count - number of characters used in args buffer
72*
73*Exceptions:
74*
75*******************************************************************************/
76
77
78// should_copy_another_character helper functions
79// should_copy_another_character is *ONLY* checking for DBCS lead bytes to see if there
80// might be a following trail byte. This works because the callers are only concerned
81// about escaped quote sequences and other codepages aren't using those quotes.
82static bool __cdecl should_copy_another_character(char const c) throw()
83{
84 // This is OK for UTF-8 as a quote is never a trail byte.
85 return _ismbblead(c) != 0;
86}
87
88static bool __cdecl should_copy_another_character(wchar_t) throw()
89{
90 // This is OK for UTF-16 as a quote is never part of a surrogate pair.
91 return false;
92}
93
94template <typename Character>
96 Character* cmdstart,
97 Character** argv,
98 Character* args,
99 size_t* argument_count,
100 size_t* character_count
101 ) throw()
102{
103 *character_count = 0;
104 *argument_count = 1; // We'll have at least the program name
105
106 Character c;
107 int copy_character; /* 1 = copy char to *args */
108 unsigned numslash; /* num of backslashes seen */
109
110 /* first scan the program name, copy it, and count the bytes */
111 Character* p = cmdstart;
112 if (argv)
113 *argv++ = args;
114
115 // A quoted program name is handled here. The handling is much
116 // simpler than for other arguments. Basically, whatever lies
117 // between the leading double-quote and next one, or a terminal null
118 // character is simply accepted. Fancier handling is not required
119 // because the program name must be a legal NTFS/HPFS file name.
120 // Note that the double-quote characters are not copied, nor do they
121 // contribute to character_count.
122 bool in_quotes = false;
123 do
124 {
125 if (*p == '"')
126 {
127 in_quotes = !in_quotes;
128 c = *p++;
129 continue;
130 }
131
133 if (args)
134 *args++ = *p;
135
136 c = *p++;
137
139 {
141 if (args)
142 *args++ = *p; // Copy 2nd byte too
143 ++p; // skip over trail byte
144 }
145 }
146 while (c != '\0' && (in_quotes || (c != ' ' && c != '\t')));
147
148 if (c == '\0')
149 {
150 p--;
151 }
152 else
153 {
154 if (args)
155 *(args - 1) = '\0';
156 }
157
158 in_quotes = false;
159
160 // Loop on each argument
161 for (;;)
162 {
163 if (*p)
164 {
165 while (*p == ' ' || *p == '\t')
166 ++p;
167 }
168
169 if (*p == '\0')
170 break; // End of arguments
171
172 // Scan an argument:
173 if (argv)
174 *argv++ = args;
175
176 ++*argument_count;
177
178 // Loop through scanning one argument:
179 for (;;)
180 {
181 copy_character = 1;
182
183 // Rules:
184 // 2N backslashes + " ==> N backslashes and begin/end quote
185 // 2N + 1 backslashes + " ==> N backslashes + literal "
186 // N backslashes ==> N backslashes
187 numslash = 0;
188
189 while (*p == '\\')
190 {
191 // Count number of backslashes for use below
192 ++p;
193 ++numslash;
194 }
195
196 if (*p == '"')
197 {
198 // if 2N backslashes before, start/end quote, otherwise
199 // copy literally:
200 if (numslash % 2 == 0)
201 {
202 if (in_quotes && p[1] == '"')
203 {
204 p++; // Double quote inside quoted string
205 }
206 else
207 {
208 // Skip first quote char and copy second:
209 copy_character = 0; // Don't copy quote
210 in_quotes = !in_quotes;
211 }
212 }
213
214 numslash /= 2;
215 }
216
217 // Copy slashes:
218 while (numslash--)
219 {
220 if (args)
221 *args++ = '\\';
223 }
224
225 // If at end of arg, break loop:
226 if (*p == '\0' || (!in_quotes && (*p == ' ' || *p == '\t')))
227 break;
228
229 // Copy character into argument:
230 if (copy_character)
231 {
232 if (args)
233 *args++ = *p;
234
236 {
237 ++p;
239
240 if (args)
241 *args++ = *p;
242 }
243
245 }
246
247 ++p;
248 }
249
250 // Null-terminate the argument:
251 if (args)
252 *args++ = '\0'; // Terminate the string
253
255 }
256
257 // We put one last argument in -- a null pointer:
258 if (argv)
259 *argv++ = nullptr;
260
261 ++*argument_count;
262}
263
264
265
266extern "C" unsigned char* __cdecl __acrt_allocate_buffer_for_argv(
267 size_t const argument_count,
268 size_t const character_count,
269 size_t const character_size
270 )
271{
272 if (argument_count >= SIZE_MAX / sizeof(void*))
273 return nullptr;
274
276 return nullptr;
277
278 size_t const argument_array_size = argument_count * sizeof(void*);
279 size_t const character_array_size = character_count * character_size;
280
281 if (SIZE_MAX - argument_array_size <= character_array_size)
282 return nullptr;
283
284 size_t const total_size = argument_array_size + character_array_size;
285 __crt_unique_heap_ptr<unsigned char> buffer(_calloc_crt_t(unsigned char, total_size));
286 if (!buffer)
287 return nullptr;
288
289 return buffer.detach();
290}
291
292
293
294/***
295*_setargv, __setargv - set up "argc" and "argv" for C programs
296*
297*Purpose:
298* Read the command line and create the argv array for C
299* programs.
300*
301*Entry:
302* Arguments are retrieved from the program command line,
303* pointed to by _acmdln.
304*
305*Exit:
306* Returns 0 if successful, -1 if memory allocation failed.
307* "argv" points to a null-terminated list of pointers to ASCIZ
308* strings, each of which is an argument from the command line.
309* "argc" is the number of arguments. The strings are copied from
310* the environment segment into space allocated on the heap/stack.
311* The list of pointers is also located on the heap or stack.
312* _pgmptr points to the program name.
313*
314*Exceptions:
315* Terminates with out of memory error if no memory to allocate.
316*
317*******************************************************************************/
318template <typename Character>
320{
321 typedef __crt_char_traits<Character> traits;
322
324 {
325 return 0;
326 }
327
331
332 do_locale_initialization(Character());
333
334
335 static Character program_name[MAX_PATH + 1];
336 traits::get_module_file_name(nullptr, program_name, MAX_PATH);
337 traits::set_program_name(&program_name[0]);
338
339 // If there's no command line at all, then use the program name as the
340 // command line to parse, so that argv[0] is initialized with the program
341 // name. (This won't happen when the program is run by cmd.exe, but it
342 // could happen if the program is spawned via some other means.)
343 Character* const raw_command_line = get_command_line(Character());
344 Character* const command_line = raw_command_line == nullptr || raw_command_line[0] == '\0'
346 : raw_command_line;
347
348 size_t argument_count = 0;
349 size_t character_count = 0;
351 command_line,
352 static_cast<Character**>(nullptr),
353 static_cast<Character*>(nullptr),
354 &argument_count,
356
357 __crt_unique_heap_ptr<unsigned char> buffer(__acrt_allocate_buffer_for_argv(
358 argument_count,
360 sizeof(Character)));
361
363
364 Character** const first_argument = reinterpret_cast<Character**>(buffer.get());
365 Character* const first_string = reinterpret_cast<Character*>(buffer.get() + argument_count * sizeof(Character*));
366
367 parse_command_line(command_line, first_argument, first_string, &argument_count, &character_count);
368
369 // If we are not expanding wildcards, then we are done...
371 {
372 __argc = static_cast<int>(argument_count - 1);
373 get_argv(Character()) = reinterpret_cast<Character**>(buffer.detach());
374 return 0;
375 }
376
377 // ... otherwise, we try to do the wildcard expansion:
378 __crt_unique_heap_ptr<Character*> expanded_argv;
379 errno_t const argv_expansion_status = expand_argv_wildcards(first_argument, expanded_argv.get_address_of());
380 if (argv_expansion_status != 0)
381 return argv_expansion_status;
382
383 __argc = [&]()
384 {
385 size_t n = 0;
386 for (auto it = expanded_argv.get(); *it; ++it, ++n) { }
387 return static_cast<int>(n);
388 }();
389
390 get_argv(Character()) = expanded_argv.detach();
391 return 0;
392}
393
394
395
397{
398 return common_configure_argv<char>(mode);
399}
400
402{
403 return common_configure_argv<wchar_t>(mode);
404}
#define EINVAL
Definition: acclib.h:90
#define ENOMEM
Definition: acclib.h:84
#define __cdecl
Definition: accygwin.h:79
errno_t __cdecl _configure_wide_argv(_crt_argv_mode const mode)
static errno_t expand_argv_wildcards(_In_z_ char **const argv, _Out_ _Deref_post_z_ char ***const expanded_argv)
static void __cdecl parse_command_line(Character *cmdstart, Character **argv, Character *args, size_t *argument_count, size_t *character_count)
errno_t __cdecl _configure_narrow_argv(_crt_argv_mode const mode)
static bool __cdecl should_copy_another_character(char const c)
static void do_locale_initialization(wchar_t)
static errno_t __cdecl common_configure_argv(_crt_argv_mode const mode)
static char **& get_argv(char)
unsigned char *__cdecl __acrt_allocate_buffer_for_argv(size_t const argument_count, size_t const character_count, size_t const character_size)
static char * get_command_line(char)
errno_t __acrt_expand_wide_argv_wildcards(wchar_t **const argv, wchar_t ***const result)
errno_t __acrt_expand_narrow_argv_wildcards(char **const argv, char ***const result)
_In_z_ Character const *const first_argument
Definition: cenvarg.cpp:289
_In_ size_t character_count
_In_ size_t _In_ size_t character_size
bool __cdecl __acrt_initialize_multibyte(void)
Definition: mbctype.cpp:894
#define _wcmdln
#define _acmdln
#define MAX_PATH
Definition: compat.h:34
GLdouble n
Definition: glext.h:7729
GLuint buffer
Definition: glext.h:5915
const GLubyte * c
Definition: glext.h:8905
GLenum mode
Definition: glext.h:6217
GLfloat GLfloat p
Definition: glext.h:8902
#define __argv
Definition: stdlib.h:1154
#define __argc
Definition: stdlib.h:1153
#define __wargv
Definition: stdlib.h:1155
#define _VALIDATE_RETURN_ERRCODE(expr, errorcode)
#define _VALIDATE_RETURN_ERRCODE_NOEXC(expr, errorcode)
#define c
Definition: ke_i.h:80
static char * program_name
Definition: mkdosfs.c:519
int __cdecl _ismbblead(unsigned int)
Definition: ismblead.c:20
#define argv
Definition: mplay32.c:18
#define _Deref_post_z_
Definition: ms_sal.h:1121
#define _In_z_
Definition: no_sal2.h:164
#define _Out_
Definition: no_sal2.h:160
#define SIZE_MAX
Definition: compat.h:66
#define args
Definition: format.c:66
Definition: match.c:390
int errno_t
Definition: corecrt.h:615
_crt_argv_mode
@ _crt_argv_no_arguments
@ _crt_argv_expanded_arguments
@ _crt_argv_unexpanded_arguments