1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1997-2005
5 * Herbert Xu <herbert@gondor.apana.org.au>. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #if HAVE_ALLOCA_H
36 #include <alloca.h>
37 #endif
38
39 #include <stdlib.h>
40
41 #include "shell.h"
42 #include "parser.h"
43 #include "nodes.h"
44 #include "expand.h" /* defines rmescapes() */
45 #include "exec.h" /* defines find_builtin() */
46 #include "syntax.h"
47 #include "options.h"
48 #include "input.h"
49 #include "output.h"
50 #include "var.h"
51 #include "error.h"
52 #include "memalloc.h"
53 #include "mystring.h"
54 #include "alias.h"
55 #include "show.h"
56 #include "builtins.h"
57 #include "system.h"
58
59 /*
60 * Shell command parser.
61 */
62
63 /* values returned by readtoken */
64 #include "token_vars.h"
65
66
67
/*
 * Used by expandstr to get here-doc like behaviour.
 *
 * This is a sentinel pointer value, never dereferenced; realeofmark()
 * tells it apart from a genuine end-of-document marker.  The expansion is
 * parenthesized so that the cast binds as a unit wherever the macro is
 * used.
 */
#define FAKEEOFMARK ((char *)1)
70
71
72
/*
 * One pending here-document.  A record is allocated when a "<<" operator
 * is lexed, queued on heredoclist once its delimiter word has been read,
 * and consumed when the document body is read by parseheredoc().
 */
struct heredoc {
	struct heredoc *next;	/* next here document in list */
	union node *here;	/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
};
79
80
81
/*
 * Parser/lexer state shared between the routines in this file.
 */
struct heredoc *heredoclist;	/* list of here documents to read */
int doprompt;			/* if set, prompt the user */
int needprompt;			/* true if interactive and at start of line */
int lasttoken;			/* last token read */
int tokpushback;		/* last token pushed back */
char *wordtext;			/* text of last word returned by readtoken */
int checkkwd;			/* CHK* flags for the next readtoken(), which resets it to 0 */
struct nodelist *backquotelist;	/* command substitutions found in the last word */
union node *redirnode;		/* redirection node built for the last TREDIR token */
struct heredoc *heredoc;	/* record created by the last "<<" operator */
int quoteflag;			/* set if (part of) last token was quoted */
93
94
/*
 * Forward declarations of the parser routines.
 *
 * list/andor/pipeline/command/simplecmd form the recursive-descent
 * grammar; peektoken/readtoken/xxreadtoken/readtoken1 form the lexer.
 * synexpect() and synerror() are declared noreturn.
 */
STATIC union node *list(int);
STATIC union node *andor(void);
STATIC union node *pipeline(void);
STATIC union node *command(void);
STATIC union node *simplecmd(void);
STATIC union node *makename(void);
STATIC void parsefname(void);
STATIC void parseheredoc(void);
STATIC int peektoken(void);
STATIC int readtoken(void);
STATIC int xxreadtoken(void);
STATIC int readtoken1(int, char const *, char *, int);
STATIC void synexpect(int) __attribute__((__noreturn__));
STATIC void synerror(const char *) __attribute__((__noreturn__));
STATIC void setprompt(int);
110
111
/*
 * Return non-zero when p begins with a non-empty name (as delimited by
 * endofname()) that is immediately followed by '='.
 */
static inline int
isassignment(const char *p)
{
	const char *end = endofname(p);

	return end != p && *end == '=';
}
120
realeofmark(const char * eofmark)121 static inline int realeofmark(const char *eofmark)
122 {
123 return eofmark && eofmark != FAKEEOFMARK;
124 }
125
126
127 /*
128 * Read and parse a command. Returns NEOF on end of file. (NULL is a
129 * valid parse tree indicating a blank line.)
130 */
131
132 union node *
parsecmd(int interact)133 parsecmd(int interact)
134 {
135 tokpushback = 0;
136 checkkwd = 0;
137 heredoclist = 0;
138 doprompt = interact;
139 if (doprompt)
140 setprompt(doprompt);
141 needprompt = 0;
142 return list(1);
143 }
144
145
146 STATIC union node *
list(int nlflag)147 list(int nlflag)
148 {
149 union node *n1, *n2, *n3;
150 int tok;
151
152 n1 = NULL;
153 for (;;) {
154 switch (peektoken()) {
155 case TNL:
156 if (!(nlflag & 1))
157 break;
158 parseheredoc();
159 return n1;
160
161 case TEOF:
162 if (!n1 && (nlflag & 1))
163 n1 = NEOF;
164 parseheredoc();
165 return n1;
166 }
167
168 checkkwd = CHKNL | CHKKWD | CHKALIAS;
169 if (nlflag == 2 && tokendlist[peektoken()])
170 return n1;
171 nlflag |= 2;
172
173 n2 = andor();
174 tok = readtoken();
175 if (tok == TBACKGND) {
176 if (n2->type == NPIPE) {
177 n2->npipe.backgnd = 1;
178 } else {
179 if (n2->type != NREDIR) {
180 n3 = stalloc(sizeof(struct nredir));
181 n3->nredir.n = n2;
182 n3->nredir.redirect = NULL;
183 n2 = n3;
184 }
185 n2->type = NBACKGND;
186 }
187 }
188 if (n1 == NULL) {
189 n1 = n2;
190 }
191 else {
192 n3 = (union node *)stalloc(sizeof (struct nbinary));
193 n3->type = NSEMI;
194 n3->nbinary.ch1 = n1;
195 n3->nbinary.ch2 = n2;
196 n1 = n3;
197 }
198 switch (tok) {
199 case TNL:
200 case TEOF:
201 tokpushback++;
202 /* fall through */
203 case TBACKGND:
204 case TSEMI:
205 break;
206 default:
207 if ((nlflag & 1))
208 synexpect(-1);
209 tokpushback++;
210 return n1;
211 }
212 }
213 }
214
215
216
217 STATIC union node *
andor(void)218 andor(void)
219 {
220 union node *n1, *n2, *n3;
221 int t;
222
223 n1 = pipeline();
224 for (;;) {
225 if ((t = readtoken()) == TAND) {
226 t = NAND;
227 } else if (t == TOR) {
228 t = NOR;
229 } else {
230 tokpushback++;
231 return n1;
232 }
233 checkkwd = CHKNL | CHKKWD | CHKALIAS;
234 n2 = pipeline();
235 n3 = (union node *)stalloc(sizeof (struct nbinary));
236 n3->type = t;
237 n3->nbinary.ch1 = n1;
238 n3->nbinary.ch2 = n2;
239 n1 = n3;
240 }
241 }
242
243
244
245 STATIC union node *
pipeline(void)246 pipeline(void)
247 {
248 union node *n1, *n2, *pipenode;
249 struct nodelist *lp, *prev;
250 int negate;
251
252 negate = 0;
253 TRACE(("pipeline: entered\n"));
254 if (readtoken() == TNOT) {
255 negate = !negate;
256 checkkwd = CHKKWD | CHKALIAS;
257 } else
258 tokpushback++;
259 n1 = command();
260 if (readtoken() == TPIPE) {
261 pipenode = (union node *)stalloc(sizeof (struct npipe));
262 pipenode->type = NPIPE;
263 pipenode->npipe.backgnd = 0;
264 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
265 pipenode->npipe.cmdlist = lp;
266 lp->n = n1;
267 do {
268 prev = lp;
269 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
270 checkkwd = CHKNL | CHKKWD | CHKALIAS;
271 lp->n = command();
272 prev->next = lp;
273 } while (readtoken() == TPIPE);
274 lp->next = NULL;
275 n1 = pipenode;
276 }
277 tokpushback++;
278 if (negate) {
279 n2 = (union node *)stalloc(sizeof (struct nnot));
280 n2->type = NNOT;
281 n2->nnot.com = n1;
282 return n2;
283 } else
284 return n1;
285 }
286
287
288
289 STATIC union node *
command(void)290 command(void)
291 {
292 union node *n1, *n2;
293 union node *ap, **app;
294 union node *cp, **cpp;
295 union node *redir, **rpp;
296 union node **rpp2;
297 int t;
298 int savelinno;
299
300 redir = NULL;
301 rpp2 = &redir;
302
303 savelinno = plinno;
304
305 switch (readtoken()) {
306 default:
307 synexpect(-1);
308 /* NOTREACHED */
309 case TIF:
310 n1 = (union node *)stalloc(sizeof (struct nif));
311 n1->type = NIF;
312 n1->nif.test = list(0);
313 if (readtoken() != TTHEN)
314 synexpect(TTHEN);
315 n1->nif.ifpart = list(0);
316 n2 = n1;
317 while (readtoken() == TELIF) {
318 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
319 n2 = n2->nif.elsepart;
320 n2->type = NIF;
321 n2->nif.test = list(0);
322 if (readtoken() != TTHEN)
323 synexpect(TTHEN);
324 n2->nif.ifpart = list(0);
325 }
326 if (lasttoken == TELSE)
327 n2->nif.elsepart = list(0);
328 else {
329 n2->nif.elsepart = NULL;
330 tokpushback++;
331 }
332 t = TFI;
333 break;
334 case TWHILE:
335 case TUNTIL: {
336 int got;
337 n1 = (union node *)stalloc(sizeof (struct nbinary));
338 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
339 n1->nbinary.ch1 = list(0);
340 if ((got=readtoken()) != TDO) {
341 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
342 synexpect(TDO);
343 }
344 n1->nbinary.ch2 = list(0);
345 t = TDONE;
346 break;
347 }
348 case TFOR:
349 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
350 synerror("Bad for loop variable");
351 n1 = (union node *)stalloc(sizeof (struct nfor));
352 n1->type = NFOR;
353 n1->nfor.linno = savelinno;
354 n1->nfor.var = wordtext;
355 checkkwd = CHKNL | CHKKWD | CHKALIAS;
356 if (readtoken() == TIN) {
357 app = ≈
358 while (readtoken() == TWORD) {
359 n2 = (union node *)stalloc(sizeof (struct narg));
360 n2->type = NARG;
361 n2->narg.text = wordtext;
362 n2->narg.backquote = backquotelist;
363 *app = n2;
364 app = &n2->narg.next;
365 }
366 *app = NULL;
367 n1->nfor.args = ap;
368 if (lasttoken != TNL && lasttoken != TSEMI)
369 synexpect(-1);
370 } else {
371 n2 = (union node *)stalloc(sizeof (struct narg));
372 n2->type = NARG;
373 n2->narg.text = (char *)dolatstr;
374 n2->narg.backquote = NULL;
375 n2->narg.next = NULL;
376 n1->nfor.args = n2;
377 /*
378 * Newline or semicolon here is optional (but note
379 * that the original Bourne shell only allowed NL).
380 */
381 if (lasttoken != TSEMI)
382 tokpushback++;
383 }
384 checkkwd = CHKNL | CHKKWD | CHKALIAS;
385 if (readtoken() != TDO)
386 synexpect(TDO);
387 n1->nfor.body = list(0);
388 t = TDONE;
389 break;
390 case TCASE:
391 n1 = (union node *)stalloc(sizeof (struct ncase));
392 n1->type = NCASE;
393 n1->ncase.linno = savelinno;
394 if (readtoken() != TWORD)
395 synexpect(TWORD);
396 n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
397 n2->type = NARG;
398 n2->narg.text = wordtext;
399 n2->narg.backquote = backquotelist;
400 n2->narg.next = NULL;
401 checkkwd = CHKNL | CHKKWD | CHKALIAS;
402 if (readtoken() != TIN)
403 synexpect(TIN);
404 cpp = &n1->ncase.cases;
405 next_case:
406 checkkwd = CHKNL | CHKKWD;
407 t = readtoken();
408 while(t != TESAC) {
409 if (lasttoken == TLP)
410 readtoken();
411 *cpp = cp = (union node *)stalloc(sizeof (struct nclist));
412 cp->type = NCLIST;
413 app = &cp->nclist.pattern;
414 for (;;) {
415 *app = ap = (union node *)stalloc(sizeof (struct narg));
416 ap->type = NARG;
417 ap->narg.text = wordtext;
418 ap->narg.backquote = backquotelist;
419 if (readtoken() != TPIPE)
420 break;
421 app = &ap->narg.next;
422 readtoken();
423 }
424 ap->narg.next = NULL;
425 if (lasttoken != TRP)
426 synexpect(TRP);
427 cp->nclist.body = list(2);
428
429 cpp = &cp->nclist.next;
430
431 checkkwd = CHKNL | CHKKWD;
432 if ((t = readtoken()) != TESAC) {
433 if (t != TENDCASE)
434 synexpect(TENDCASE);
435 else
436 goto next_case;
437 }
438 }
439 *cpp = NULL;
440 goto redir;
441 case TLP:
442 n1 = (union node *)stalloc(sizeof (struct nredir));
443 n1->type = NSUBSHELL;
444 n1->nredir.linno = savelinno;
445 n1->nredir.n = list(0);
446 n1->nredir.redirect = NULL;
447 t = TRP;
448 break;
449 case TBEGIN:
450 n1 = list(0);
451 t = TEND;
452 break;
453 case TWORD:
454 case TREDIR:
455 tokpushback++;
456 return simplecmd();
457 }
458
459 if (readtoken() != t)
460 synexpect(t);
461
462 redir:
463 /* Now check for redirection which may follow command */
464 checkkwd = CHKKWD | CHKALIAS;
465 rpp = rpp2;
466 while (readtoken() == TREDIR) {
467 *rpp = n2 = redirnode;
468 rpp = &n2->nfile.next;
469 parsefname();
470 }
471 tokpushback++;
472 *rpp = NULL;
473 if (redir) {
474 if (n1->type != NSUBSHELL) {
475 n2 = (union node *)stalloc(sizeof (struct nredir));
476 n2->type = NREDIR;
477 n2->nredir.linno = savelinno;
478 n2->nredir.n = n1;
479 n1 = n2;
480 }
481 n1->nredir.redirect = redir;
482 }
483
484 return n1;
485 }
486
487
488 STATIC union node *
simplecmd(void)489 simplecmd(void) {
490 union node *args, **app;
491 union node *n = NULL;
492 union node *vars, **vpp;
493 union node **rpp, *redir;
494 int savecheckkwd;
495 int savelinno;
496
497 args = NULL;
498 app = &args;
499 vars = NULL;
500 vpp = &vars;
501 redir = NULL;
502 rpp = &redir;
503
504 savecheckkwd = CHKALIAS;
505 savelinno = plinno;
506 for (;;) {
507 checkkwd = savecheckkwd;
508 switch (readtoken()) {
509 case TWORD:
510 n = (union node *)stalloc(sizeof (struct narg));
511 n->type = NARG;
512 n->narg.text = wordtext;
513 n->narg.backquote = backquotelist;
514 if (savecheckkwd && isassignment(wordtext)) {
515 *vpp = n;
516 vpp = &n->narg.next;
517 } else {
518 *app = n;
519 app = &n->narg.next;
520 savecheckkwd = 0;
521 }
522 break;
523 case TREDIR:
524 *rpp = n = redirnode;
525 rpp = &n->nfile.next;
526 parsefname(); /* read name of redirection file */
527 break;
528 case TLP:
529 if (
530 args && app == &args->narg.next &&
531 !vars && !redir
532 ) {
533 struct builtincmd *bcmd;
534 const char *name;
535
536 /* We have a function */
537 if (readtoken() != TRP)
538 synexpect(TRP);
539 name = n->narg.text;
540 if (
541 !goodname(name) || (
542 (bcmd = find_builtin(name)) &&
543 bcmd->flags & BUILTIN_SPECIAL
544 )
545 )
546 synerror("Bad function name");
547 n->type = NDEFUN;
548 checkkwd = CHKNL | CHKKWD | CHKALIAS;
549 n->ndefun.text = n->narg.text;
550 n->ndefun.linno = plinno;
551 n->ndefun.body = command();
552 return n;
553 }
554 /* fall through */
555 default:
556 tokpushback++;
557 goto out;
558 }
559 }
560 out:
561 *app = NULL;
562 *vpp = NULL;
563 *rpp = NULL;
564 n = (union node *)stalloc(sizeof (struct ncmd));
565 n->type = NCMD;
566 n->ncmd.linno = savelinno;
567 n->ncmd.args = args;
568 n->ncmd.assign = vars;
569 n->ncmd.redirect = redir;
570 return n;
571 }
572
573 STATIC union node *
makename(void)574 makename(void)
575 {
576 union node *n;
577
578 n = (union node *)stalloc(sizeof (struct narg));
579 n->type = NARG;
580 n->narg.next = NULL;
581 n->narg.text = wordtext;
582 n->narg.backquote = backquotelist;
583 return n;
584 }
585
fixredir(union node * n,const char * text,int err)586 void fixredir(union node *n, const char *text, int err)
587 {
588 TRACE(("Fix redir %s %d\n", text, err));
589 if (!err)
590 n->ndup.vname = NULL;
591
592 if (is_digit(text[0]) && text[1] == '\0')
593 n->ndup.dupfd = digit_val(text[0]);
594 else if (text[0] == '-' && text[1] == '\0')
595 n->ndup.dupfd = -1;
596 else {
597
598 if (err)
599 synerror("Bad fd number");
600 else
601 n->ndup.vname = makename();
602 }
603 }
604
605
606 STATIC void
parsefname(void)607 parsefname(void)
608 {
609 union node *n = redirnode;
610
611 if (n->type == NHERE)
612 checkkwd = CHKEOFMARK;
613 if (readtoken() != TWORD)
614 synexpect(-1);
615 if (n->type == NHERE) {
616 struct heredoc *here = heredoc;
617 struct heredoc *p;
618
619 if (quoteflag == 0)
620 n->type = NXHERE;
621 TRACE(("Here document %d\n", n->type));
622 rmescapes(wordtext);
623 here->eofmark = wordtext;
624 here->next = NULL;
625 if (heredoclist == NULL)
626 heredoclist = here;
627 else {
628 for (p = heredoclist ; p->next ; p = p->next);
629 p->next = here;
630 }
631 } else if (n->type == NTOFD || n->type == NFROMFD) {
632 fixredir(n, wordtext, 0);
633 } else {
634 n->nfile.fname = makename();
635 }
636 }
637
638
639 /*
640 * Input any here documents.
641 */
642
643 STATIC void
parseheredoc(void)644 parseheredoc(void)
645 {
646 struct heredoc *here;
647 union node *n;
648
649 here = heredoclist;
650 heredoclist = 0;
651
652 while (here) {
653 if (needprompt) {
654 setprompt(2);
655 }
656 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
657 here->eofmark, here->striptabs);
658 n = (union node *)stalloc(sizeof (struct narg));
659 n->narg.type = NARG;
660 n->narg.next = NULL;
661 n->narg.text = wordtext;
662 n->narg.backquote = backquotelist;
663 here->here->nhere.doc = n;
664 here = here->next;
665 }
666 }
667
668 STATIC int
peektoken(void)669 peektoken(void)
670 {
671 int t;
672
673 t = readtoken();
674 tokpushback++;
675 return (t);
676 }
677
/*
 * Return the next token, applying the processing requested in checkkwd:
 *
 *   CHKNL     skip newline tokens, reading pending here documents after
 *             each one;
 *   CHKKWD    turn an unquoted word that findkwd() recognises into the
 *             corresponding keyword token;
 *   CHKALIAS  substitute an unquoted word that names an alias and rescan.
 *
 * checkkwd is sampled into kwd on entry and reset to 0 before returning,
 * so the flags apply to a single call only.
 */
STATIC int
readtoken(void)
{
	int t;
	int kwd = checkkwd;
#ifdef DEBUG
	int alreadyseen = tokpushback;
#endif

top:
	t = xxreadtoken();

	/*
	 * eat newlines
	 */
	if (kwd & CHKNL) {
		while (t == TNL) {
			parseheredoc();
			t = xxreadtoken();
		}
	}

	/* only unquoted words are subject to keyword and alias checks */
	if (t != TWORD || quoteflag) {
		goto out;
	}

	/*
	 * check for keywords
	 */
	if (kwd & CHKKWD) {
		const char *const *pp;

		if ((pp = findkwd(wordtext))) {
			/* keyword tokens are numbered from KWDOFFSET in parsekwd order */
			lasttoken = t = pp - parsekwd + KWDOFFSET;
			TRACE(("keyword %s recognized\n", tokname[t]));
			goto out;
		}
	}

	/*
	 * check for aliases
	 *
	 * Note that this tests the global checkkwd, not the kwd snapshot
	 * taken on entry.
	 */
	if (checkkwd & CHKALIAS) {
		struct alias *ap;
		if ((ap = lookupalias(wordtext, 1)) != NULL) {
			/* an empty alias value just makes the word disappear */
			if (*ap->val) {
				pushstring(ap->val, ap);
			}
			goto top;
		}
	}
out:
	checkkwd = 0;
#ifdef DEBUG
	if (!alreadyseen)
	    TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
	else
	    TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
#endif
	return (t);
}
736
nlprompt(void)737 static void nlprompt(void)
738 {
739 plinno++;
740 if (doprompt)
741 setprompt(2);
742 }
743
nlnoprompt(void)744 static void nlnoprompt(void)
745 {
746 plinno++;
747 needprompt = doprompt;
748 }
749
750
751 /*
752 * Read the next input token.
753 * If the token is a word, we set backquotelist to the list of cmds in
754 * backquotes. We set quoteflag to true if any part of the word was
755 * quoted.
756 * If the token is TREDIR, then we set redirnode to a structure containing
757 * the redirection.
758 *
759 * [Change comment: here documents and internal procedures]
760 * [Readtoken shouldn't have any arguments. Perhaps we should make the
761 * word parsing code into a separate routine. In this case, readtoken
762 * doesn't need to have any internal procedures, but parseword does.
763 * We could also make parseoperator in essence the main routine, and
764 * have parseword (readtoken1?) handle both words and redirection.]
765 */
766
767 #define RETURN(token) return lasttoken = token
768
769 STATIC int
xxreadtoken(void)770 xxreadtoken(void)
771 {
772 int c;
773
774 if (tokpushback) {
775 tokpushback = 0;
776 return lasttoken;
777 }
778 if (needprompt) {
779 setprompt(2);
780 }
781 for (;;) { /* until token or start of word found */
782 c = pgetc();
783 switch (c) {
784 case ' ': case '\t':
785 case PEOA:
786 continue;
787 case '#':
788 while ((c = pgetc()) != '\n' && c != PEOF);
789 pungetc();
790 continue;
791 case '\\':
792 if (pgetc() == '\n') {
793 nlprompt();
794 continue;
795 }
796 pungetc();
797 goto breakloop;
798 case '\n':
799 nlnoprompt();
800 RETURN(TNL);
801 case PEOF:
802 RETURN(TEOF);
803 case '&':
804 if (pgetc() == '&')
805 RETURN(TAND);
806 pungetc();
807 RETURN(TBACKGND);
808 case '|':
809 if (pgetc() == '|')
810 RETURN(TOR);
811 pungetc();
812 RETURN(TPIPE);
813 case ';':
814 if (pgetc() == ';')
815 RETURN(TENDCASE);
816 pungetc();
817 RETURN(TSEMI);
818 case '(':
819 RETURN(TLP);
820 case ')':
821 RETURN(TRP);
822 default:
823 goto breakloop;
824 }
825 }
826 breakloop:
827 return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
828 #undef RETURN
829 }
830
/*
 * Read one character, silently skipping any backslash-newline pairs in
 * front of it.  A backslash that is not followed by a newline is returned
 * as is, with the following character pushed back.
 */
static int pgetc_eatbnl(void)
{
	int c;

	for (;;) {
		c = pgetc();
		if (c != '\\')
			return c;

		if (pgetc() != '\n') {
			pungetc();
			return c;
		}

		nlprompt();
	}
}
846
847
848
/*
 * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
 * is not NULL, read a here document.  In the latter case, eofmark is the
 * word which marks the end of the document and striptabs is true if
 * leading tabs should be stripped from the document.  The argument firstc
 * is the first character of the input token or document.
 *
 * syntax is the character-class table indexed by each input character; it
 * is switched as quotes and arithmetic expansions open and close.
 *
 * On return either
 *   - TWORD:  wordtext holds the text read, quoteflag tells whether any
 *     part was quoted and backquotelist holds the command substitutions
 *     found; or
 *   - TREDIR: redirnode holds the redirection node that was built.
 * lasttoken is set to the returned value in both cases.
 *
 * Because C does not have internal subroutines, I have simulated them
 * using goto's to implement the subroutine linkage.  The following macros
 * will run code that appears at the end of readtoken1.
 */

#define CHECKEND()	{goto checkend; checkend_return:;}
#define PARSEREDIR()	{goto parseredir; parseredir_return:;}
#define PARSESUB()	{goto parsesub; parsesub_return:;}
#define PARSEBACKQOLD()	{oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
#define PARSEBACKQNEW()	{oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
#define	PARSEARITH()	{goto parsearith; parsearith_return:;}

STATIC int
readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
{
	int c = firstc;
	char *out;
	size_t len;
	struct nodelist *bqlist;
	int quotef;
	int dblquote;
	int varnest;	/* levels of variables expansion */
	int arinest;	/* levels of arithmetic expansion */
	int parenlevel;	/* levels of parens in arithmetic */
	int dqvarnest;	/* levels of variables expansion within double quotes */
	int oldstyle;
	/* syntax before arithmetic */
	char const *uninitialized_var(prevsyntax);

	dblquote = 0;
	if (syntax == DQSYNTAX)
		dblquote = 1;
	quotef = 0;
	bqlist = NULL;
	varnest = 0;
	arinest = 0;
	parenlevel = 0;
	dqvarnest = 0;

	STARTSTACKSTR(out);
	loop: {	/* for each line, until end of word */
#if ATTY
		if (c == '\034' && doprompt
		 && attyset() && ! equal(termval(), "emacs")) {
			attyline();
			if (syntax == BASESYNTAX)
				return readtoken();
			c = pgetc();
			goto loop;
		}
#endif
		CHECKEND();	/* set c to PEOF if at end of here document */
		for (;;) {	/* until end of line or end of word */
			CHECKSTRSPACE(4, out);	/* permit 4 calls to USTPUTC */
			switch(syntax[c]) {
			case CNL:	/* '\n' */
				if (syntax == BASESYNTAX)
					goto endword;	/* exit outer loop */
				USTPUTC(c, out);
				nlprompt();
				c = pgetc();
				goto loop;		/* continue outer loop */
			case CWORD:
				USTPUTC(c, out);
				break;
			case CCTL:
				if (eofmark == NULL || dblquote)
					USTPUTC(CTLESC, out);
				USTPUTC(c, out);
				break;
			/* backslash */
			case CBACK:
				c = pgetc2();
				if (c == PEOF) {
					/* trailing backslash: keep it as a literal */
					USTPUTC(CTLESC, out);
					USTPUTC('\\', out);
					pungetc();
				} else if (c == '\n') {
					/* backslash-newline produces no output */
					nlprompt();
				} else {
					if (
						dblquote &&
						c != '\\' && c != '`' &&
						c != '$' && (
							c != '"' ||
							eofmark != NULL
						)
					) {
						USTPUTC('\\', out);
					}
					USTPUTC(CTLESC, out);
					USTPUTC(c, out);
					quotef++;
				}
				break;
			case CSQUOTE:
				syntax = SQSYNTAX;
quotemark:
				if (eofmark == NULL) {
					USTPUTC(CTLQUOTEMARK, out);
				}
				break;
			case CDQUOTE:
				syntax = DQSYNTAX;
				dblquote = 1;
				goto quotemark;
			case CENDQUOTE:
				if (eofmark && !varnest)
					USTPUTC(c, out);
				else {
					if (dqvarnest == 0) {
						syntax = BASESYNTAX;
						dblquote = 0;
					}
					quotef++;
					goto quotemark;
				}
				break;
			case CVAR:	/* '$' */
				PARSESUB();		/* parse substitution */
				break;
			case CENDVAR:	/* '}' */
				if (varnest > 0) {
					varnest--;
					if (dqvarnest > 0) {
						dqvarnest--;
					}
					USTPUTC(CTLENDVAR, out);
				} else {
					USTPUTC(c, out);
				}
				break;
			case CLP:	/* '(' in arithmetic */
				parenlevel++;
				USTPUTC(c, out);
				break;
			case CRP:	/* ')' in arithmetic */
				if (parenlevel > 0) {
					USTPUTC(c, out);
					--parenlevel;
				} else {
					if (pgetc() == ')') {
						USTPUTC(CTLENDARI, out);
						/* leaving the outermost arithmetic: restore syntax */
						if (!--arinest)
							syntax = prevsyntax;
					} else {
						/*
						 * unbalanced parens
						 *  (don't 2nd guess - no error)
						 */
						pungetc();
						USTPUTC(')', out);
					}
				}
				break;
			case CBQUOTE:	/* '`' */
				PARSEBACKQOLD();
				break;
			case CEOF:
				goto endword;		/* exit outer loop */
			case CIGN:
				break;
			default:
				if (varnest == 0)
					goto endword;	/* exit outer loop */
				if (c != PEOA) {
					USTPUTC(c, out);
				}
			}
			c = pgetc();
		}
	}
endword:
	if (syntax == ARISYNTAX)
		synerror("Missing '))'");
	if (syntax != BASESYNTAX && eofmark == NULL)
		synerror("Unterminated quoted string");
	if (varnest != 0) {
		/* { */
		synerror("Missing '}'");
	}
	USTPUTC('\0', out);
	len = out - (char *)stackblock();	/* length including the NUL */
	out = stackblock();
	if (eofmark == NULL) {
		/*
		 * An unquoted word that is empty or a single digit and is
		 * followed by '<' or '>' is the start of a redirection.
		 */
		if ((c == '>' || c == '<')
		 && quotef == 0
		 && len <= 2
		 && (*out == '\0' || is_digit(*out))) {
			PARSEREDIR();
			return lasttoken = TREDIR;
		} else {
			pungetc();
		}
	}
	quoteflag = quotef;
	backquotelist = bqlist;
	grabstackblock(len);
	wordtext = out;
	return lasttoken = TWORD;
/* end of readtoken routine */



/*
 * Check to see whether we are at the end of the here document.  When this
 * is called, c is set to the first character of the next input line.  If
 * we are at the end of the here document, this routine sets the c to PEOF.
 */

checkend: {
	if (realeofmark(eofmark)) {
		int markloc;
		char *p;

		if (c == PEOA) {
			c = pgetc2();
		}
		if (striptabs) {
			while (c == '\t') {
				c = pgetc2();
			}
		}

		/*
		 * Compare the input with eofmark, saving every character
		 * examined on the string stack starting at markloc.
		 */
		markloc = out - (char *)stackblock();
		for (p = eofmark; STPUTC(c, out), *p; p++) {
			if (c != *p)
				goto more_heredoc;

			c = pgetc2();
		}

		if (c == '\n' || c == PEOF) {
			c = PEOF;
			nlnoprompt();
		} else {
			int len;

more_heredoc:
			/*
			 * Not the end marker.  Restore c to the first
			 * character examined and push the remaining saved
			 * characters back as input.  A negative final c is
			 * not counted in the text pushed back.
			 */
			p = (char *)stackblock() + markloc + 1;
			len = out - p;

			if (len) {
				len -= c < 0;
				c = p[-1];

				if (len) {
					char *str;

					str = alloca(len + 1);
					*(char *)mempcpy(str, p, len) = 0;

					pushstring(str, NULL);
				}
			}
		}

		/* discard the characters saved during the comparison */
		STADJUST((char *)stackblock() + markloc - out, out);
	}
	goto checkend_return;
}


/*
 * Parse a redirection operator.  The variable "out" points to a string
 * specifying the fd to be redirected.  The variable "c" contains the
 * first character of the redirection operator.
 */

parseredir: {
	char fd = *out;
	union node *np;

	np = (union node *)stalloc(sizeof (struct nfile));
	if (c == '>') {
		np->nfile.fd = 1;
		c = pgetc();
		if (c == '>')
			np->type = NAPPEND;
		else if (c == '|')
			np->type = NCLOBBER;
		else if (c == '&')
			np->type = NTOFD;
		else {
			np->type = NTO;
			pungetc();
		}
	} else {	/* c == '<' */
		np->nfile.fd = 0;
		switch (c = pgetc()) {
		case '<':
			/* a here-document node may need more room than nfile */
			if (sizeof (struct nfile) != sizeof (struct nhere)) {
				np = (union node *)stalloc(sizeof (struct nhere));
				np->nfile.fd = 0;
			}
			np->type = NHERE;
			heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
			heredoc->here = np;
			if ((c = pgetc()) == '-') {
				heredoc->striptabs = 1;
			} else {
				heredoc->striptabs = 0;
				pungetc();
			}
			break;

		case '&':
			np->type = NFROMFD;
			break;

		case '>':
			np->type = NFROMTO;
			break;

		default:
			np->type = NFROM;
			pungetc();
			break;
		}
	}
	/* an explicit digit before the operator overrides the default fd */
	if (fd != '\0')
		np->nfile.fd = digit_val(fd);
	redirnode = np;
	goto parseredir_return;
}


/*
 * Parse a substitution.  At this point, we have read the dollar sign
 * and nothing else.
 */

parsesub: {
	int subtype;
	int typeloc;
	char *p;
	static const char types[] = "}-+?=";

	c = pgetc_eatbnl();
	if (
		(checkkwd & CHKEOFMARK) ||
		c <= PEOA ||
		(c != '(' && c != '{' && !is_name(c) && !is_special(c))
	) {
		/* not a substitution: the '$' is an ordinary character */
		USTPUTC('$', out);
		pungetc();
	} else if (c == '(') {	/* $(command) or $((arith)) */
		if (pgetc_eatbnl() == '(') {
			PARSEARITH();
		} else {
			pungetc();
			PARSEBACKQNEW();
		}
	} else {
		USTPUTC(CTLVAR, out);
		/* reserve one byte for the subtype, filled in below */
		typeloc = out - (char *)stackblock();
		STADJUST(1, out);
		subtype = VSNORMAL;
		if (likely(c == '{')) {
			c = pgetc_eatbnl();
			subtype = 0;
		}
varname:
		if (is_name(c)) {
			do {
				STPUTC(c, out);
				c = pgetc_eatbnl();
			} while (is_in_name(c));
		} else if (is_digit(c)) {
			do {
				STPUTC(c, out);
				c = pgetc_eatbnl();
			} while (is_digit(c));
		} else {
			int cc = c;

			c = pgetc_eatbnl();

			/* "${#" may start a length expansion or name the parameter '#' */
			if (!subtype && cc == '#') {
				subtype = VSLENGTH;

				if (c == '_' || isalnum(c))
					goto varname;

				cc = c;
				c = pgetc_eatbnl();
				if (cc == '}' || c != '}') {
					pungetc();
					subtype = 0;
					c = cc;
					cc = '#';
				}
			}

			if (!is_special(cc)) {
				if (subtype == VSLENGTH)
					subtype = 0;
				goto badsub;
			}

			USTPUTC(cc, out);
		}

		if (subtype == 0) {
			switch (c) {
			case ':':
				subtype = VSNUL;
				c = pgetc_eatbnl();
				/*FALLTHROUGH*/
			default:
				p = strchr(types, c);
				if (p == NULL)
					break;
				subtype |= p - types + VSNORMAL;
				break;
			case '%':
			case '#':
				{
					int cc = c;
					subtype = c == '#' ? VSTRIMLEFT :
							     VSTRIMRIGHT;
					c = pgetc_eatbnl();
					/* a doubled operator selects the next subtype value */
					if (c == cc)
						subtype++;
					else
						pungetc();
					break;
				}
			}
		} else {
badsub:
			pungetc();
		}
		*((char *)stackblock() + typeloc) = subtype;
		if (subtype != VSNORMAL) {
			varnest++;
			if (dblquote)
				dqvarnest++;
		}
		STPUTC('=', out);
	}
	goto parsesub_return;
}


/*
 * Called to parse command substitutions.  Newstyle is set if the command
 * is enclosed inside $(...); nlpp is a pointer to the head of the linked
 * list of commands (passed by reference), and savelen is the number of
 * characters on the top of the stack which must be preserved.
 */

parsebackq: {
	struct nodelist **nlpp;
	union node *n;
	char *str;
	size_t savelen;
	int uninitialized_var(saveprompt);

	/* save the partial word, it is copied back after the nested parse */
	str = NULL;
	savelen = out - (char *)stackblock();
	if (savelen > 0) {
		str = alloca(savelen);
		memcpy(str, stackblock(), savelen);
	}
	if (oldstyle) {
		/* We must read until the closing backquote, giving special
		   treatment to some slashes, and then push the string and
		   reread it as input, interpreting it normally.  */
		char *pout;
		int pc;
		size_t psavelen;
		char *pstr;


		STARTSTACKSTR(pout);
		for (;;) {
			if (needprompt) {
				setprompt(2);
			}
			switch (pc = pgetc()) {
			case '`':
				goto done;

			case '\\':
				if ((pc = pgetc()) == '\n') {
					nlprompt();
					/*
					 * If eating a newline, avoid putting
					 * the newline into the new character
					 * stream (via the STPUTC after the
					 * switch).
					 */
					continue;
				}
				if (pc != '\\' && pc != '`' && pc != '$'
				    && (!dblquote || pc != '"'))
					STPUTC('\\', pout);
				if (pc > PEOA) {
					break;
				}
				/* fall through */

			case PEOF:
			case PEOA:
				synerror("EOF in backquote substitution");

			case '\n':
				nlnoprompt();
				break;

			default:
				break;
			}
			STPUTC(pc, pout);
		}
done:
		STPUTC('\0', pout);
		psavelen = pout - (char *)stackblock();
		if (psavelen > 0) {
			pstr = grabstackstr(pout);
			setinputstring(pstr);
		}
	}
	/* append a new entry at the end of the backquote list */
	nlpp = &bqlist;
	while (*nlpp)
		nlpp = &(*nlpp)->next;
	*nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
	(*nlpp)->next = NULL;

	if (oldstyle) {
		saveprompt = doprompt;
		doprompt = 0;
	}

	n = list(2);

	if (oldstyle)
		doprompt = saveprompt;
	else {
		if (readtoken() != TRP)
			synexpect(TRP);
	}

	(*nlpp)->n = n;
	if (oldstyle) {
		/*
		 * Start reading from old file again, ignoring any pushed back
		 * tokens left from the backquote parsing
		 */
		popfile();
		tokpushback = 0;
	}
	/* restore the partial word saved on entry */
	while (stackblocksize() <= savelen)
		growstackblock();
	STARTSTACKSTR(out);
	if (str) {
		memcpy(out, str, savelen);
		STADJUST(savelen, out);
	}
	USTPUTC(CTLBACKQ, out);
	if (oldstyle)
		goto parsebackq_oldreturn;
	else
		goto parsebackq_newreturn;
}

/*
 * Parse an arithmetic expansion (indicate start of one and set state)
 */
parsearith: {

	/* only the outermost arithmetic expansion switches the syntax */
	if (++arinest == 1) {
		prevsyntax = syntax;
		syntax = ARISYNTAX;
	}
	USTPUTC(CTLARI, out);
	goto parsearith_return;
}

} /* end of readtoken */
1437
1438
1439
1440 #ifdef mkinit
1441 INCLUDE "parser.h"
1442 #endif
1443
1444
/*
 * Return a pointer to the end of a legal variable name (a letter or
 * underscore followed by zero or more letters, underscores, and digits).
 *
 * The returned pointer addresses the first character of name that cannot
 * belong to such a name.  If the very first character fails is_name(),
 * name itself is returned.  The const qualifier of the argument is cast
 * away in the result.
 */

char *
endofname(const char *name)
{
	char *end = (char *) name;

	if (is_name(*end)) {
		/* First character is acceptable; consume the rest of the name. */
		do {
			end++;
		} while (*end && is_in_name(*end));
	}
	return end;
}
1464
1465
1466 /*
1467 * Called when an unexpected token is read during the parse. The argument
1468 * is the token that is expected, or -1 if more than one type of token can
1469 * occur at this point.
1470 */
1471
1472 STATIC void
synexpect(int token)1473 synexpect(int token)
1474 {
1475 char msg[64];
1476
1477 if (token >= 0) {
1478 fmtstr(msg, 64, "%s unexpected (expecting %s)",
1479 tokname[lasttoken], tokname[token]);
1480 } else {
1481 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1482 }
1483 synerror(msg);
1484 /* NOTREACHED */
1485 }
1486
1487
1488 STATIC void
synerror(const char * msg)1489 synerror(const char *msg)
1490 {
1491 errlinno = plinno;
1492 sh_error("Syntax error: %s", msg);
1493 /* NOTREACHED */
1494 }
1495
1496 STATIC void
setprompt(int which)1497 setprompt(int which)
1498 {
1499 struct stackmark smark;
1500 int show;
1501
1502 needprompt = 0;
1503 whichprompt = which;
1504
1505 #ifdef USE_LINENOISE
1506 show = 0;
1507 #else
1508 show = 1;
1509 #endif
1510 if (show) {
1511 pushstackmark(&smark, stackblocksize());
1512 out2str(getprompt(NULL));
1513 popstackmark(&smark);
1514 }
1515 }
1516
1517 const char *
expandstr(const char * ps)1518 expandstr(const char *ps)
1519 {
1520 union node n;
1521 int saveprompt;
1522
1523 /* XXX Fix (char *) cast. */
1524 setinputstring((char *)ps);
1525
1526 saveprompt = doprompt;
1527 doprompt = 0;
1528
1529 readtoken1(pgetc(), DQSYNTAX, FAKEEOFMARK, 0);
1530
1531 doprompt = saveprompt;
1532
1533 popfile();
1534
1535 n.narg.type = NARG;
1536 n.narg.next = NULL;
1537 n.narg.text = wordtext;
1538 n.narg.backquote = backquotelist;
1539
1540 expandarg(&n, NULL, EXP_QUOTED);
1541 return stackblock();
1542 }
1543
1544 /*
1545 * called by editline -- any expansions to the prompt
1546 * should be added here.
1547 */
1548 const char *
getprompt(void * unused)1549 getprompt(void *unused)
1550 {
1551 const char *prompt;
1552
1553 switch (whichprompt) {
1554 default:
1555 #ifdef DEBUG
1556 return "<internal prompt error>";
1557 #endif
1558 case 0:
1559 return nullstr;
1560 case 1:
1561 prompt = ps1val();
1562 break;
1563 case 2:
1564 prompt = ps2val();
1565 break;
1566 }
1567
1568 return expandstr(prompt);
1569 }
1570
1571 const char *const *
findkwd(const char * s)1572 findkwd(const char *s)
1573 {
1574 return findstring(
1575 s, parsekwd, sizeof(parsekwd) / sizeof(const char *)
1576 );
1577 }
1578