1 // SPDX-License-Identifier: GPL-2.0-only
2 //
3 // Traverse the source tree, parsing all .gitignore files, and print file paths
4 // that are ignored by git.
// The output is suitable for the --exclude-from option of tar.
// This is useful until the --exclude-vcs-ignores option works correctly.
7 //
8 // Copyright (C) 2023 Masahiro Yamada <masahiroy@kernel.org>
9 // (a lot of code imported from GIT)
10
11 #include <assert.h>
12 #include <dirent.h>
13 #include <errno.h>
14 #include <fcntl.h>
15 #include <getopt.h>
16 #include <stdarg.h>
17 #include <stdbool.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/stat.h>
22 #include <sys/types.h>
23 #include <unistd.h>
24
25 // Imported from commit 23c56f7bd5f1667f8b793d796bf30e39545920f6 in GIT
26 //
27 //---------------------------(IMPORT FROM GIT BEGIN)---------------------------
28
29 // Copied from environment.c
30
/* When set, patterns and paths are compared without regard to case. */
static bool ignore_case = false;
32
33 // Copied from git-compat-util.h
34
/*
 * Sane ctype - no locale, and works with signed chars.
 *
 * Any <ctype.h> macros are dropped first so that the table-driven
 * versions below are the only definitions in effect.
 */
#undef isalnum
#undef isalpha
#undef isascii
#undef iscntrl
#undef isdigit
#undef islower
#undef isprint
#undef ispunct
#undef isspace
#undef isupper
#undef isxdigit
#undef tolower
#undef toupper

/* Per-character class bits; the table itself is defined further down. */
static const unsigned char sane_ctype[256];

#define GIT_SPACE		0x01
#define GIT_DIGIT		0x02
#define GIT_ALPHA		0x04
#define GIT_GLOB_SPECIAL	0x08
#define GIT_REGEX_SPECIAL	0x10
#define GIT_PATHSPEC_MAGIC	0x20
#define GIT_CNTRL		0x40
#define GIT_PUNCT		0x80

/* True when the class bits of character x intersect "mask". */
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)

/* Classifiers that do not need the table. */
#define isascii(x) (((x) & ~0x7f) == 0)
#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)

/* Table-driven classifiers. */
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
		GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)

/* Case tests and conversions, implemented by the helpers below. */
#define islower(x) sane_iscase(x, 1)
#define isupper(x) sane_iscase(x, 0)
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)
75
sane_case(int x,int high)76 static inline int sane_case(int x, int high)
77 {
78 if (sane_istest(x, GIT_ALPHA))
79 x = (x & ~0x20) | high;
80 return x;
81 }
82
sane_iscase(int x,int is_lower)83 static inline int sane_iscase(int x, int is_lower)
84 {
85 if (!sane_istest(x, GIT_ALPHA))
86 return 0;
87
88 if (is_lower)
89 return (x & 0x20) != 0;
90 else
91 return (x & 0x20) == 0;
92 }
93
94 // Copied from ctype.c
95
96 enum {
97 S = GIT_SPACE,
98 A = GIT_ALPHA,
99 D = GIT_DIGIT,
100 G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
101 R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | */
102 P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
103 X = GIT_CNTRL,
104 U = GIT_PUNCT,
105 Z = GIT_CNTRL | GIT_SPACE
106 };
107
108 static const unsigned char sane_ctype[256] = {
109 X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /* 0.. 15 */
110 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 16.. 31 */
111 S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */
112 D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */
113 P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
114 A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /* 80.. 95 */
115 P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
116 A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */
117 /* Nothing in the 128.. range */
118 };
119
120 // Copied from hex.c
121
/*
 * Value of each byte when read as a hexadecimal digit, or -1 when the byte
 * is not a hexadecimal digit. Both upper and lower case a-f are accepted.
 */
static const signed char hexval_table[256] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 00-0f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 10-1f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 20-2f */
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,	/* 30-3f */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 40-4f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 50-5f */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 60-6f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 70-7f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 80-8f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 90-9f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* a0-af */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* b0-bf */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* c0-cf */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* d0-df */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* e0-ef */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* f0-ff */
};
156
157 // Copied from wildmatch.h
158
/* Flag bits accepted by wildmatch() / dowild(). */
#define WM_CASEFOLD		1
#define WM_PATHNAME		2

/* Result codes produced by dowild(). */
#define WM_MATCH		0
#define WM_NOMATCH		1
#define WM_ABORT_ALL		-1
#define WM_ABORT_TO_STARSTAR	-2
166
167 // Copied from wildmatch.c
168
/* Pattern and text bytes are handled as unsigned values by the matcher. */
typedef unsigned char uchar;

// local modification: remove NEGATE_CLASS(2)

/*
 * True when the "len" bytes starting at "class" spell exactly the string
 * literal "litmatch" (used for the names inside [:class:] expressions).
 */
#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
				    && *(class) == *(litmatch) \
				    && strncmp((char*)class, litmatch, len) == 0)

// local modification: simplify macros
/* Thin aliases that map the POSIX class names onto the sane ctype macros. */
#define ISALNUM(c)	isalnum(c)
#define ISALPHA(c)	isalpha(c)
#define ISBLANK(c)	((c) == ' ' || (c) == '\t')
#define ISCNTRL(c)	iscntrl(c)
#define ISDIGIT(c)	isdigit(c)
#define ISGRAPH(c)	(isprint(c) && !isspace(c))
#define ISLOWER(c)	islower(c)
#define ISPRINT(c)	isprint(c)
#define ISPUNCT(c)	ispunct(c)
#define ISSPACE(c)	isspace(c)
#define ISUPPER(c)	isupper(c)
#define ISXDIGIT(c)	isxdigit(c)
190
191 /* Match pattern "p" against "text" */
dowild(const uchar * p,const uchar * text,unsigned int flags)192 static int dowild(const uchar *p, const uchar *text, unsigned int flags)
193 {
194 uchar p_ch;
195 const uchar *pattern = p;
196
197 for ( ; (p_ch = *p) != '\0'; text++, p++) {
198 int matched, match_slash, negated;
199 uchar t_ch, prev_ch;
200 if ((t_ch = *text) == '\0' && p_ch != '*')
201 return WM_ABORT_ALL;
202 if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
203 t_ch = tolower(t_ch);
204 if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
205 p_ch = tolower(p_ch);
206 switch (p_ch) {
207 case '\\':
208 /* Literal match with following character. Note that the test
209 * in "default" handles the p[1] == '\0' failure case. */
210 p_ch = *++p;
211 /* FALLTHROUGH */
212 default:
213 if (t_ch != p_ch)
214 return WM_NOMATCH;
215 continue;
216 case '?':
217 /* Match anything but '/'. */
218 if ((flags & WM_PATHNAME) && t_ch == '/')
219 return WM_NOMATCH;
220 continue;
221 case '*':
222 if (*++p == '*') {
223 const uchar *prev_p = p - 2;
224 while (*++p == '*') {}
225 if (!(flags & WM_PATHNAME))
226 /* without WM_PATHNAME, '*' == '**' */
227 match_slash = 1;
228 else if ((prev_p < pattern || *prev_p == '/') &&
229 (*p == '\0' || *p == '/' ||
230 (p[0] == '\\' && p[1] == '/'))) {
231 /*
232 * Assuming we already match 'foo/' and are at
233 * <star star slash>, just assume it matches
234 * nothing and go ahead match the rest of the
235 * pattern with the remaining string. This
236 * helps make foo/<*><*>/bar (<> because
237 * otherwise it breaks C comment syntax) match
238 * both foo/bar and foo/a/bar.
239 */
240 if (p[0] == '/' &&
241 dowild(p + 1, text, flags) == WM_MATCH)
242 return WM_MATCH;
243 match_slash = 1;
244 } else /* WM_PATHNAME is set */
245 match_slash = 0;
246 } else
247 /* without WM_PATHNAME, '*' == '**' */
248 match_slash = flags & WM_PATHNAME ? 0 : 1;
249 if (*p == '\0') {
250 /* Trailing "**" matches everything. Trailing "*" matches
251 * only if there are no more slash characters. */
252 if (!match_slash) {
253 if (strchr((char *)text, '/'))
254 return WM_NOMATCH;
255 }
256 return WM_MATCH;
257 } else if (!match_slash && *p == '/') {
258 /*
259 * _one_ asterisk followed by a slash
260 * with WM_PATHNAME matches the next
261 * directory
262 */
263 const char *slash = strchr((char*)text, '/');
264 if (!slash)
265 return WM_NOMATCH;
266 text = (const uchar*)slash;
267 /* the slash is consumed by the top-level for loop */
268 break;
269 }
270 while (1) {
271 if (t_ch == '\0')
272 break;
273 /*
274 * Try to advance faster when an asterisk is
275 * followed by a literal. We know in this case
276 * that the string before the literal
277 * must belong to "*".
278 * If match_slash is false, do not look past
279 * the first slash as it cannot belong to '*'.
280 */
281 if (!is_glob_special(*p)) {
282 p_ch = *p;
283 if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
284 p_ch = tolower(p_ch);
285 while ((t_ch = *text) != '\0' &&
286 (match_slash || t_ch != '/')) {
287 if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
288 t_ch = tolower(t_ch);
289 if (t_ch == p_ch)
290 break;
291 text++;
292 }
293 if (t_ch != p_ch)
294 return WM_NOMATCH;
295 }
296 if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
297 if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
298 return matched;
299 } else if (!match_slash && t_ch == '/')
300 return WM_ABORT_TO_STARSTAR;
301 t_ch = *++text;
302 }
303 return WM_ABORT_ALL;
304 case '[':
305 p_ch = *++p;
306 if (p_ch == '^')
307 p_ch = '!';
308 /* Assign literal 1/0 because of "matched" comparison. */
309 negated = p_ch == '!' ? 1 : 0;
310 if (negated) {
311 /* Inverted character class. */
312 p_ch = *++p;
313 }
314 prev_ch = 0;
315 matched = 0;
316 do {
317 if (!p_ch)
318 return WM_ABORT_ALL;
319 if (p_ch == '\\') {
320 p_ch = *++p;
321 if (!p_ch)
322 return WM_ABORT_ALL;
323 if (t_ch == p_ch)
324 matched = 1;
325 } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
326 p_ch = *++p;
327 if (p_ch == '\\') {
328 p_ch = *++p;
329 if (!p_ch)
330 return WM_ABORT_ALL;
331 }
332 if (t_ch <= p_ch && t_ch >= prev_ch)
333 matched = 1;
334 else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
335 uchar t_ch_upper = toupper(t_ch);
336 if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
337 matched = 1;
338 }
339 p_ch = 0; /* This makes "prev_ch" get set to 0. */
340 } else if (p_ch == '[' && p[1] == ':') {
341 const uchar *s;
342 int i;
343 for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
344 if (!p_ch)
345 return WM_ABORT_ALL;
346 i = p - s - 1;
347 if (i < 0 || p[-1] != ':') {
348 /* Didn't find ":]", so treat like a normal set. */
349 p = s - 2;
350 p_ch = '[';
351 if (t_ch == p_ch)
352 matched = 1;
353 continue;
354 }
355 if (CC_EQ(s,i, "alnum")) {
356 if (ISALNUM(t_ch))
357 matched = 1;
358 } else if (CC_EQ(s,i, "alpha")) {
359 if (ISALPHA(t_ch))
360 matched = 1;
361 } else if (CC_EQ(s,i, "blank")) {
362 if (ISBLANK(t_ch))
363 matched = 1;
364 } else if (CC_EQ(s,i, "cntrl")) {
365 if (ISCNTRL(t_ch))
366 matched = 1;
367 } else if (CC_EQ(s,i, "digit")) {
368 if (ISDIGIT(t_ch))
369 matched = 1;
370 } else if (CC_EQ(s,i, "graph")) {
371 if (ISGRAPH(t_ch))
372 matched = 1;
373 } else if (CC_EQ(s,i, "lower")) {
374 if (ISLOWER(t_ch))
375 matched = 1;
376 } else if (CC_EQ(s,i, "print")) {
377 if (ISPRINT(t_ch))
378 matched = 1;
379 } else if (CC_EQ(s,i, "punct")) {
380 if (ISPUNCT(t_ch))
381 matched = 1;
382 } else if (CC_EQ(s,i, "space")) {
383 if (ISSPACE(t_ch))
384 matched = 1;
385 } else if (CC_EQ(s,i, "upper")) {
386 if (ISUPPER(t_ch))
387 matched = 1;
388 else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
389 matched = 1;
390 } else if (CC_EQ(s,i, "xdigit")) {
391 if (ISXDIGIT(t_ch))
392 matched = 1;
393 } else /* malformed [:class:] string */
394 return WM_ABORT_ALL;
395 p_ch = 0; /* This makes "prev_ch" get set to 0. */
396 } else if (t_ch == p_ch)
397 matched = 1;
398 } while (prev_ch = p_ch, (p_ch = *++p) != ']');
399 if (matched == negated ||
400 ((flags & WM_PATHNAME) && t_ch == '/'))
401 return WM_NOMATCH;
402 continue;
403 }
404 }
405
406 return *text ? WM_NOMATCH : WM_MATCH;
407 }
408
409 /* Match the "pattern" against the "text" string. */
wildmatch(const char * pattern,const char * text,unsigned int flags)410 static int wildmatch(const char *pattern, const char *text, unsigned int flags)
411 {
412 // local modification: move WM_CASEFOLD here
413 if (ignore_case)
414 flags |= WM_CASEFOLD;
415
416 return dowild((const uchar*)pattern, (const uchar*)text, flags);
417 }
418
419 // Copied from dir.h
420
/* Bits of "flags" as computed by parse_path_pattern(). */
#define PATTERN_FLAG_NODIR	1	/* pattern contains no '/' */
#define PATTERN_FLAG_ENDSWITH	4	/* '*' followed by a literal string */
#define PATTERN_FLAG_MUSTBEDIR	8	/* pattern had a trailing '/' */
#define PATTERN_FLAG_NEGATIVE	16	/* pattern had a leading '!' */
425
426 // Copied from dir.c
427
fspathncmp(const char * a,const char * b,size_t count)428 static int fspathncmp(const char *a, const char *b, size_t count)
429 {
430 return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
431 }
432
/*
 * Length of the leading part of "match" that contains no glob special
 * character, i.e. the offset of the first such character or of the
 * terminating NUL, whichever comes first.
 */
static int simple_length(const char *match)
{
	int len = 0;

	while (match[len] != '\0' && !is_glob_special(match[len]))
		len++;

	return len;
}
444
/* Non-zero when "string" contains no glob special character at all. */
static int no_wildcard(const char *string)
{
	const char *stop = string + simple_length(string);

	return *stop == '\0';
}
449
parse_path_pattern(const char ** pattern,int * patternlen,unsigned * flags,int * nowildcardlen)450 static void parse_path_pattern(const char **pattern,
451 int *patternlen,
452 unsigned *flags,
453 int *nowildcardlen)
454 {
455 const char *p = *pattern;
456 size_t i, len;
457
458 *flags = 0;
459 if (*p == '!') {
460 *flags |= PATTERN_FLAG_NEGATIVE;
461 p++;
462 }
463 len = strlen(p);
464 if (len && p[len - 1] == '/') {
465 len--;
466 *flags |= PATTERN_FLAG_MUSTBEDIR;
467 }
468 for (i = 0; i < len; i++) {
469 if (p[i] == '/')
470 break;
471 }
472 if (i == len)
473 *flags |= PATTERN_FLAG_NODIR;
474 *nowildcardlen = simple_length(p);
475 /*
476 * we should have excluded the trailing slash from 'p' too,
477 * but that's one more allocation. Instead just make sure
478 * nowildcardlen does not exceed real patternlen
479 */
480 if (*nowildcardlen > len)
481 *nowildcardlen = len;
482 if (*p == '*' && no_wildcard(p + 1))
483 *flags |= PATTERN_FLAG_ENDSWITH;
484 *pattern = p;
485 *patternlen = len;
486 }
487
/*
 * Strip unescaped trailing spaces from "buf" in place.
 *
 * A backslash escapes the character that follows it, so an escaped space is
 * kept. If the string ends in a lone backslash nothing is trimmed.
 */
static void trim_trailing_spaces(char *buf)
{
	char *cut = NULL;	/* start of the current run of plain spaces */
	char *cur = buf;

	while (*cur) {
		if (*cur == ' ') {
			if (!cut)
				cut = cur;
		} else {
			if (*cur == '\\') {
				/* skip the escaped character */
				cur++;
				if (*cur == '\0')
					return;
			}
			cut = NULL;
		}
		cur++;
	}

	if (cut)
		*cut = '\0';
}
510
match_basename(const char * basename,int basenamelen,const char * pattern,int prefix,int patternlen,unsigned flags)511 static int match_basename(const char *basename, int basenamelen,
512 const char *pattern, int prefix, int patternlen,
513 unsigned flags)
514 {
515 if (prefix == patternlen) {
516 if (patternlen == basenamelen &&
517 !fspathncmp(pattern, basename, basenamelen))
518 return 1;
519 } else if (flags & PATTERN_FLAG_ENDSWITH) {
520 /* "*literal" matching against "fooliteral" */
521 if (patternlen - 1 <= basenamelen &&
522 !fspathncmp(pattern + 1,
523 basename + basenamelen - (patternlen - 1),
524 patternlen - 1))
525 return 1;
526 } else {
527 // local modification: call wildmatch() directly
528 if (!wildmatch(pattern, basename, flags))
529 return 1;
530 }
531 return 0;
532 }
533
match_pathname(const char * pathname,int pathlen,const char * base,int baselen,const char * pattern,int prefix,int patternlen)534 static int match_pathname(const char *pathname, int pathlen,
535 const char *base, int baselen,
536 const char *pattern, int prefix, int patternlen)
537 {
538 // local modification: remove local variables
539
540 /*
541 * match with FNM_PATHNAME; the pattern has base implicitly
542 * in front of it.
543 */
544 if (*pattern == '/') {
545 pattern++;
546 patternlen--;
547 prefix--;
548 }
549
550 /*
551 * baselen does not count the trailing slash. base[] may or
552 * may not end with a trailing slash though.
553 */
554 if (pathlen < baselen + 1 ||
555 (baselen && pathname[baselen] != '/') ||
556 fspathncmp(pathname, base, baselen))
557 return 0;
558
559 // local modification: simplified because always baselen > 0
560 pathname += baselen + 1;
561 pathlen -= baselen + 1;
562
563 if (prefix) {
564 /*
565 * if the non-wildcard part is longer than the
566 * remaining pathname, surely it cannot match.
567 */
568 if (prefix > pathlen)
569 return 0;
570
571 if (fspathncmp(pattern, pathname, prefix))
572 return 0;
573 pattern += prefix;
574 patternlen -= prefix;
575 pathname += prefix;
576 pathlen -= prefix;
577
578 /*
579 * If the whole pattern did not have a wildcard,
580 * then our prefix match is all we need; we
581 * do not need to call fnmatch at all.
582 */
583 if (!patternlen && !pathlen)
584 return 1;
585 }
586
587 // local modification: call wildmatch() directly
588 return !wildmatch(pattern, pathname, WM_PATHNAME);
589 }
590
591 // Copied from git/utf8.c
592
/* The three bytes 0xEF 0xBB 0xBF, skipped at the start of a .gitignore. */
static const char utf8_bom[] = "\xEF\xBB\xBF";
594
595 //----------------------------(IMPORT FROM GIT END)----------------------------
596
/*
 * One parsed ignore pattern.
 *
 * pattern[] stores two NUL-terminated strings back to back: the pattern
 * text (patternlen bytes) and then the directory of the .gitignore it came
 * from (dirlen bytes).
 */
struct pattern {
	unsigned int flags;	/* PATTERN_FLAG_* bits */
	int nowildcardlen;	/* length of the wildcard-free leading part */
	int patternlen;		/* length of the pattern text */
	int dirlen;		/* length of the directory string */
	char pattern[];
};
604
// Patterns currently in effect, in the order they were added
static struct pattern **pattern_list;
// Number of used and of allocated slots in pattern_list
static int nr_patterns;
static int alloced_patterns;

// Remember the number of patterns at each directory level
static int *nr_patterns_at;
// Track the current and the maximum directory level
static int depth;
static int max_depth;

// Set by the --debug option
static bool debug_on;
// Output streams for the ignored paths and for the optional stat listing
static FILE *out_fp;
static FILE *stat_fp;
// String that replaces the leading "./" of each printed path
static char *prefix = "";
// Program name used in messages
static char *progname;
616
/* Report the current errno through perror(s) and exit with failure. */
static void __attribute__((noreturn)) perror_exit(const char *s)
{
	perror(s);
	exit(EXIT_FAILURE);
}
623
error_exit(const char * fmt,...)624 static void __attribute__((noreturn)) error_exit(const char *fmt, ...)
625 {
626 va_list args;
627
628 fprintf(stderr, "%s: error: ", progname);
629
630 va_start(args, fmt);
631 vfprintf(stderr, fmt, args);
632 va_end(args);
633
634 exit(EXIT_FAILURE);
635 }
636
debug(const char * fmt,...)637 static void debug(const char *fmt, ...)
638 {
639 va_list args;
640 int i;
641
642 if (!debug_on)
643 return;
644
645 fprintf(stderr, "[DEBUG] ");
646
647 for (i = 0; i < depth * 2; i++)
648 fputc(' ', stderr);
649
650 va_start(args, fmt);
651 vfprintf(stderr, fmt, args);
652 va_end(args);
653 }
654
xrealloc(void * ptr,size_t size)655 static void *xrealloc(void *ptr, size_t size)
656 {
657 ptr = realloc(ptr, size);
658 if (!ptr)
659 perror_exit(progname);
660
661 return ptr;
662 }
663
/* Allocate "size" bytes; failure is handled inside xrealloc(). */
static void *xmalloc(size_t size)
{
	void *fresh = xrealloc(NULL, size);

	return fresh;
}
668
669 // similar to last_matching_pattern_from_list() in GIT
is_ignored(const char * path,int pathlen,int dirlen,bool is_dir)670 static bool is_ignored(const char *path, int pathlen, int dirlen, bool is_dir)
671 {
672 int i;
673
674 // Search in the reverse order because the last matching pattern wins.
675 for (i = nr_patterns - 1; i >= 0; i--) {
676 struct pattern *p = pattern_list[i];
677 unsigned int flags = p->flags;
678 const char *gitignore_dir = p->pattern + p->patternlen + 1;
679 bool ignored;
680
681 if ((flags & PATTERN_FLAG_MUSTBEDIR) && !is_dir)
682 continue;
683
684 if (flags & PATTERN_FLAG_NODIR) {
685 if (!match_basename(path + dirlen + 1,
686 pathlen - dirlen - 1,
687 p->pattern,
688 p->nowildcardlen,
689 p->patternlen,
690 p->flags))
691 continue;
692 } else {
693 if (!match_pathname(path, pathlen,
694 gitignore_dir, p->dirlen,
695 p->pattern,
696 p->nowildcardlen,
697 p->patternlen))
698 continue;
699 }
700
701 debug("%s: matches %s%s%s (%s/.gitignore)\n", path,
702 flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
703 flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "",
704 gitignore_dir);
705
706 ignored = (flags & PATTERN_FLAG_NEGATIVE) == 0;
707 if (ignored)
708 debug("Ignore: %s\n", path);
709
710 return ignored;
711 }
712
713 debug("%s: no match\n", path);
714
715 return false;
716 }
717
add_pattern(const char * string,const char * dir,int dirlen)718 static void add_pattern(const char *string, const char *dir, int dirlen)
719 {
720 struct pattern *p;
721 int patternlen, nowildcardlen;
722 unsigned int flags;
723
724 parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen);
725
726 if (patternlen == 0)
727 return;
728
729 p = xmalloc(sizeof(*p) + patternlen + dirlen + 2);
730
731 memcpy(p->pattern, string, patternlen);
732 p->pattern[patternlen] = 0;
733 memcpy(p->pattern + patternlen + 1, dir, dirlen);
734 p->pattern[patternlen + 1 + dirlen] = 0;
735
736 p->patternlen = patternlen;
737 p->nowildcardlen = nowildcardlen;
738 p->dirlen = dirlen;
739 p->flags = flags;
740
741 debug("Add pattern: %s%s%s\n",
742 flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
743 flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "");
744
745 if (nr_patterns >= alloced_patterns) {
746 alloced_patterns += 128;
747 pattern_list = xrealloc(pattern_list,
748 sizeof(*pattern_list) * alloced_patterns);
749 }
750
751 pattern_list[nr_patterns++] = p;
752 }
753
754 // similar to add_patterns_from_buffer() in GIT
add_patterns_from_gitignore(const char * dir,int dirlen)755 static void add_patterns_from_gitignore(const char *dir, int dirlen)
756 {
757 struct stat st;
758 char path[PATH_MAX], *buf, *entry;
759 size_t size;
760 int fd, pathlen, i;
761
762 pathlen = snprintf(path, sizeof(path), "%s/.gitignore", dir);
763 if (pathlen >= sizeof(path))
764 error_exit("%s: too long path was truncated\n", path);
765
766 fd = open(path, O_RDONLY | O_NOFOLLOW);
767 if (fd < 0) {
768 if (errno != ENOENT)
769 return perror_exit(path);
770 return;
771 }
772
773 if (fstat(fd, &st) < 0)
774 perror_exit(path);
775
776 size = st.st_size;
777
778 buf = xmalloc(size + 1);
779 if (read(fd, buf, st.st_size) != st.st_size)
780 perror_exit(path);
781
782 buf[st.st_size] = '\n';
783 if (close(fd))
784 perror_exit(path);
785
786 debug("Parse %s\n", path);
787
788 entry = buf;
789
790 // skip utf8 bom
791 if (!strncmp(entry, utf8_bom, strlen(utf8_bom)))
792 entry += strlen(utf8_bom);
793
794 for (i = entry - buf; i < size; i++) {
795 if (buf[i] == '\n') {
796 if (entry != buf + i && entry[0] != '#') {
797 buf[i - (i && buf[i-1] == '\r')] = 0;
798 trim_trailing_spaces(entry);
799 add_pattern(entry, dir, dirlen);
800 }
801 entry = buf + i + 1;
802 }
803 }
804
805 free(buf);
806 }
807
808 // Save the current number of patterns and increment the depth
increment_depth(void)809 static void increment_depth(void)
810 {
811 if (depth >= max_depth) {
812 max_depth += 1;
813 nr_patterns_at = xrealloc(nr_patterns_at,
814 sizeof(*nr_patterns_at) * max_depth);
815 }
816
817 nr_patterns_at[depth] = nr_patterns;
818 depth++;
819 }
820
821 // Decrement the depth, and free up the patterns of this directory level.
decrement_depth(void)822 static void decrement_depth(void)
823 {
824 depth--;
825 assert(depth >= 0);
826
827 while (nr_patterns > nr_patterns_at[depth])
828 free(pattern_list[--nr_patterns]);
829 }
830
print_path(const char * path)831 static void print_path(const char *path)
832 {
833 // The path always starts with "./"
834 assert(strlen(path) >= 2);
835
836 // Replace the root directory with a preferred prefix.
837 // This is useful for the tar command.
838 fprintf(out_fp, "%s%s\n", prefix, path + 2);
839 }
840
print_stat(const char * path,struct stat * st)841 static void print_stat(const char *path, struct stat *st)
842 {
843 if (!stat_fp)
844 return;
845
846 if (!S_ISREG(st->st_mode) && !S_ISLNK(st->st_mode))
847 return;
848
849 assert(strlen(path) >= 2);
850
851 fprintf(stat_fp, "%c %9ld %10ld %s\n",
852 S_ISLNK(st->st_mode) ? 'l' : '-',
853 st->st_size, st->st_mtim.tv_sec, path + 2);
854 }
855
856 // Traverse the entire directory tree, parsing .gitignore files.
857 // Print file paths that are not tracked by git.
858 //
859 // Return true if all files under the directory are ignored, false otherwise.
traverse_directory(const char * dir,int dirlen)860 static bool traverse_directory(const char *dir, int dirlen)
861 {
862 bool all_ignored = true;
863 DIR *dirp;
864
865 debug("Enter[%d]: %s\n", depth, dir);
866 increment_depth();
867
868 add_patterns_from_gitignore(dir, dirlen);
869
870 dirp = opendir(dir);
871 if (!dirp)
872 perror_exit(dir);
873
874 while (1) {
875 struct dirent *d;
876 struct stat st;
877 char path[PATH_MAX];
878 int pathlen;
879 bool ignored;
880
881 errno = 0;
882 d = readdir(dirp);
883 if (!d) {
884 if (errno)
885 perror_exit(dir);
886 break;
887 }
888
889 if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, "."))
890 continue;
891
892 pathlen = snprintf(path, sizeof(path), "%s/%s", dir, d->d_name);
893 if (pathlen >= sizeof(path))
894 error_exit("%s: too long path was truncated\n", path);
895
896 if (lstat(path, &st) < 0)
897 perror_exit(path);
898
899 if ((!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) ||
900 is_ignored(path, pathlen, dirlen, S_ISDIR(st.st_mode))) {
901 ignored = true;
902 } else {
903 if (S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode))
904 // If all the files in a directory are ignored,
905 // let's ignore that directory as well. This
906 // will avoid empty directories in the tarball.
907 ignored = traverse_directory(path, pathlen);
908 else
909 ignored = false;
910 }
911
912 if (ignored) {
913 print_path(path);
914 } else {
915 print_stat(path, &st);
916 all_ignored = false;
917 }
918 }
919
920 if (closedir(dirp))
921 perror_exit(dir);
922
923 decrement_depth();
924 debug("Leave[%d]: %s\n", depth, dir);
925
926 return all_ignored;
927 }
928
usage(void)929 static void usage(void)
930 {
931 fprintf(stderr,
932 "usage: %s [options]\n"
933 "\n"
934 "Show files that are ignored by git\n"
935 "\n"
936 "options:\n"
937 " -d, --debug print debug messages to stderr\n"
938 " -e, --exclude PATTERN add the given exclude pattern\n"
939 " -h, --help show this help message and exit\n"
940 " -i, --ignore-case Ignore case differences between the patterns and the files\n"
941 " -o, --output FILE output the ignored files to a file (default: '-', i.e. stdout)\n"
942 " -p, --prefix PREFIX prefix added to each path (default: empty string)\n"
943 " -r, --rootdir DIR root of the source tree (default: current working directory)\n"
944 " -s, --stat FILE output the file stat of non-ignored files to a file\n",
945 progname);
946 }
947
/*
 * Open an output stream and store it in *fp. The name "-" selects stdout;
 * any other name is opened for writing, and failure to do so is fatal.
 */
static void open_output(const char *pathname, FILE **fp)
{
	if (strcmp(pathname, "-") == 0) {
		*fp = stdout;
		return;
	}

	*fp = fopen(pathname, "w");
	if (*fp == NULL)
		perror_exit(pathname);
}
958
/*
 * Flush and close an output stream. The program terminates with an error
 * if the stream has its error indicator set or cannot be closed.
 */
static void close_output(const char *pathname, FILE *fp)
{
	(void)fflush(fp);

	if (ferror(fp) != 0)
		error_exit("not all data was written to the output\n");

	if (fclose(fp) != 0)
		perror_exit(pathname);
}
969
main(int argc,char * argv[])970 int main(int argc, char *argv[])
971 {
972 const char *output = "-";
973 const char *rootdir = ".";
974 const char *stat = NULL;
975
976 progname = strrchr(argv[0], '/');
977 if (progname)
978 progname++;
979 else
980 progname = argv[0];
981
982 while (1) {
983 static struct option long_options[] = {
984 {"debug", no_argument, NULL, 'd'},
985 {"help", no_argument, NULL, 'h'},
986 {"ignore-case", no_argument, NULL, 'i'},
987 {"output", required_argument, NULL, 'o'},
988 {"prefix", required_argument, NULL, 'p'},
989 {"rootdir", required_argument, NULL, 'r'},
990 {"stat", required_argument, NULL, 's'},
991 {"exclude", required_argument, NULL, 'x'},
992 {},
993 };
994
995 int c = getopt_long(argc, argv, "dhino:p:r:s:x:", long_options, NULL);
996
997 if (c == -1)
998 break;
999
1000 switch (c) {
1001 case 'd':
1002 debug_on = true;
1003 break;
1004 case 'h':
1005 usage();
1006 exit(0);
1007 case 'i':
1008 ignore_case = true;
1009 break;
1010 case 'o':
1011 output = optarg;
1012 break;
1013 case 'p':
1014 prefix = optarg;
1015 break;
1016 case 'r':
1017 rootdir = optarg;
1018 break;
1019 case 's':
1020 stat = optarg;
1021 break;
1022 case 'x':
1023 add_pattern(optarg, ".", strlen("."));
1024 break;
1025 case '?':
1026 usage();
1027 /* fallthrough */
1028 default:
1029 exit(EXIT_FAILURE);
1030 }
1031 }
1032
1033 open_output(output, &out_fp);
1034 if (stat && stat[0])
1035 open_output(stat, &stat_fp);
1036
1037 if (chdir(rootdir))
1038 perror_exit(rootdir);
1039
1040 add_pattern(".git/", ".", strlen("."));
1041
1042 if (traverse_directory(".", strlen(".")))
1043 print_path("./");
1044
1045 assert(depth == 0);
1046
1047 while (nr_patterns > 0)
1048 free(pattern_list[--nr_patterns]);
1049 free(pattern_list);
1050 free(nr_patterns_at);
1051
1052 close_output(output, out_fp);
1053 if (stat_fp)
1054 close_output(stat, stat_fp);
1055
1056 return 0;
1057 }
1058