1 /* Copyright (C) 1991, 1992, 1993, 1996, 1997, 1998, 1999, 2000, 2001,
2 2002, 2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Match STRING against the filename pattern PATTERN, returning zero if
19 it matches, nonzero if not. */
20 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
21 const CHAR *string_end, int no_leading_period, int flags)
23 static const CHAR *END (const CHAR *patternp) internal_function;
27 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
28 int no_leading_period, int flags)
30 register const CHAR *p = pattern, *n = string;
33 # if WIDE_CHAR_VERSION
34 const char *collseq = (const char *)
35 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
37 const UCHAR *collseq = (const UCHAR *)
38 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
42 while ((c = *p++) != L('\0'))
44 int new_no_leading_period = 0;
50 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
54 res = EXT (c, p, n, string_end, no_leading_period,
62 else if (*n == L('/') && (flags & FNM_FILE_NAME))
64 else if (*n == L('.') && no_leading_period)
69 if (!(flags & FNM_NOESCAPE))
73 /* Trailing \ loses. */
77 if (n == string_end || FOLD ((UCHAR) *n) != c)
82 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
86 res = EXT (c, p, n, string_end, no_leading_period,
92 if (n != string_end && *n == L('.') && no_leading_period)
95 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
97 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
99 const CHAR *endp = END (p);
102 /* This is a pattern. Skip over it. */
110 /* A ? needs to match one character. */
112 /* There isn't another character; no match. */
114 else if (*n == L('/')
115 && __builtin_expect (flags & FNM_FILE_NAME, 0))
116 /* A slash does not match a wildcard under
120 /* One character of the string is consumed in matching
121 this ? wildcard, so *??? won't match if there are
122 less than three characters. */
128 /* The wildcard(s) is/are the last element of the pattern.
129 If the name is a file name and contains another slash
130 this means it cannot match, unless the FNM_LEADING_DIR
133 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
135 if (flags & FNM_FILE_NAME)
137 if (flags & FNM_LEADING_DIR)
141 if (MEMCHR (n, L('/'), string_end - n) == NULL)
152 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
158 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
159 && (c == L('@') || c == L('+') || c == L('!'))
162 int flags2 = ((flags & FNM_FILE_NAME)
163 ? flags : (flags & ~FNM_PERIOD));
164 int no_leading_period2 = no_leading_period;
166 for (--p; n < endp; ++n, no_leading_period2 = 0)
167 if (FCT (p, n, string_end, no_leading_period2, flags2)
171 else if (c == L('/') && (flags & FNM_FILE_NAME))
173 while (n < string_end && *n != L('/'))
175 if (n < string_end && *n == L('/')
176 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
182 int flags2 = ((flags & FNM_FILE_NAME)
183 ? flags : (flags & ~FNM_PERIOD));
184 int no_leading_period2 = no_leading_period;
186 if (c == L('\\') && !(flags & FNM_NOESCAPE))
189 for (--p; n < endp; ++n, no_leading_period2 = 0)
190 if (FOLD ((UCHAR) *n) == c
191 && (FCT (p, n, string_end, no_leading_period2, flags2)
197 /* If we come here no match is possible with the wildcard. */
202 /* Nonzero if the sense of the character class is inverted. */
207 if (posixly_correct == 0)
208 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
213 if (*n == L('.') && no_leading_period)
216 if (*n == L('/') && (flags & FNM_FILE_NAME))
217 /* `/' cannot be matched. */
220 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
224 fn = FOLD ((UCHAR) *n);
229 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
233 c = FOLD ((UCHAR) *p);
239 else if (c == L('[') && *p == L(':'))
241 /* Leave room for the null. */
242 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
244 #if defined _LIBC || WIDE_CHAR_SUPPORT
247 const CHAR *startp = p;
251 if (c1 == CHAR_CLASS_MAX_LENGTH)
252 /* The name is too long and therefore the pattern
257 if (c == L(':') && p[1] == L(']'))
262 if (c < L('a') || c >= L('z'))
264 /* This cannot possibly be a character class name.
265 Match it as a normal range. */
274 #if defined _LIBC || WIDE_CHAR_SUPPORT
275 wt = IS_CHAR_CLASS (str);
277 /* Invalid character class name. */
280 # if defined _LIBC && ! WIDE_CHAR_VERSION
281 /* The following code is glibc specific, but it does
282 a good job of speeding things up there since
283 we can avoid the btowc() call. */
284 if (_ISCTYPE ((UCHAR) *n, wt))
287 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
291 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
292 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
293 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
294 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
295 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
296 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
297 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
298 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
299 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
300 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
301 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
302 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
308 else if (c == L('[') && *p == L('='))
312 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
313 const CHAR *startp = p;
325 if (c != L('=') || p[1] != L(']'))
335 if ((UCHAR) *n == str[0])
340 const int32_t *table;
341 # if WIDE_CHAR_VERSION
342 const int32_t *weights;
343 const int32_t *extra;
345 const unsigned char *weights;
346 const unsigned char *extra;
348 const int32_t *indirect;
350 const UCHAR *cp = (const UCHAR *) str;
352 /* This #include defines a local function! */
353 # if WIDE_CHAR_VERSION
354 # include <locale/weightwc.h>
356 # include <locale/weight.h>
359 # if WIDE_CHAR_VERSION
360 table = (const int32_t *)
361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
362 weights = (const int32_t *)
363 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
364 extra = (const int32_t *)
365 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
366 indirect = (const int32_t *)
367 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
369 table = (const int32_t *)
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371 weights = (const unsigned char *)
372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
373 extra = (const unsigned char *)
374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
375 indirect = (const int32_t *)
376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
382 /* We found a table entry. Now see whether the
383 character we are currently at has the same
384 equivalence class value. */
385 int len = weights[idx];
387 const UCHAR *np = (const UCHAR *) n;
389 idx2 = findidx (&np);
390 if (idx2 != 0 && len == weights[idx2])
395 && (weights[idx + 1 + cnt]
396 == weights[idx2 + 1 + cnt]))
408 else if (c == L('\0'))
409 /* [ (unterminated) loses. */
418 if (c == L('[') && *p == L('.'))
421 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
422 const CHAR *startp = p;
428 if (c == L('.') && p[1] == L(']'))
438 /* We have to handle the symbols differently in
439 ranges since then the collation sequence is
441 is_range = *p == L('-') && p[1] != L('\0');
445 /* There are no names defined in the collation
446 data. Therefore we only accept the trivial
447 names consisting of the character itself. */
451 if (!is_range && *n == startp[1])
460 const int32_t *symb_table;
461 # ifdef WIDE_CHAR_VERSION
465 # define str (startp + 1)
467 const unsigned char *extra;
473 # ifdef WIDE_CHAR_VERSION
474 /* We have to convert the name to a single-byte
475 string. This is possible since the names
476 consist of ASCII characters and the internal
477 representation is UCS4. */
478 for (strcnt = 0; strcnt < c1; ++strcnt)
479 str[strcnt] = startp[1 + strcnt];
483 _NL_CURRENT_WORD (LC_COLLATE,
484 _NL_COLLATE_SYMB_HASH_SIZEMB);
485 symb_table = (const int32_t *)
486 _NL_CURRENT (LC_COLLATE,
487 _NL_COLLATE_SYMB_TABLEMB);
488 extra = (const unsigned char *)
489 _NL_CURRENT (LC_COLLATE,
490 _NL_COLLATE_SYMB_EXTRAMB);
492 /* Locate the character in the hashing table. */
493 hash = elem_hash (str, c1);
496 elem = hash % table_size;
497 second = hash % (table_size - 2);
498 while (symb_table[2 * elem] != 0)
500 /* First compare the hashing value. */
501 if (symb_table[2 * elem] == hash
502 && c1 == extra[symb_table[2 * elem + 1]]
504 &extra[symb_table[2 * elem + 1]
507 /* Yep, this is the entry. */
508 idx = symb_table[2 * elem + 1];
509 idx += 1 + extra[idx];
517 if (symb_table[2 * elem] != 0)
519 /* Compare the byte sequence but only if
520 this is not part of a range. */
521 # ifdef WIDE_CHAR_VERSION
524 idx += 1 + extra[idx];
525 /* Adjust for the alignment. */
526 idx = (idx + 3) & ~3;
528 wextra = (int32_t *) &extra[idx + 4];
533 # ifdef WIDE_CHAR_VERSION
534 for (c1 = 0; c1 < wextra[idx]; ++c1)
535 if (n[c1] != wextra[1 + c1])
538 if (c1 == wextra[idx])
541 for (c1 = 0; c1 < extra[idx]; ++c1)
542 if (n[c1] != extra[1 + c1])
545 if (c1 == extra[idx])
550 /* Get the collation sequence value. */
552 # ifdef WIDE_CHAR_VERSION
553 cold = wextra[1 + wextra[idx]];
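555 /* Adjust for the alignment. NOTE(review): the mask applied below is ~4, which clears only bit 2; rounding up to a multiple of 4 needs ~3 -- confirm. */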
556 idx += 1 + extra[idx];
557 idx = (idx + 3) & ~4;
558 cold = *((int32_t *) &extra[idx]);
565 /* No valid character. Match it as a
567 if (!is_range && *n == str[0])
584 /* We have to handle the symbols differently in
585 ranges since then the collation sequence is
587 is_range = (*p == L('-') && p[1] != L('\0')
590 if (!is_range && c == fn)
597 if (c == L('-') && *p != L(']'))
600 /* We have to find the collation sequence
601 value for C. Collation sequence is nothing
602 we can regularly access. The sequence
603 value is defined by the order in which the
604 definitions of the collation values for the
605 various characters appear in the source
606 file. A strange concept, nowhere
612 # ifdef WIDE_CHAR_VERSION
613 /* Search in the `names' array for the characters. */
614 fcollseq = collseq_table_lookup (collseq, fn);
615 if (fcollseq == ~((uint32_t) 0))
616 /* XXX We don't know anything about the character
617 we are supposed to match. This means we are
619 goto range_not_matched;
624 lcollseq = collseq_table_lookup (collseq, cold);
626 fcollseq = collseq[fn];
627 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
631 if (cend == L('[') && *p == L('.'))
634 _NL_CURRENT_WORD (LC_COLLATE,
636 const CHAR *startp = p;
642 if (c == L('.') && p[1] == L(']'))
654 /* There are no names defined in the
655 collation data. Therefore we only
656 accept the trivial names consisting
657 of the character itself. */
666 const int32_t *symb_table;
667 # ifdef WIDE_CHAR_VERSION
671 # define str (startp + 1)
673 const unsigned char *extra;
679 # ifdef WIDE_CHAR_VERSION
680 /* We have to convert the name to a single-byte
681 string. This is possible since the names
682 consist of ASCII characters and the internal
683 representation is UCS4. */
684 for (strcnt = 0; strcnt < c1; ++strcnt)
685 str[strcnt] = startp[1 + strcnt];
689 _NL_CURRENT_WORD (LC_COLLATE,
690 _NL_COLLATE_SYMB_HASH_SIZEMB);
691 symb_table = (const int32_t *)
692 _NL_CURRENT (LC_COLLATE,
693 _NL_COLLATE_SYMB_TABLEMB);
694 extra = (const unsigned char *)
695 _NL_CURRENT (LC_COLLATE,
696 _NL_COLLATE_SYMB_EXTRAMB);
698 /* Locate the character in the hashing
700 hash = elem_hash (str, c1);
703 elem = hash % table_size;
704 second = hash % (table_size - 2);
705 while (symb_table[2 * elem] != 0)
707 /* First compare the hashing value. */
708 if (symb_table[2 * elem] == hash
710 == extra[symb_table[2 * elem + 1]])
712 &extra[symb_table[2 * elem + 1]
715 /* Yep, this is the entry. */
716 idx = symb_table[2 * elem + 1];
717 idx += 1 + extra[idx];
725 if (symb_table[2 * elem] != 0)
727 /* Compare the byte sequence but only if
728 this is not part of a range. */
729 # ifdef WIDE_CHAR_VERSION
732 idx += 1 + extra[idx];
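733 /* Adjust for the alignment. NOTE(review): the next line masks with ~4, which clears only bit 2; rounding up to a multiple of 4 needs ~3 -- confirm. */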
734 idx = (idx + 3) & ~4;
736 wextra = (int32_t *) &extra[idx + 4];
738 /* Get the collation sequence value. */
740 # ifdef WIDE_CHAR_VERSION
741 cend = wextra[1 + wextra[idx]];
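743 /* Adjust for the alignment. NOTE(review): the mask applied below is ~4, which clears only bit 2; rounding up to a multiple of 4 needs ~3 -- confirm. */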
744 idx += 1 + extra[idx];
745 idx = (idx + 3) & ~4;
746 cend = *((int32_t *) &extra[idx]);
749 else if (symb_table[2 * elem] != 0 && c1 == 1)
761 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
768 /* XXX It is not entirely clear to me how to handle
769 characters which are not mentioned in the
770 collation specification. */
772 # ifdef WIDE_CHAR_VERSION
773 lcollseq == 0xffffffff ||
775 lcollseq <= fcollseq)
777 /* We have to look at the upper bound. */
784 # ifdef WIDE_CHAR_VERSION
786 collseq_table_lookup (collseq, cend);
787 if (hcollseq == ~((uint32_t) 0))
789 /* Hum, no information about the upper
790 bound. The matching succeeds if the
791 lower bound is matched exactly. */
792 if (lcollseq != fcollseq)
793 goto range_not_matched;
798 hcollseq = collseq[cend];
802 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
805 # ifdef WIDE_CHAR_VERSION
809 /* We use a boring value comparison of the character
810 values. This is better than comparing using
811 `strcoll' since the latter would have surprising
812 and sometimes fatal consequences. */
815 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
821 if (cold <= fn && fn <= cend)
838 /* Skip the rest of the [...] that already matched. */
845 /* [... (unterminated) loses. */
848 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
852 /* XXX 1003.2d11 is unclear if this is right. */
855 else if (c == L('[') && *p == L(':'))
858 const CHAR *startp = p;
863 if (++c1 == CHAR_CLASS_MAX_LENGTH)
866 if (*p == L(':') && p[1] == L(']'))
869 if (c < L('a') || c >= L('z'))
878 else if (c == L('[') && *p == L('='))
884 if (c != L('=') || p[1] != L(']'))
889 else if (c == L('[') && *p == L('.'))
898 if (*p == L('.') && p[1] == L(']'))
914 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
918 res = EXT (c, p, n, string_end, no_leading_period, flags);
925 if (NO_LEADING_PERIOD (flags))
927 if (n == string_end || c != *n)
930 new_no_leading_period = 1;
936 if (n == string_end || c != FOLD ((UCHAR) *n))
940 no_leading_period = new_no_leading_period;
947 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
948 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
957 END (const CHAR *pattern)
959 const CHAR *p = pattern;
963 /* This is an invalid pattern. */
965 else if (*p == L('['))
967 /* Handle brackets specially. */
968 if (posixly_correct == 0)
969 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
971 /* Skip the not sign. We have to recognize it because of a possibly
973 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
975 /* A leading ']' is recognized as such. */
978 /* Skip over all characters of the list. */
981 /* This is no valid pattern. */
984 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
985 || *p == L('!')) && p[1] == L('('))
987 else if (*p == L(')'))
996 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
997 int no_leading_period, int flags)
1003 struct patternlist *next;
1006 struct patternlist **lastp = &list;
1007 size_t pattern_len = STRLEN (pattern);
1011 /* Parse the pattern. Store the individual parts in the list. */
1013 for (startp = p = pattern + 1; level >= 0; ++p)
1015 /* This is an invalid pattern. */
1017 else if (*p == L('['))
1019 /* Handle brackets specially. */
1020 if (posixly_correct == 0)
1021 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1023 /* Skip the not sign. We have to recognize it because of a possibly
1025 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1027 /* A leading ']' is recognized as such. */
1030 /* Skip over all characters of the list. */
1031 while (*p != L(']'))
1032 if (*p++ == L('\0'))
1033 /* This is no valid pattern. */
1036 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1037 || *p == L('!')) && p[1] == L('('))
1038 /* Remember the nesting level. */
1040 else if (*p == L(')'))
1044 /* This means we found the end of the pattern. */
1045 #define NEW_PATTERN \
1046 struct patternlist *newp; \
1049 plen = (opt == L('?') || opt == L('@') \
1051 : p - startp + 1); \
1052 newp = (struct patternlist *) \
1053 alloca (offsetof (struct patternlist, str) \
1054 + (plen * sizeof (CHAR))); \
1055 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1056 newp->next = NULL; \
1062 else if (*p == L('|'))
1070 assert (list != NULL);
1071 assert (p[-1] == L(')'));
1077 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1084 for (rs = string; rs <= string_end; ++rs)
1085 /* First match the prefix with the current pattern with the
1087 if (FCT (list->str, string, rs, no_leading_period,
1088 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1089 /* This was successful. Now match the rest with the rest
1091 && (FCT (p, rs, string_end,
1094 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1095 flags & FNM_FILE_NAME
1096 ? flags : flags & ~FNM_PERIOD) == 0
1097 /* This didn't work. Try the whole pattern. */
1099 && FCT (pattern - 1, rs, string_end,
1102 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1104 flags & FNM_FILE_NAME
1105 ? flags : flags & ~FNM_PERIOD) == 0)))
1106 /* It worked. Signal success. */
1109 while ((list = list->next) != NULL);
1111 /* None of the patterns lead to a match. */
1115 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1121 /* I cannot believe it but `strcat' is actually acceptable
1122 here. Match the entire string with the prefix from the
1123 pattern list and the rest of the pattern following the
1125 if (FCT (STRCAT (list->str, p), string, string_end,
1127 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1128 /* It worked. Signal success. */
1130 while ((list = list->next) != NULL);
1132 /* None of the patterns lead to a match. */
1136 for (rs = string; rs <= string_end; ++rs)
1138 struct patternlist *runp;
1140 for (runp = list; runp != NULL; runp = runp->next)
1141 if (FCT (runp->str, string, rs, no_leading_period,
1142 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1145 /* If none of the patterns matched see whether the rest does. */
1147 && (FCT (p, rs, string_end,
1150 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1151 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1153 /* This is successful. */
1157 /* None of the patterns together with the rest of the pattern
1162 assert (! "Invalid extended matching operator");