1 /* Copyright (C) 1991-1993, 1996-2006, 2009-2011 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Match STRING against the file name pattern PATTERN, returning zero if
19 it matches, nonzero if not. */
20 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
21 const CHAR *string_end, bool no_leading_period, int flags)
23 static const CHAR *END (const CHAR *patternp) internal_function;
27 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
28 bool no_leading_period, int flags)
30 register const CHAR *p = pattern, *n = string;
33 # if WIDE_CHAR_VERSION
34 const char *collseq = (const char *)
35 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
37 const UCHAR *collseq = (const UCHAR *)
38 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
42 while ((c = *p++) != L_('\0'))
44 bool new_no_leading_period = false;
50 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
54 res = EXT (c, p, n, string_end, no_leading_period,
62 else if (*n == L_('/') && (flags & FNM_FILE_NAME))
64 else if (*n == L_('.') && no_leading_period)
69 if (!(flags & FNM_NOESCAPE))
73 /* Trailing \ loses. */
77 if (n == string_end || FOLD ((UCHAR) *n) != c)
82 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
86 res = EXT (c, p, n, string_end, no_leading_period,
92 if (n != string_end && *n == L_('.') && no_leading_period)
95 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
97 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
99 const CHAR *endp = END (p);
102 /* This is a pattern. Skip over it. */
110 /* A ? needs to match one character. */
112 /* There isn't another character; no match. */
114 else if (*n == L_('/')
115 && __builtin_expect (flags & FNM_FILE_NAME, 0))
116 /* A slash does not match a wildcard under
120 /* One character of the string is consumed in matching
121 this ? wildcard, so *??? won't match if there are
122 less than three characters. */
128 /* The wildcard(s) is/are the last element of the pattern.
129 If the name is a file name and contains another slash
130 this means it cannot match, unless the FNM_LEADING_DIR
133 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
135 if (flags & FNM_FILE_NAME)
137 if (flags & FNM_LEADING_DIR)
141 if (MEMCHR (n, L_('/'), string_end - n) == NULL)
152 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
158 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
159 && (c == L_('@') || c == L_('+') || c == L_('!'))
162 int flags2 = ((flags & FNM_FILE_NAME)
163 ? flags : (flags & ~FNM_PERIOD));
164 bool no_leading_period2 = no_leading_period;
166 for (--p; n < endp; ++n, no_leading_period2 = false)
167 if (FCT (p, n, string_end, no_leading_period2, flags2)
171 else if (c == L_('/') && (flags & FNM_FILE_NAME))
173 while (n < string_end && *n != L_('/'))
175 if (n < string_end && *n == L_('/')
176 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
182 int flags2 = ((flags & FNM_FILE_NAME)
183 ? flags : (flags & ~FNM_PERIOD));
184 int no_leading_period2 = no_leading_period;
186 if (c == L_('\\') && !(flags & FNM_NOESCAPE))
189 for (--p; n < endp; ++n, no_leading_period2 = false)
190 if (FOLD ((UCHAR) *n) == c
191 && (FCT (p, n, string_end, no_leading_period2, flags2)
197 /* If we come here no match is possible with the wildcard. */
202 /* Nonzero if the sense of the character class is inverted. */
209 if (posixly_correct == 0)
210 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
215 if (*n == L_('.') && no_leading_period)
218 if (*n == L_('/') && (flags & FNM_FILE_NAME))
219 /* `/' cannot be matched. */
222 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
226 fn = FOLD ((UCHAR) *n);
231 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
235 c = FOLD ((UCHAR) *p);
240 else if (c == L_('[') && *p == L_(':'))
242 /* Leave room for the null. */
243 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
245 #if defined _LIBC || WIDE_CHAR_SUPPORT
248 const CHAR *startp = p;
252 if (c1 == CHAR_CLASS_MAX_LENGTH)
253 /* The name is too long and therefore the pattern
258 if (c == L_(':') && p[1] == L_(']'))
263 if (c < L_('a') || c >= L_('z'))
265 /* This cannot possibly be a character class name.
266 Match it as a normal range. */
275 #if defined _LIBC || WIDE_CHAR_SUPPORT
276 wt = IS_CHAR_CLASS (str);
278 /* Invalid character class name. */
281 # if defined _LIBC && ! WIDE_CHAR_VERSION
282 /* The following code is glibc specific but does
283 there a good job in speeding up the code since
284 we can avoid the btowc() call. */
285 if (_ISCTYPE ((UCHAR) *n, wt))
288 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
292 if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
293 || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
294 || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
295 || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
296 || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
297 || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
298 || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
299 || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
300 || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
301 || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
302 || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
303 || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
309 else if (c == L_('[') && *p == L_('='))
313 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
314 const CHAR *startp = p;
326 if (c != L_('=') || p[1] != L_(']'))
336 if ((UCHAR) *n == str[0])
341 const int32_t *table;
342 # if WIDE_CHAR_VERSION
343 const int32_t *weights;
344 const int32_t *extra;
346 const unsigned char *weights;
347 const unsigned char *extra;
349 const int32_t *indirect;
351 const UCHAR *cp = (const UCHAR *) str;
353 /* This #include defines a local function! */
354 # if WIDE_CHAR_VERSION
355 # include <locale/weightwc.h>
357 # include <locale/weight.h>
360 # if WIDE_CHAR_VERSION
361 table = (const int32_t *)
362 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
363 weights = (const int32_t *)
364 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
365 extra = (const int32_t *)
366 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
367 indirect = (const int32_t *)
368 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
370 table = (const int32_t *)
371 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
372 weights = (const unsigned char *)
373 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
374 extra = (const unsigned char *)
375 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
376 indirect = (const int32_t *)
377 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
383 /* We found a table entry. Now see whether the
384 character we are currently at has the same
385 equivalance class value. */
386 int len = weights[idx & 0xffffff];
388 const UCHAR *np = (const UCHAR *) n;
390 idx2 = findidx (&np);
392 && (idx >> 24) == (idx2 >> 24)
393 && len == weights[idx2 & 0xffffff])
401 && (weights[idx + 1 + cnt]
402 == weights[idx2 + 1 + cnt]))
414 else if (c == L_('\0'))
416 /* [ unterminated, treat as normal character. */
424 bool is_range = false;
427 bool is_seqval = false;
429 if (c == L_('[') && *p == L_('.'))
432 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
433 const CHAR *startp = p;
439 if (c == L_('.') && p[1] == L_(']'))
449 /* We have to handling the symbols differently in
450 ranges since then the collation sequence is
452 is_range = *p == L_('-') && p[1] != L_('\0');
456 /* There are no names defined in the collation
457 data. Therefore we only accept the trivial
458 names consisting of the character itself. */
462 if (!is_range && *n == startp[1])
471 const int32_t *symb_table;
472 # ifdef WIDE_CHAR_VERSION
476 # define str (startp + 1)
478 const unsigned char *extra;
484 # ifdef WIDE_CHAR_VERSION
485 /* We have to convert the name to a single-byte
486 string. This is possible since the names
487 consist of ASCII characters and the internal
488 representation is UCS4. */
489 for (strcnt = 0; strcnt < c1; ++strcnt)
490 str[strcnt] = startp[1 + strcnt];
494 _NL_CURRENT_WORD (LC_COLLATE,
495 _NL_COLLATE_SYMB_HASH_SIZEMB);
496 symb_table = (const int32_t *)
497 _NL_CURRENT (LC_COLLATE,
498 _NL_COLLATE_SYMB_TABLEMB);
499 extra = (const unsigned char *)
500 _NL_CURRENT (LC_COLLATE,
501 _NL_COLLATE_SYMB_EXTRAMB);
503 /* Locate the character in the hashing table. */
504 hash = elem_hash (str, c1);
507 elem = hash % table_size;
508 if (symb_table[2 * elem] != 0)
510 second = hash % (table_size - 2) + 1;
514 /* First compare the hashing value. */
515 if (symb_table[2 * elem] == hash
517 == extra[symb_table[2 * elem + 1]])
519 &extra[symb_table[2 * elem
523 /* Yep, this is the entry. */
524 idx = symb_table[2 * elem + 1];
525 idx += 1 + extra[idx];
532 while (symb_table[2 * elem] != 0);
535 if (symb_table[2 * elem] != 0)
537 /* Compare the byte sequence but only if
538 this is not part of a range. */
539 # ifdef WIDE_CHAR_VERSION
542 idx += 1 + extra[idx];
543 /* Adjust for the alignment. */
544 idx = (idx + 3) & ~3;
546 wextra = (int32_t *) &extra[idx + 4];
551 # ifdef WIDE_CHAR_VERSION
553 (int32_t) c1 < wextra[idx];
555 if (n[c1] != wextra[1 + c1])
558 if ((int32_t) c1 == wextra[idx])
561 for (c1 = 0; c1 < extra[idx]; ++c1)
562 if (n[c1] != extra[1 + c1])
565 if (c1 == extra[idx])
570 /* Get the collation sequence value. */
572 # ifdef WIDE_CHAR_VERSION
573 cold = wextra[1 + wextra[idx]];
575 /* Adjust for the alignment. */
576 idx += 1 + extra[idx];
577 idx = (idx + 3) & ~4;
578 cold = *((int32_t *) &extra[idx]);
585 /* No valid character. Match it as a
587 if (!is_range && *n == str[0])
604 /* We have to handling the symbols differently in
605 ranges since then the collation sequence is
607 is_range = (*p == L_('-') && p[1] != L_('\0')
610 if (!is_range && c == fn)
614 /* This is needed if we goto normal_bracket; from
615 outside of is_seqval's scope. */
623 if (c == L_('-') && *p != L_(']'))
626 /* We have to find the collation sequence
627 value for C. Collation sequence is nothing
628 we can regularly access. The sequence
629 value is defined by the order in which the
630 definitions of the collation values for the
631 various characters appear in the source
632 file. A strange concept, nowhere
638 # ifdef WIDE_CHAR_VERSION
639 /* Search in the `names' array for the characters. */
640 fcollseq = __collseq_table_lookup (collseq, fn);
641 if (fcollseq == ~((uint32_t) 0))
642 /* XXX We don't know anything about the character
643 we are supposed to match. This means we are
645 goto range_not_matched;
650 lcollseq = __collseq_table_lookup (collseq, cold);
652 fcollseq = collseq[fn];
653 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
657 if (cend == L_('[') && *p == L_('.'))
660 _NL_CURRENT_WORD (LC_COLLATE,
662 const CHAR *startp = p;
668 if (c == L_('.') && p[1] == L_(']'))
680 /* There are no names defined in the
681 collation data. Therefore we only
682 accept the trivial names consisting
683 of the character itself. */
692 const int32_t *symb_table;
693 # ifdef WIDE_CHAR_VERSION
697 # define str (startp + 1)
699 const unsigned char *extra;
705 # ifdef WIDE_CHAR_VERSION
706 /* We have to convert the name to a single-byte
707 string. This is possible since the names
708 consist of ASCII characters and the internal
709 representation is UCS4. */
710 for (strcnt = 0; strcnt < c1; ++strcnt)
711 str[strcnt] = startp[1 + strcnt];
715 _NL_CURRENT_WORD (LC_COLLATE,
716 _NL_COLLATE_SYMB_HASH_SIZEMB);
717 symb_table = (const int32_t *)
718 _NL_CURRENT (LC_COLLATE,
719 _NL_COLLATE_SYMB_TABLEMB);
720 extra = (const unsigned char *)
721 _NL_CURRENT (LC_COLLATE,
722 _NL_COLLATE_SYMB_EXTRAMB);
724 /* Locate the character in the hashing
726 hash = elem_hash (str, c1);
729 elem = hash % table_size;
730 if (symb_table[2 * elem] != 0)
732 second = hash % (table_size - 2) + 1;
736 /* First compare the hashing value. */
737 if (symb_table[2 * elem] == hash
739 == extra[symb_table[2 * elem + 1]])
741 &extra[symb_table[2 * elem + 1]
744 /* Yep, this is the entry. */
745 idx = symb_table[2 * elem + 1];
746 idx += 1 + extra[idx];
753 while (symb_table[2 * elem] != 0);
756 if (symb_table[2 * elem] != 0)
758 /* Compare the byte sequence but only if
759 this is not part of a range. */
760 # ifdef WIDE_CHAR_VERSION
763 idx += 1 + extra[idx];
764 /* Adjust for the alignment. */
765 idx = (idx + 3) & ~4;
767 wextra = (int32_t *) &extra[idx + 4];
769 /* Get the collation sequence value. */
771 # ifdef WIDE_CHAR_VERSION
772 cend = wextra[1 + wextra[idx]];
774 /* Adjust for the alignment. */
775 idx += 1 + extra[idx];
776 idx = (idx + 3) & ~4;
777 cend = *((int32_t *) &extra[idx]);
780 else if (symb_table[2 * elem] != 0 && c1 == 1)
792 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
794 if (cend == L_('\0'))
799 /* XXX It is not entirely clear to me how to handle
800 characters which are not mentioned in the
801 collation specification. */
803 # ifdef WIDE_CHAR_VERSION
804 lcollseq == 0xffffffff ||
806 lcollseq <= fcollseq)
808 /* We have to look at the upper bound. */
815 # ifdef WIDE_CHAR_VERSION
817 __collseq_table_lookup (collseq, cend);
818 if (hcollseq == ~((uint32_t) 0))
820 /* Hum, no information about the upper
821 bound. The matching succeeds if the
822 lower bound is matched exactly. */
823 if (lcollseq != fcollseq)
824 goto range_not_matched;
829 hcollseq = collseq[cend];
833 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
836 # ifdef WIDE_CHAR_VERSION
840 /* We use a boring value comparison of the character
841 values. This is better than comparing using
842 `strcoll' since the latter would have surprising
843 and sometimes fatal consequences. */
846 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
848 if (cend == L_('\0'))
852 if (cold <= fn && fn <= cend)
869 /* Skip the rest of the [...] that already matched. */
876 /* [... (unterminated) loses. */
879 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
883 /* XXX 1003.2d11 is unclear if this is right. */
886 else if (c == L_('[') && *p == L_(':'))
889 const CHAR *startp = p;
894 if (++c1 == CHAR_CLASS_MAX_LENGTH)
897 if (*p == L_(':') && p[1] == L_(']'))
900 if (c < L_('a') || c >= L_('z'))
909 else if (c == L_('[') && *p == L_('='))
915 if (c != L_('=') || p[1] != L_(']'))
920 else if (c == L_('[') && *p == L_('.'))
929 if (*p == L_('.') && p[1] == L_(']'))
936 while (c != L_(']'));
945 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
949 res = EXT (c, p, n, string_end, no_leading_period, flags);
956 if (NO_LEADING_PERIOD (flags))
958 if (n == string_end || c != (UCHAR) *n)
961 new_no_leading_period = true;
967 if (n == string_end || c != FOLD ((UCHAR) *n))
971 no_leading_period = new_no_leading_period;
978 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
979 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
988 END (const CHAR *pattern)
990 const CHAR *p = pattern;
993 if (*++p == L_('\0'))
994 /* This is an invalid pattern. */
996 else if (*p == L_('['))
998 /* Handle brackets special. */
999 if (posixly_correct == 0)
1000 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1002 /* Skip the not sign. We have to recognize it because of a possibly
1004 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1006 /* A leading ']' is recognized as such. */
1009 /* Skip over all characters of the list. */
1010 while (*p != L_(']'))
1011 if (*p++ == L_('\0'))
1012 /* This is no valid pattern. */
1015 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1016 || *p == L_('!')) && p[1] == L_('('))
1018 else if (*p == L_(')'))
1027 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1028 bool no_leading_period, int flags)
1034 struct patternlist *next;
1037 struct patternlist **lastp = &list;
1038 size_t pattern_len = STRLEN (pattern);
1041 enum { ALLOCA_LIMIT = 8000 };
1043 /* Parse the pattern. Store the individual parts in the list. */
1045 for (startp = p = pattern + 1; ; ++p)
1047 /* This is an invalid pattern. */
1049 else if (*p == L_('['))
1051 /* Handle brackets special. */
1052 if (posixly_correct == 0)
1053 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1055 /* Skip the not sign. We have to recognize it because of a possibly
1057 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1059 /* A leading ']' is recognized as such. */
1062 /* Skip over all characters of the list. */
1063 while (*p != L_(']'))
1064 if (*p++ == L_('\0'))
1065 /* This is no valid pattern. */
1068 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1069 || *p == L_('!')) && p[1] == L_('('))
1070 /* Remember the nesting level. */
1072 else if (*p == L_(')'))
1076 /* This means we found the end of the pattern. */
1077 #define NEW_PATTERN \
1078 struct patternlist *newp; \
1083 plen = (opt == L_('?') || opt == L_('@') \
1085 : p - startp + 1UL); \
1086 plensize = plen * sizeof (CHAR); \
1087 newpsize = offsetof (struct patternlist, str) + plensize; \
1088 if ((size_t) -1 / sizeof (CHAR) < plen \
1089 || newpsize < offsetof (struct patternlist, str) \
1090 || ALLOCA_LIMIT <= newpsize) \
1092 newp = (struct patternlist *) alloca (newpsize); \
1093 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
1094 newp->next = NULL; \
1101 else if (*p == L_('|'))
1109 assert (list != NULL);
1110 assert (p[-1] == L_(')'));
1116 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1123 for (rs = string; rs <= string_end; ++rs)
1124 /* First match the prefix with the current pattern with the
1126 if (FCT (list->str, string, rs, no_leading_period,
1127 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1128 /* This was successful. Now match the rest with the rest
1130 && (FCT (p, rs, string_end,
1133 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1134 flags & FNM_FILE_NAME
1135 ? flags : flags & ~FNM_PERIOD) == 0
1136 /* This didn't work. Try the whole pattern. */
1138 && FCT (pattern - 1, rs, string_end,
1141 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1142 flags & FNM_FILE_NAME
1143 ? flags : flags & ~FNM_PERIOD) == 0)))
1144 /* It worked. Signal success. */
1147 while ((list = list->next) != NULL);
1149 /* None of the patterns lead to a match. */
1153 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1159 /* I cannot believe it but `strcat' is actually acceptable
1160 here. Match the entire string with the prefix from the
1161 pattern list and the rest of the pattern following the
1163 if (FCT (STRCAT (list->str, p), string, string_end,
1165 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1166 /* It worked. Signal success. */
1168 while ((list = list->next) != NULL);
1170 /* None of the patterns lead to a match. */
1174 for (rs = string; rs <= string_end; ++rs)
1176 struct patternlist *runp;
1178 for (runp = list; runp != NULL; runp = runp->next)
1179 if (FCT (runp->str, string, rs, no_leading_period,
1180 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1183 /* If none of the patterns matched see whether the rest does. */
1185 && (FCT (p, rs, string_end,
1188 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1189 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1191 /* This is successful. */
1195 /* None of the patterns together with the rest of the pattern
1200 assert (! "Invalid extended matching operator");