1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006
2 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19 /* Match STRING against the file name pattern PATTERN, returning zero if
20 it matches, nonzero if not. */
21 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
22 const CHAR *string_end, bool no_leading_period, int flags)
24 static const CHAR *END (const CHAR *patternp) internal_function;
/* FCT -- the matching loop proper.  According to the comment at the top of
   this file it matches STRING against the file name pattern PATTERN and
   returns zero on a match, nonzero otherwise.

   PATTERN            pattern to interpret; scanned until its terminating NUL.
   STRING             first character of the text to match.
   STRING_END         end of the text; the scan pointer N is compared against
                      it instead of relying on a terminator.
   NO_LEADING_PERIOD  when true, a `.' at the current position is tested for
                      explicitly before `?', `*' and `[' are applied.
   FLAGS              FNM_* bits; FNM_FILE_NAME, FNM_NOESCAPE, FNM_PERIOD,
                      FNM_LEADING_DIR and FNM_EXTMATCH are consulted below.

   When FNM_EXTMATCH is set and an operator character is followed by `(',
   the work is handed to EXT.  A `*' is resolved by calling FCT recursively
   on successive suffixes of STRING.  Offsets into the collation symbol
   data are rounded up to a multiple of 4 (mask ~3) before an int32_t is
   read from them.  NOTE(review): only part of the function body is visible
   in this listing; the return values of the individual branches should be
   confirmed against the full source.  */
28 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
29 bool no_leading_period, int flags)
31 register const CHAR *p = pattern, *n = string;
34 # if WIDE_CHAR_VERSION
35 const char *collseq = (const char *)
36 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
38 const UCHAR *collseq = (const UCHAR *)
39 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
43 while ((c = *p++) != L_('\0'))
45 bool new_no_leading_period = false;
51 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
55 res = EXT (c, p, n, string_end, no_leading_period,
63 else if (*n == L_('/') && (flags & FNM_FILE_NAME))
65 else if (*n == L_('.') && no_leading_period)
70 if (!(flags & FNM_NOESCAPE))
74 /* Trailing \ loses. */
78 if (n == string_end || FOLD ((UCHAR) *n) != c)
83 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
87 res = EXT (c, p, n, string_end, no_leading_period,
93 if (n != string_end && *n == L_('.') && no_leading_period)
96 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
98 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
100 const CHAR *endp = END (p);
103 /* This is a pattern. Skip over it. */
111 /* A ? needs to match one character. */
113 /* There isn't another character; no match. */
115 else if (*n == L_('/')
116 && __builtin_expect (flags & FNM_FILE_NAME, 0))
117 /* A slash does not match a wildcard under
121 /* One character of the string is consumed in matching
122 this ? wildcard, so *??? won't match if there are
123 less than three characters. */
129 /* The wildcard(s) is/are the last element of the pattern.
130 If the name is a file name and contains another slash
131 this means it cannot match, unless the FNM_LEADING_DIR
134 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
136 if (flags & FNM_FILE_NAME)
138 if (flags & FNM_LEADING_DIR)
142 if (MEMCHR (n, L_('/'), string_end - n) == NULL)
153 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
159 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
160 && (c == L_('@') || c == L_('+') || c == L_('!'))
163 int flags2 = ((flags & FNM_FILE_NAME)
164 ? flags : (flags & ~FNM_PERIOD));
165 bool no_leading_period2 = no_leading_period;
167 for (--p; n < endp; ++n, no_leading_period2 = false)
168 if (FCT (p, n, string_end, no_leading_period2, flags2)
172 else if (c == L_('/') && (flags & FNM_FILE_NAME))
174 while (n < string_end && *n != L_('/'))
176 if (n < string_end && *n == L_('/')
177 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
183 int flags2 = ((flags & FNM_FILE_NAME)
184 ? flags : (flags & ~FNM_PERIOD));
185 int no_leading_period2 = no_leading_period;
187 if (c == L_('\\') && !(flags & FNM_NOESCAPE))
190 for (--p; n < endp; ++n, no_leading_period2 = false)
191 if (FOLD ((UCHAR) *n) == c
192 && (FCT (p, n, string_end, no_leading_period2, flags2)
198 /* If we come here no match is possible with the wildcard. */
203 /* Nonzero if the sense of the character class is inverted. */
208 if (posixly_correct == 0)
209 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
214 if (*n == L_('.') && no_leading_period)
217 if (*n == L_('/') && (flags & FNM_FILE_NAME))
218 /* `/' cannot be matched. */
221 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
225 fn = FOLD ((UCHAR) *n);
230 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
234 c = FOLD ((UCHAR) *p);
239 else if (c == L_('[') && *p == L_(':'))
241 /* Leave room for the null. */
242 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
244 #if defined _LIBC || WIDE_CHAR_SUPPORT
247 const CHAR *startp = p;
251 if (c1 == CHAR_CLASS_MAX_LENGTH)
252 /* The name is too long and therefore the pattern
257 if (c == L_(':') && p[1] == L_(']'))
262 if (c < L_('a') || c >= L_('z'))
264 /* This cannot possibly be a character class name.
265 Match it as a normal range. */
274 #if defined _LIBC || WIDE_CHAR_SUPPORT
275 wt = IS_CHAR_CLASS (str);
277 /* Invalid character class name. */
280 # if defined _LIBC && ! WIDE_CHAR_VERSION
281 /* The following code is glibc specific but does
282 there a good job in speeding up the code since
283 we can avoid the btowc() call. */
284 if (_ISCTYPE ((UCHAR) *n, wt))
287 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
291 if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
292 || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
293 || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
294 || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
295 || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
296 || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
297 || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
298 || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
299 || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
300 || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
301 || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
302 || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
308 else if (c == L_('[') && *p == L_('='))
312 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
313 const CHAR *startp = p;
325 if (c != L_('=') || p[1] != L_(']'))
335 if ((UCHAR) *n == str[0])
340 const int32_t *table;
341 # if WIDE_CHAR_VERSION
342 const int32_t *weights;
343 const int32_t *extra;
345 const unsigned char *weights;
346 const unsigned char *extra;
348 const int32_t *indirect;
350 const UCHAR *cp = (const UCHAR *) str;
352 /* This #include defines a local function! */
353 # if WIDE_CHAR_VERSION
354 # include <locale/weightwc.h>
356 # include <locale/weight.h>
359 # if WIDE_CHAR_VERSION
360 table = (const int32_t *)
361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
362 weights = (const int32_t *)
363 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
364 extra = (const int32_t *)
365 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
366 indirect = (const int32_t *)
367 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
369 table = (const int32_t *)
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371 weights = (const unsigned char *)
372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
373 extra = (const unsigned char *)
374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
375 indirect = (const int32_t *)
376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
382 /* We found a table entry. Now see whether the
383 character we are currently at has the same
384 equivalence class value. */
385 int len = weights[idx];
387 const UCHAR *np = (const UCHAR *) n;
389 idx2 = findidx (&np);
390 if (idx2 != 0 && len == weights[idx2])
395 && (weights[idx + 1 + cnt]
396 == weights[idx2 + 1 + cnt]))
408 else if (c == L_('\0'))
409 /* [ (unterminated) loses. */
413 bool is_range = false;
416 bool is_seqval = false;
418 if (c == L_('[') && *p == L_('.'))
421 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
422 const CHAR *startp = p;
428 if (c == L_('.') && p[1] == L_(']'))
438 /* We have to handle the symbols differently in
439 ranges since then the collation sequence is
441 is_range = *p == L_('-') && p[1] != L_('\0');
445 /* There are no names defined in the collation
446 data. Therefore we only accept the trivial
447 names consisting of the character itself. */
451 if (!is_range && *n == startp[1])
460 const int32_t *symb_table;
461 # ifdef WIDE_CHAR_VERSION
465 # define str (startp + 1)
467 const unsigned char *extra;
473 # ifdef WIDE_CHAR_VERSION
474 /* We have to convert the name to a single-byte
475 string. This is possible since the names
476 consist of ASCII characters and the internal
477 representation is UCS4. */
478 for (strcnt = 0; strcnt < c1; ++strcnt)
479 str[strcnt] = startp[1 + strcnt];
483 _NL_CURRENT_WORD (LC_COLLATE,
484 _NL_COLLATE_SYMB_HASH_SIZEMB);
485 symb_table = (const int32_t *)
486 _NL_CURRENT (LC_COLLATE,
487 _NL_COLLATE_SYMB_TABLEMB);
488 extra = (const unsigned char *)
489 _NL_CURRENT (LC_COLLATE,
490 _NL_COLLATE_SYMB_EXTRAMB);
492 /* Locate the character in the hashing table. */
493 hash = elem_hash (str, c1);
496 elem = hash % table_size;
497 if (symb_table[2 * elem] != 0)
499 second = hash % (table_size - 2) + 1;
503 /* First compare the hashing value. */
504 if (symb_table[2 * elem] == hash
506 == extra[symb_table[2 * elem + 1]])
508 &extra[symb_table[2 * elem
512 /* Yep, this is the entry. */
513 idx = symb_table[2 * elem + 1];
514 idx += 1 + extra[idx];
521 while (symb_table[2 * elem] != 0);
524 if (symb_table[2 * elem] != 0)
526 /* Compare the byte sequence but only if
527 this is not part of a range. */
528 # ifdef WIDE_CHAR_VERSION
531 idx += 1 + extra[idx];
532 /* Adjust for the alignment. */
533 idx = (idx + 3) & ~3;
535 wextra = (int32_t *) &extra[idx + 4];
540 # ifdef WIDE_CHAR_VERSION
542 (int32_t) c1 < wextra[idx];
544 if (n[c1] != wextra[1 + c1])
547 if ((int32_t) c1 == wextra[idx])
550 for (c1 = 0; c1 < extra[idx]; ++c1)
551 if (n[c1] != extra[1 + c1])
554 if (c1 == extra[idx])
559 /* Get the collation sequence value. */
561 # ifdef WIDE_CHAR_VERSION
562 cold = wextra[1 + wextra[idx]];
564 /* Adjust for the alignment (round IDX up to a multiple of 4). */
565 idx += 1 + extra[idx];
566 idx = (idx + 3) & ~3;
567 cold = *((int32_t *) &extra[idx]);
574 /* No valid character. Match it as a
576 if (!is_range && *n == str[0])
593 /* We have to handle the symbols differently in
594 ranges since then the collation sequence is
596 is_range = (*p == L_('-') && p[1] != L_('\0')
599 if (!is_range && c == fn)
602 /* This is needed if we goto normal_bracket; from
603 outside of is_seqval's scope. */
609 if (c == L_('-') && *p != L_(']'))
612 /* We have to find the collation sequence
613 value for C. Collation sequence is nothing
614 we can regularly access. The sequence
615 value is defined by the order in which the
616 definitions of the collation values for the
617 various characters appear in the source
618 file. A strange concept, nowhere
624 # ifdef WIDE_CHAR_VERSION
625 /* Search in the `names' array for the characters. */
626 fcollseq = __collseq_table_lookup (collseq, fn);
627 if (fcollseq == ~((uint32_t) 0))
628 /* XXX We don't know anything about the character
629 we are supposed to match. This means we are
631 goto range_not_matched;
636 lcollseq = __collseq_table_lookup (collseq, cold);
638 fcollseq = collseq[fn];
639 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
643 if (cend == L_('[') && *p == L_('.'))
646 _NL_CURRENT_WORD (LC_COLLATE,
648 const CHAR *startp = p;
654 if (c == L_('.') && p[1] == L_(']'))
666 /* There are no names defined in the
667 collation data. Therefore we only
668 accept the trivial names consisting
669 of the character itself. */
678 const int32_t *symb_table;
679 # ifdef WIDE_CHAR_VERSION
683 # define str (startp + 1)
685 const unsigned char *extra;
691 # ifdef WIDE_CHAR_VERSION
692 /* We have to convert the name to a single-byte
693 string. This is possible since the names
694 consist of ASCII characters and the internal
695 representation is UCS4. */
696 for (strcnt = 0; strcnt < c1; ++strcnt)
697 str[strcnt] = startp[1 + strcnt];
701 _NL_CURRENT_WORD (LC_COLLATE,
702 _NL_COLLATE_SYMB_HASH_SIZEMB);
703 symb_table = (const int32_t *)
704 _NL_CURRENT (LC_COLLATE,
705 _NL_COLLATE_SYMB_TABLEMB);
706 extra = (const unsigned char *)
707 _NL_CURRENT (LC_COLLATE,
708 _NL_COLLATE_SYMB_EXTRAMB);
710 /* Locate the character in the hashing
712 hash = elem_hash (str, c1);
715 elem = hash % table_size;
716 if (symb_table[2 * elem] != 0)
718 second = hash % (table_size - 2) + 1;
722 /* First compare the hashing value. */
723 if (symb_table[2 * elem] == hash
725 == extra[symb_table[2 * elem + 1]])
727 &extra[symb_table[2 * elem + 1]
730 /* Yep, this is the entry. */
731 idx = symb_table[2 * elem + 1];
732 idx += 1 + extra[idx];
739 while (symb_table[2 * elem] != 0);
742 if (symb_table[2 * elem] != 0)
744 /* Compare the byte sequence but only if
745 this is not part of a range. */
746 # ifdef WIDE_CHAR_VERSION
749 idx += 1 + extra[idx];
750 /* Adjust for the alignment (round IDX up to a multiple of 4). */
751 idx = (idx + 3) & ~3;
753 wextra = (int32_t *) &extra[idx + 4];
755 /* Get the collation sequence value. */
757 # ifdef WIDE_CHAR_VERSION
758 cend = wextra[1 + wextra[idx]];
760 /* Adjust for the alignment (round IDX up to a multiple of 4). */
761 idx += 1 + extra[idx];
762 idx = (idx + 3) & ~3;
763 cend = *((int32_t *) &extra[idx]);
766 else if (symb_table[2 * elem] != 0 && c1 == 1)
778 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
780 if (cend == L_('\0'))
785 /* XXX It is not entirely clear to me how to handle
786 characters which are not mentioned in the
787 collation specification. */
789 # ifdef WIDE_CHAR_VERSION
790 lcollseq == 0xffffffff ||
792 lcollseq <= fcollseq)
794 /* We have to look at the upper bound. */
801 # ifdef WIDE_CHAR_VERSION
803 __collseq_table_lookup (collseq, cend);
804 if (hcollseq == ~((uint32_t) 0))
806 /* Hum, no information about the upper
807 bound. The matching succeeds if the
808 lower bound is matched exactly. */
809 if (lcollseq != fcollseq)
810 goto range_not_matched;
815 hcollseq = collseq[cend];
819 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
822 # ifdef WIDE_CHAR_VERSION
826 /* We use a boring value comparison of the character
827 values. This is better than comparing using
828 `strcoll' since the latter would have surprising
829 and sometimes fatal consequences. */
832 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
834 if (cend == L_('\0'))
838 if (cold <= fn && fn <= cend)
855 /* Skip the rest of the [...] that already matched. */
862 /* [... (unterminated) loses. */
865 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
869 /* XXX 1003.2d11 is unclear if this is right. */
872 else if (c == L_('[') && *p == L_(':'))
875 const CHAR *startp = p;
880 if (++c1 == CHAR_CLASS_MAX_LENGTH)
883 if (*p == L_(':') && p[1] == L_(']'))
886 if (c < L_('a') || c >= L_('z'))
895 else if (c == L_('[') && *p == L_('='))
901 if (c != L_('=') || p[1] != L_(']'))
906 else if (c == L_('[') && *p == L_('.'))
915 if (*p == L_('.') && p[1] == L_(']'))
922 while (c != L_(']'));
931 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
935 res = EXT (c, p, n, string_end, no_leading_period, flags);
942 if (NO_LEADING_PERIOD (flags))
944 if (n == string_end || c != (UCHAR) *n)
947 new_no_leading_period = true;
953 if (n == string_end || c != FOLD ((UCHAR) *n))
957 no_leading_period = new_no_leading_period;
964 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
965 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
/* END -- scan an extended-pattern group for its end.  PATTERN is walked
   with a local pointer P that is pre-incremented before each test.  A NUL
   reached during the scan is treated as an invalid pattern.  A `['
   starts a bracket expression: an optional `!' (or `^' unless
   POSIXLY_CORRECT is set in the environment) and a leading `]' are
   stepped over, then everything up to the closing `]' is skipped.  The
   operator characters `?', `*', `+', `@' and `!' followed by `(' and the
   character `)' are recognised separately.
   NOTE(review): the return statements are not part of this listing;
   presumably the result points just past the matching `)' -- confirm
   against the full source.  */
974 END (const CHAR *pattern)
976 const CHAR *p = pattern;
979 if (*++p == L_('\0'))
980 /* This is an invalid pattern. */
982 else if (*p == L_('['))
984 /* Handle brackets special. */
985 if (posixly_correct == 0)
986 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
988 /* Skip the not sign. We have to recognize it because of a possibly
990 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
992 /* A leading ']' is recognized as such. */
995 /* Skip over all characters of the list. */
996 while (*p != L_(']'))
997 if (*p++ == L_('\0'))
998 /* This is no valid pattern. */
1001 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1002 || *p == L_('!')) && p[1] == L_('('))
1004 else if (*p == L_(')'))
/* EXT -- match an extended pattern group.  OPT is the operator character
   that preceded the `(' (FCT passes the pattern character it just read),
   PATTERN points at the group, STRING .. STRING_END is the text, and
   NO_LEADING_PERIOD and FLAGS are forwarded to FCT.

   The group is first parsed: bracket expressions are stepped over, nested
   groups are tracked, and at `|' or the closing `)' the NEW_PATTERN macro
   copies the current alternative into an alloca'd `struct patternlist'
   node after checking its size for overflow and against ALLOCA_LIMIT.
   The alternatives are then tried with FCT, either against prefixes
   STRING .. RS of the text with the remainder of the pattern (P) matched
   against RS .. STRING_END, or concatenated with P via STRCAT and matched
   against the whole text.  An operator that is none of the handled ones
   trips the final assert.
   NOTE(review): the dispatch on OPT and the return statements are not
   visible in this listing -- confirm details against the full source.  */
1013 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1014 bool no_leading_period, int flags)
1020 struct patternlist *next;
1023 struct patternlist **lastp = &list;
1024 size_t pattern_len = STRLEN (pattern);
1027 enum { ALLOCA_LIMIT = 8000 };
1029 /* Parse the pattern. Store the individual parts in the list. */
1031 for (startp = p = pattern + 1; ; ++p)
1033 /* This is an invalid pattern. */
1035 else if (*p == L_('['))
1037 /* Handle brackets special. */
1038 if (posixly_correct == 0)
1039 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1041 /* Skip the not sign. We have to recognize it because of a possibly
1043 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1045 /* A leading ']' is recognized as such. */
1048 /* Skip over all characters of the list. */
1049 while (*p != L_(']'))
1050 if (*p++ == L_('\0'))
1051 /* This is no valid pattern. */
1054 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1055 || *p == L_('!')) && p[1] == L_('('))
1056 /* Remember the nesting level. */
1058 else if (*p == L_(')'))
1062 /* This means we found the end of the pattern. */
1063 #define NEW_PATTERN \
1064 struct patternlist *newp; \
1069 plen = (opt == L_('?') || opt == L_('@') \
1071 : p - startp + 1); \
1072 plensize = plen * sizeof (CHAR); \
1073 newpsize = offsetof (struct patternlist, str) + plensize; \
1074 if ((size_t) -1 / sizeof (CHAR) < plen \
1075 || newpsize < offsetof (struct patternlist, str) \
1076 || ALLOCA_LIMIT <= newpsize) \
1078 newp = (struct patternlist *) alloca (newpsize); \
1079 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
1080 newp->next = NULL; \
1087 else if (*p == L_('|'))
1095 assert (list != NULL);
1096 assert (p[-1] == L_(')'));
1102 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1109 for (rs = string; rs <= string_end; ++rs)
1110 /* First match the prefix with the current pattern with the
1112 if (FCT (list->str, string, rs, no_leading_period,
1113 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1114 /* This was successful. Now match the rest with the rest
1116 && (FCT (p, rs, string_end,
1119 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1120 flags & FNM_FILE_NAME
1121 ? flags : flags & ~FNM_PERIOD) == 0
1122 /* This didn't work. Try the whole pattern. */
1124 && FCT (pattern - 1, rs, string_end,
1127 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1128 flags & FNM_FILE_NAME
1129 ? flags : flags & ~FNM_PERIOD) == 0)))
1130 /* It worked. Signal success. */
1133 while ((list = list->next) != NULL);
1135 /* None of the patterns lead to a match. */
1139 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1145 /* I cannot believe it but `strcat' is actually acceptable
1146 here. Match the entire string with the prefix from the
1147 pattern list and the rest of the pattern following the
1149 if (FCT (STRCAT (list->str, p), string, string_end,
1151 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1152 /* It worked. Signal success. */
1154 while ((list = list->next) != NULL);
1156 /* None of the patterns lead to a match. */
1160 for (rs = string; rs <= string_end; ++rs)
1162 struct patternlist *runp;
1164 for (runp = list; runp != NULL; runp = runp->next)
1165 if (FCT (runp->str, string, rs, no_leading_period,
1166 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1169 /* If none of the patterns matched see whether the rest does. */
1171 && (FCT (p, rs, string_end,
1174 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1175 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1177 /* This is successful. */
1181 /* None of the patterns together with the rest of the pattern
1186 assert (! "Invalid extended matching operator");