5f803924be90073b0f3c6636ce0ce0ea427f84f9
[pspp] / lib / fnmatch_loop.c
1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006
2    Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19 /* Match STRING against the file name pattern PATTERN, returning zero if
20    it matches, nonzero if not.  */

/* Forward declarations of two helpers that FCT (defined below) calls;
   their definitions are not part of this section.

   EXT: in the code visible here, FCT calls it when FNM_EXTMATCH is set
   and a `?' or `*' in the pattern is immediately followed by `(',
   passing that introducing character as OPT together with the current
   pattern and string positions.  FCT treats a result of -1 as "not
   handled" and continues with ordinary wildcard matching; any other
   result is returned to FCT's caller unchanged.

   END: FCT passes it a pointer to the `(' that may open an extended
   pattern.  A result different from PATTERNP is used by FCT as the
   position at which to resume scanning the pattern (the group is
   skipped); a result equal to PATTERNP is taken to mean that no
   extended pattern starts there.  */
21 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
22                 const CHAR *string_end, bool no_leading_period, int flags)
23      internal_function;
24 static const CHAR *END (const CHAR *patternp) internal_function;
25
26 static int
27 internal_function
28 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
29      bool no_leading_period, int flags)
30 {
31   register const CHAR *p = pattern, *n = string;
32   register UCHAR c;
33 #ifdef _LIBC
34 # if WIDE_CHAR_VERSION
35   const char *collseq = (const char *)
36     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
37 # else
38   const UCHAR *collseq = (const UCHAR *)
39     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
40 # endif
41 #endif
42
43   while ((c = *p++) != L_('\0'))
44     {
45       bool new_no_leading_period = false;
46       c = FOLD (c);
47
48       switch (c)
49         {
50         case L_('?'):
51           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
52             {
53               int res;
54
55               res = EXT (c, p, n, string_end, no_leading_period,
56                          flags);
57               if (res != -1)
58                 return res;
59             }
60
61           if (n == string_end)
62             return FNM_NOMATCH;
63           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
64             return FNM_NOMATCH;
65           else if (*n == L_('.') && no_leading_period)
66             return FNM_NOMATCH;
67           break;
68
69         case L_('\\'):
70           if (!(flags & FNM_NOESCAPE))
71             {
72               c = *p++;
73               if (c == L_('\0'))
74                 /* Trailing \ loses.  */
75                 return FNM_NOMATCH;
76               c = FOLD (c);
77             }
78           if (n == string_end || FOLD ((UCHAR) *n) != c)
79             return FNM_NOMATCH;
80           break;
81
82         case L_('*'):
83           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
84             {
85               int res;
86
87               res = EXT (c, p, n, string_end, no_leading_period,
88                          flags);
89               if (res != -1)
90                 return res;
91             }
92
93           if (n != string_end && *n == L_('.') && no_leading_period)
94             return FNM_NOMATCH;
95
96           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
97             {
98               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
99                 {
100                   const CHAR *endp = END (p);
101                   if (endp != p)
102                     {
103                       /* This is a pattern.  Skip over it.  */
104                       p = endp;
105                       continue;
106                     }
107                 }
108
109               if (c == L_('?'))
110                 {
111                   /* A ? needs to match one character.  */
112                   if (n == string_end)
113                     /* There isn't another character; no match.  */
114                     return FNM_NOMATCH;
115                   else if (*n == L_('/')
116                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
117                     /* A slash does not match a wildcard under
118                        FNM_FILE_NAME.  */
119                     return FNM_NOMATCH;
120                   else
121                     /* One character of the string is consumed in matching
122                        this ? wildcard, so *??? won't match if there are
123                        less than three characters.  */
124                     ++n;
125                 }
126             }
127
128           if (c == L_('\0'))
129             /* The wildcard(s) is/are the last element of the pattern.
130                If the name is a file name and contains another slash
131                this means it cannot match, unless the FNM_LEADING_DIR
132                flag is set.  */
133             {
134               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
135
136               if (flags & FNM_FILE_NAME)
137                 {
138                   if (flags & FNM_LEADING_DIR)
139                     result = 0;
140                   else
141                     {
142                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
143                         result = 0;
144                     }
145                 }
146
147               return result;
148             }
149           else
150             {
151               const CHAR *endp;
152
153               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
154                              string_end - n);
155               if (endp == NULL)
156                 endp = string_end;
157
158               if (c == L_('[')
159                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
160                       && (c == L_('@') || c == L_('+') || c == L_('!'))
161                       && *p == L_('(')))
162                 {
163                   int flags2 = ((flags & FNM_FILE_NAME)
164                                 ? flags : (flags & ~FNM_PERIOD));
165                   bool no_leading_period2 = no_leading_period;
166
167                   for (--p; n < endp; ++n, no_leading_period2 = false)
168                     if (FCT (p, n, string_end, no_leading_period2, flags2)
169                         == 0)
170                       return 0;
171                 }
172               else if (c == L_('/') && (flags & FNM_FILE_NAME))
173                 {
174                   while (n < string_end && *n != L_('/'))
175                     ++n;
176                   if (n < string_end && *n == L_('/')
177                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
178                           == 0))
179                     return 0;
180                 }
181               else
182                 {
183                   int flags2 = ((flags & FNM_FILE_NAME)
184                                 ? flags : (flags & ~FNM_PERIOD));
185                   int no_leading_period2 = no_leading_period;
186
187                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
188                     c = *p;
189                   c = FOLD (c);
190                   for (--p; n < endp; ++n, no_leading_period2 = false)
191                     if (FOLD ((UCHAR) *n) == c
192                         && (FCT (p, n, string_end, no_leading_period2, flags2)
193                             == 0))
194                       return 0;
195                 }
196             }
197
198           /* If we come here no match is possible with the wildcard.  */
199           return FNM_NOMATCH;
200
201         case L_('['):
202           {
203             /* Nonzero if the sense of the character class is inverted.  */
204             register bool not;
205             CHAR cold;
206             UCHAR fn;
207
208             if (posixly_correct == 0)
209               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
210
211             if (n == string_end)
212               return FNM_NOMATCH;
213
214             if (*n == L_('.') && no_leading_period)
215               return FNM_NOMATCH;
216
217             if (*n == L_('/') && (flags & FNM_FILE_NAME))
218               /* `/' cannot be matched.  */
219               return FNM_NOMATCH;
220
221             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
222             if (not)
223               ++p;
224
225             fn = FOLD ((UCHAR) *n);
226
227             c = *p++;
228             for (;;)
229               {
230                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
231                   {
232                     if (*p == L_('\0'))
233                       return FNM_NOMATCH;
234                     c = FOLD ((UCHAR) *p);
235                     ++p;
236
237                     goto normal_bracket;
238                   }
239                 else if (c == L_('[') && *p == L_(':'))
240                   {
241                     /* Leave room for the null.  */
242                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
243                     size_t c1 = 0;
244 #if defined _LIBC || WIDE_CHAR_SUPPORT
245                     wctype_t wt;
246 #endif
247                     const CHAR *startp = p;
248
249                     for (;;)
250                       {
251                         if (c1 == CHAR_CLASS_MAX_LENGTH)
252                           /* The name is too long and therefore the pattern
253                              is ill-formed.  */
254                           return FNM_NOMATCH;
255
256                         c = *++p;
257                         if (c == L_(':') && p[1] == L_(']'))
258                           {
259                             p += 2;
260                             break;
261                           }
262                         if (c < L_('a') || c >= L_('z'))
263                           {
264                             /* This cannot possibly be a character class name.
265                                Match it as a normal range.  */
266                             p = startp;
267                             c = L_('[');
268                             goto normal_bracket;
269                           }
270                         str[c1++] = c;
271                       }
272                     str[c1] = L_('\0');
273
274 #if defined _LIBC || WIDE_CHAR_SUPPORT
275                     wt = IS_CHAR_CLASS (str);
276                     if (wt == 0)
277                       /* Invalid character class name.  */
278                       return FNM_NOMATCH;
279
280 # if defined _LIBC && ! WIDE_CHAR_VERSION
281                     /* The following code is glibc specific, but it does
282                        a good job of speeding up the code since
283                        we can avoid the btowc() call.  */
284                     if (_ISCTYPE ((UCHAR) *n, wt))
285                       goto matched;
286 # else
287                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
288                       goto matched;
289 # endif
290 #else
291                     if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
292                         || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
293                         || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
294                         || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
295                         || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
296                         || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
297                         || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
298                         || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
299                         || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
300                         || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
301                         || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
302                         || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
303                       goto matched;
304 #endif
305                     c = *p++;
306                   }
307 #ifdef _LIBC
308                 else if (c == L_('[') && *p == L_('='))
309                   {
310                     UCHAR str[1];
311                     uint32_t nrules =
312                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
313                     const CHAR *startp = p;
314
315                     c = *++p;
316                     if (c == L_('\0'))
317                       {
318                         p = startp;
319                         c = L_('[');
320                         goto normal_bracket;
321                       }
322                     str[0] = c;
323
324                     c = *++p;
325                     if (c != L_('=') || p[1] != L_(']'))
326                       {
327                         p = startp;
328                         c = L_('[');
329                         goto normal_bracket;
330                       }
331                     p += 2;
332
333                     if (nrules == 0)
334                       {
335                         if ((UCHAR) *n == str[0])
336                           goto matched;
337                       }
338                     else
339                       {
340                         const int32_t *table;
341 # if WIDE_CHAR_VERSION
342                         const int32_t *weights;
343                         const int32_t *extra;
344 # else
345                         const unsigned char *weights;
346                         const unsigned char *extra;
347 # endif
348                         const int32_t *indirect;
349                         int32_t idx;
350                         const UCHAR *cp = (const UCHAR *) str;
351
352                         /* This #include defines a local function!  */
353 # if WIDE_CHAR_VERSION
354 #  include <locale/weightwc.h>
355 # else
356 #  include <locale/weight.h>
357 # endif
358
359 # if WIDE_CHAR_VERSION
360                         table = (const int32_t *)
361                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
362                         weights = (const int32_t *)
363                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
364                         extra = (const int32_t *)
365                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
366                         indirect = (const int32_t *)
367                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
368 # else
369                         table = (const int32_t *)
370                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371                         weights = (const unsigned char *)
372                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
373                         extra = (const unsigned char *)
374                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
375                         indirect = (const int32_t *)
376                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
377 # endif
378
379                         idx = findidx (&cp);
380                         if (idx != 0)
381                           {
382                             /* We found a table entry.  Now see whether the
383                                character we are currently at has the same
384                                equivalence class value.  */
385                             int len = weights[idx];
386                             int32_t idx2;
387                             const UCHAR *np = (const UCHAR *) n;
388
389                             idx2 = findidx (&np);
390                             if (idx2 != 0 && len == weights[idx2])
391                               {
392                                 int cnt = 0;
393
394                                 while (cnt < len
395                                        && (weights[idx + 1 + cnt]
396                                            == weights[idx2 + 1 + cnt]))
397                                   ++cnt;
398
399                                 if (cnt == len)
400                                   goto matched;
401                               }
402                           }
403                       }
404
405                     c = *p++;
406                   }
407 #endif
408                 else if (c == L_('\0'))
409                   /* [ (unterminated) loses.  */
410                   return FNM_NOMATCH;
411                 else
412                   {
413                     bool is_range = false;
414
415 #ifdef _LIBC
416                     bool is_seqval = false;
417
418                     if (c == L_('[') && *p == L_('.'))
419                       {
420                         uint32_t nrules =
421                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
422                         const CHAR *startp = p;
423                         size_t c1 = 0;
424
425                         while (1)
426                           {
427                             c = *++p;
428                             if (c == L_('.') && p[1] == L_(']'))
429                               {
430                                 p += 2;
431                                 break;
432                               }
433                             if (c == '\0')
434                               return FNM_NOMATCH;
435                             ++c1;
436                           }
437
438                         /* We have to handle the symbols differently in
439                            ranges since then the collation sequence is
440                            important.  */
441                         is_range = *p == L_('-') && p[1] != L_('\0');
442
443                         if (nrules == 0)
444                           {
445                             /* There are no names defined in the collation
446                                data.  Therefore we only accept the trivial
447                                names consisting of the character itself.  */
448                             if (c1 != 1)
449                               return FNM_NOMATCH;
450
451                             if (!is_range && *n == startp[1])
452                               goto matched;
453
454                             cold = startp[1];
455                             c = *p++;
456                           }
457                         else
458                           {
459                             int32_t table_size;
460                             const int32_t *symb_table;
461 # ifdef WIDE_CHAR_VERSION
462                             char str[c1];
463                             size_t strcnt;
464 # else
465 #  define str (startp + 1)
466 # endif
467                             const unsigned char *extra;
468                             int32_t idx;
469                             int32_t elem;
470                             int32_t second;
471                             int32_t hash;
472
473 # ifdef WIDE_CHAR_VERSION
474                             /* We have to convert the name to a single-byte
475                                string.  This is possible since the names
476                                consist of ASCII characters and the internal
477                                representation is UCS4.  */
478                             for (strcnt = 0; strcnt < c1; ++strcnt)
479                               str[strcnt] = startp[1 + strcnt];
480 # endif
481
482                             table_size =
483                               _NL_CURRENT_WORD (LC_COLLATE,
484                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
485                             symb_table = (const int32_t *)
486                               _NL_CURRENT (LC_COLLATE,
487                                            _NL_COLLATE_SYMB_TABLEMB);
488                             extra = (const unsigned char *)
489                               _NL_CURRENT (LC_COLLATE,
490                                            _NL_COLLATE_SYMB_EXTRAMB);
491
492                             /* Locate the character in the hashing table.  */
493                             hash = elem_hash (str, c1);
494
495                             idx = 0;
496                             elem = hash % table_size;
497                             if (symb_table[2 * elem] != 0)
498                               {
499                                 second = hash % (table_size - 2) + 1;
500
501                                 do
502                                   {
503                                     /* First compare the hashing value.  */
504                                     if (symb_table[2 * elem] == hash
505                                         && (c1
506                                             == extra[symb_table[2 * elem + 1]])
507                                         && memcmp (str,
508                                                    &extra[symb_table[2 * elem
509                                                                      + 1]
510                                                           + 1], c1) == 0)
511                                       {
512                                         /* Yep, this is the entry.  */
513                                         idx = symb_table[2 * elem + 1];
514                                         idx += 1 + extra[idx];
515                                         break;
516                                       }
517
518                                     /* Next entry.  */
519                                     elem += second;
520                                   }
521                                 while (symb_table[2 * elem] != 0);
522                               }
523
524                             if (symb_table[2 * elem] != 0)
525                               {
526                                 /* Compare the byte sequence but only if
527                                    this is not part of a range.  */
528 # ifdef WIDE_CHAR_VERSION
529                                 int32_t *wextra;
530
531                                 idx += 1 + extra[idx];
532                                 /* Adjust for the alignment.  */
533                                 idx = (idx + 3) & ~3;
534
535                                 wextra = (int32_t *) &extra[idx + 4];
536 # endif
537
538                                 if (! is_range)
539                                   {
540 # ifdef WIDE_CHAR_VERSION
541                                     for (c1 = 0;
542                                          (int32_t) c1 < wextra[idx];
543                                          ++c1)
544                                       if (n[c1] != wextra[1 + c1])
545                                         break;
546
547                                     if ((int32_t) c1 == wextra[idx])
548                                       goto matched;
549 # else
550                                     for (c1 = 0; c1 < extra[idx]; ++c1)
551                                       if (n[c1] != extra[1 + c1])
552                                         break;
553
554                                     if (c1 == extra[idx])
555                                       goto matched;
556 # endif
557                                   }
558
559                                 /* Get the collation sequence value.  */
560                                 is_seqval = true;
561 # ifdef WIDE_CHAR_VERSION
562                                 cold = wextra[1 + wextra[idx]];
563 # else
564                                 /* Adjust for the alignment.  */
565                                 idx += 1 + extra[idx];
566                                 idx = (idx + 3) & ~4;
567                                 cold = *((int32_t *) &extra[idx]);
568 # endif
569
570                                 c = *p++;
571                               }
572                             else if (c1 == 1)
573                               {
574                                 /* No valid character.  Match it as a
575                                    single byte.  */
576                                 if (!is_range && *n == str[0])
577                                   goto matched;
578
579                                 cold = str[0];
580                                 c = *p++;
581                               }
582                             else
583                               return FNM_NOMATCH;
584                           }
585                       }
586                     else
587 # undef str
588 #endif
589                       {
590                         c = FOLD (c);
591                       normal_bracket:
592
593                         /* We have to handle the symbols differently in
594                            ranges since then the collation sequence is
595                            important.  */
596                         is_range = (*p == L_('-') && p[1] != L_('\0')
597                                     && p[1] != L_(']'));
598
599                         if (!is_range && c == fn)
600                           goto matched;
601
602                         /* This is needed if we goto normal_bracket; from
603                            outside of is_seqval's scope.  */
604                         is_seqval = false;
605                         cold = c;
606                         c = *p++;
607                       }
608
609                     if (c == L_('-') && *p != L_(']'))
610                       {
611 #if _LIBC
612                         /* We have to find the collation sequence
613                            value for C.  Collation sequence is nothing
614                            we can regularly access.  The sequence
615                            value is defined by the order in which the
616                            definitions of the collation values for the
617                            various characters appear in the source
618                            file.  A strange concept, nowhere
619                            documented.  */
620                         uint32_t fcollseq;
621                         uint32_t lcollseq;
622                         UCHAR cend = *p++;
623
624 # ifdef WIDE_CHAR_VERSION
625                         /* Search in the `names' array for the characters.  */
626                         fcollseq = __collseq_table_lookup (collseq, fn);
627                         if (fcollseq == ~((uint32_t) 0))
628                           /* XXX We don't know anything about the character
629                              we are supposed to match.  This means we are
630                              failing.  */
631                           goto range_not_matched;
632
633                         if (is_seqval)
634                           lcollseq = cold;
635                         else
636                           lcollseq = __collseq_table_lookup (collseq, cold);
637 # else
638                         fcollseq = collseq[fn];
639                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
640 # endif
641
642                         is_seqval = false;
643                         if (cend == L_('[') && *p == L_('.'))
644                           {
645                             uint32_t nrules =
646                               _NL_CURRENT_WORD (LC_COLLATE,
647                                                 _NL_COLLATE_NRULES);
648                             const CHAR *startp = p;
649                             size_t c1 = 0;
650
651                             while (1)
652                               {
653                                 c = *++p;
654                                 if (c == L_('.') && p[1] == L_(']'))
655                                   {
656                                     p += 2;
657                                     break;
658                                   }
659                                 if (c == '\0')
660                                   return FNM_NOMATCH;
661                                 ++c1;
662                               }
663
664                             if (nrules == 0)
665                               {
666                                 /* There are no names defined in the
667                                    collation data.  Therefore we only
668                                    accept the trivial names consisting
669                                    of the character itself.  */
670                                 if (c1 != 1)
671                                   return FNM_NOMATCH;
672
673                                 cend = startp[1];
674                               }
675                             else
676                               {
677                                 int32_t table_size;
678                                 const int32_t *symb_table;
679 # ifdef WIDE_CHAR_VERSION
680                                 char str[c1];
681                                 size_t strcnt;
682 # else
683 #  define str (startp + 1)
684 # endif
685                                 const unsigned char *extra;
686                                 int32_t idx;
687                                 int32_t elem;
688                                 int32_t second;
689                                 int32_t hash;
690
691 # ifdef WIDE_CHAR_VERSION
692                                 /* We have to convert the name to a single-byte
693                                    string.  This is possible since the names
694                                    consist of ASCII characters and the internal
695                                    representation is UCS4.  */
696                                 for (strcnt = 0; strcnt < c1; ++strcnt)
697                                   str[strcnt] = startp[1 + strcnt];
698 # endif
699
700                                 table_size =
701                                   _NL_CURRENT_WORD (LC_COLLATE,
702                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
703                                 symb_table = (const int32_t *)
704                                   _NL_CURRENT (LC_COLLATE,
705                                                _NL_COLLATE_SYMB_TABLEMB);
706                                 extra = (const unsigned char *)
707                                   _NL_CURRENT (LC_COLLATE,
708                                                _NL_COLLATE_SYMB_EXTRAMB);
709
710                                 /* Locate the character in the hashing
711                                    table.  */
712                                 hash = elem_hash (str, c1);
713
714                                 idx = 0;
715                                 elem = hash % table_size;
716                                 if (symb_table[2 * elem] != 0)
717                                   {
718                                     second = hash % (table_size - 2) + 1;
719
720                                     do
721                                       {
722                                         /* First compare the hashing value.  */
723                                         if (symb_table[2 * elem] == hash
724                                             && (c1
725                                                 == extra[symb_table[2 * elem + 1]])
726                                             && memcmp (str,
727                                                        &extra[symb_table[2 * elem + 1]
728                                                               + 1], c1) == 0)
729                                           {
730                                             /* Yep, this is the entry.  */
731                                             idx = symb_table[2 * elem + 1];
732                                             idx += 1 + extra[idx];
733                                             break;
734                                           }
735
736                                         /* Next entry.  */
737                                         elem += second;
738                                       }
739                                     while (symb_table[2 * elem] != 0);
740                                   }
741
742                                 if (symb_table[2 * elem] != 0)
743                                   {
744                                     /* Compare the byte sequence but only if
745                                        this is not part of a range.  */
746 # ifdef WIDE_CHAR_VERSION
747                                     int32_t *wextra;
748
749                                     idx += 1 + extra[idx];
750                                     /* Adjust for the alignment.  */
751                                     idx = (idx + 3) & ~4;
752
753                                     wextra = (int32_t *) &extra[idx + 4];
754 # endif
755                                     /* Get the collation sequence value.  */
756                                     is_seqval = true;
757 # ifdef WIDE_CHAR_VERSION
758                                     cend = wextra[1 + wextra[idx]];
759 # else
760                                     /* Adjust for the alignment.  */
761                                     idx += 1 + extra[idx];
762                                     idx = (idx + 3) & ~4;
763                                     cend = *((int32_t *) &extra[idx]);
764 # endif
765                                   }
766                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
767                                   {
768                                     cend = str[0];
769                                     c = *p++;
770                                   }
771                                 else
772                                   return FNM_NOMATCH;
773                               }
774 # undef str
775                           }
776                         else
777                           {
778                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
779                               cend = *p++;
780                             if (cend == L_('\0'))
781                               return FNM_NOMATCH;
782                             cend = FOLD (cend);
783                           }
784
785                         /* XXX It is not entirely clear to me how to handle
786                            characters which are not mentioned in the
787                            collation specification.  */
788                         if (
789 # ifdef WIDE_CHAR_VERSION
790                             lcollseq == 0xffffffff ||
791 # endif
792                             lcollseq <= fcollseq)
793                           {
794                             /* We have to look at the upper bound.  */
795                             uint32_t hcollseq;
796
797                             if (is_seqval)
798                               hcollseq = cend;
799                             else
800                               {
801 # ifdef WIDE_CHAR_VERSION
802                                 hcollseq =
803                                   __collseq_table_lookup (collseq, cend);
804                                 if (hcollseq == ~((uint32_t) 0))
805                                   {
806                                     /* Hum, no information about the upper
807                                        bound.  The matching succeeds if the
808                                        lower bound is matched exactly.  */
809                                     if (lcollseq != fcollseq)
810                                       goto range_not_matched;
811
812                                     goto matched;
813                                   }
814 # else
815                                 hcollseq = collseq[cend];
816 # endif
817                               }
818
819                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
820                               goto matched;
821                           }
822 # ifdef WIDE_CHAR_VERSION
823                       range_not_matched:
824 # endif
825 #else
826                         /* We use a boring value comparison of the character
827                            values.  This is better than comparing using
828                            `strcoll' since the latter would have surprising
829                            and sometimes fatal consequences.  */
830                         UCHAR cend = *p++;
831
832                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
833                           cend = *p++;
834                         if (cend == L_('\0'))
835                           return FNM_NOMATCH;
836
837                         /* It is a range.  */
838                         if (cold <= fn && fn <= cend)
839                           goto matched;
840 #endif
841
842                         c = *p++;
843                       }
844                   }
845
846                 if (c == L_(']'))
847                   break;
848               }
849
850             if (!not)
851               return FNM_NOMATCH;
852             break;
853
854           matched:
855             /* Skip the rest of the [...] that already matched.  */
856             do
857               {
858               ignore_next:
859                 c = *p++;
860
861                 if (c == L_('\0'))
862                   /* [... (unterminated) loses.  */
863                   return FNM_NOMATCH;
864
865                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
866                   {
867                     if (*p == L_('\0'))
868                       return FNM_NOMATCH;
869                     /* XXX 1003.2d11 is unclear if this is right.  */
870                     ++p;
871                   }
872                 else if (c == L_('[') && *p == L_(':'))
873                   {
874                     int c1 = 0;
875                     const CHAR *startp = p;
876
877                     while (1)
878                       {
879                         c = *++p;
880                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
881                           return FNM_NOMATCH;
882
883                         if (*p == L_(':') && p[1] == L_(']'))
884                           break;
885
886                         if (c < L_('a') || c >= L_('z'))
887                           {
888                             p = startp;
889                             goto ignore_next;
890                           }
891                       }
892                     p += 2;
893                     c = *p++;
894                   }
895                 else if (c == L_('[') && *p == L_('='))
896                   {
897                     c = *++p;
898                     if (c == L_('\0'))
899                       return FNM_NOMATCH;
900                     c = *++p;
901                     if (c != L_('=') || p[1] != L_(']'))
902                       return FNM_NOMATCH;
903                     p += 2;
904                     c = *p++;
905                   }
906                 else if (c == L_('[') && *p == L_('.'))
907                   {
908                     ++p;
909                     while (1)
910                       {
911                         c = *++p;
912                         if (c == '\0')
913                           return FNM_NOMATCH;
914
915                         if (*p == L_('.') && p[1] == L_(']'))
916                           break;
917                       }
918                     p += 2;
919                     c = *p++;
920                   }
921               }
922             while (c != L_(']'));
923             if (not)
924               return FNM_NOMATCH;
925           }
926           break;
927
928         case L_('+'):
929         case L_('@'):
930         case L_('!'):
931           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
932             {
933               int res;
934
935               res = EXT (c, p, n, string_end, no_leading_period, flags);
936               if (res != -1)
937                 return res;
938             }
939           goto normal_match;
940
941         case L_('/'):
942           if (NO_LEADING_PERIOD (flags))
943             {
944               if (n == string_end || c != (UCHAR) *n)
945                 return FNM_NOMATCH;
946
947               new_no_leading_period = true;
948               break;
949             }
950           /* FALLTHROUGH */
951         default:
952         normal_match:
953           if (n == string_end || c != FOLD ((UCHAR) *n))
954             return FNM_NOMATCH;
955         }
956
957       no_leading_period = new_no_leading_period;
958       ++n;
959     }
960
961   if (n == string_end)
962     return 0;
963
964   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
965     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
966     return 0;
967
968   return FNM_NOMATCH;
969 }
970
971
972 static const CHAR *
973 internal_function
974 END (const CHAR *pattern)
975 {
976   const CHAR *p = pattern;
977
978   while (1)
979     if (*++p == L_('\0'))
980       /* This is an invalid pattern.  */
981       return pattern;
982     else if (*p == L_('['))
983       {
984         /* Handle brackets special.  */
985         if (posixly_correct == 0)
986           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
987
988         /* Skip the not sign.  We have to recognize it because of a possibly
989            following ']'.  */
990         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
991           ++p;
992         /* A leading ']' is recognized as such.  */
993         if (*p == L_(']'))
994           ++p;
995         /* Skip over all characters of the list.  */
996         while (*p != L_(']'))
997           if (*p++ == L_('\0'))
998             /* This is no valid pattern.  */
999             return pattern;
1000       }
1001     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1002               || *p == L_('!')) && p[1] == L_('('))
1003       p = END (p + 1);
1004     else if (*p == L_(')'))
1005       break;
1006
1007   return p + 1;
1008 }
1009
1010
1011 static int
1012 internal_function
1013 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1014      bool no_leading_period, int flags)
1015 {
1016   const CHAR *startp;
1017   size_t level;
1018   struct patternlist
1019   {
1020     struct patternlist *next;
1021     CHAR str[1];
1022   } *list = NULL;
1023   struct patternlist **lastp = &list;
1024   size_t pattern_len = STRLEN (pattern);
1025   const CHAR *p;
1026   const CHAR *rs;
1027   enum { ALLOCA_LIMIT = 8000 };
1028
1029   /* Parse the pattern.  Store the individual parts in the list.  */
1030   level = 0;
1031   for (startp = p = pattern + 1; ; ++p)
1032     if (*p == L_('\0'))
1033       /* This is an invalid pattern.  */
1034       return -1;
1035     else if (*p == L_('['))
1036       {
1037         /* Handle brackets special.  */
1038         if (posixly_correct == 0)
1039           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1040
1041         /* Skip the not sign.  We have to recognize it because of a possibly
1042            following ']'.  */
1043         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1044           ++p;
1045         /* A leading ']' is recognized as such.  */
1046         if (*p == L_(']'))
1047           ++p;
1048         /* Skip over all characters of the list.  */
1049         while (*p != L_(']'))
1050           if (*p++ == L_('\0'))
1051             /* This is no valid pattern.  */
1052             return -1;
1053       }
1054     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1055               || *p == L_('!')) && p[1] == L_('('))
1056       /* Remember the nesting level.  */
1057       ++level;
1058     else if (*p == L_(')'))
1059       {
1060         if (level-- == 0)
1061           {
1062             /* This means we found the end of the pattern.  */
1063 #define NEW_PATTERN \
1064             struct patternlist *newp;                                         \
1065             size_t plen;                                                      \
1066             size_t plensize;                                                  \
1067             size_t newpsize;                                                  \
1068                                                                               \
1069             plen = (opt == L_('?') || opt == L_('@')                          \
1070                     ? pattern_len                                             \
1071                     : p - startp + 1);                                        \
1072             plensize = plen * sizeof (CHAR);                                  \
1073             newpsize = offsetof (struct patternlist, str) + plensize;         \
1074             if ((size_t) -1 / sizeof (CHAR) < plen                            \
1075                 || newpsize < offsetof (struct patternlist, str)              \
1076                 || ALLOCA_LIMIT <= newpsize)                                  \
1077               return -1;                                                      \
1078             newp = (struct patternlist *) alloca (newpsize);                  \
1079             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');    \
1080             newp->next = NULL;                                                \
1081             *lastp = newp;                                                    \
1082             lastp = &newp->next
1083             NEW_PATTERN;
1084             break;
1085           }
1086       }
1087     else if (*p == L_('|'))
1088       {
1089         if (level == 0)
1090           {
1091             NEW_PATTERN;
1092             startp = p + 1;
1093           }
1094       }
1095   assert (list != NULL);
1096   assert (p[-1] == L_(')'));
1097 #undef NEW_PATTERN
1098
1099   switch (opt)
1100     {
1101     case L_('*'):
1102       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1103         return 0;
1104       /* FALLTHROUGH */
1105
1106     case L_('+'):
1107       do
1108         {
1109           for (rs = string; rs <= string_end; ++rs)
1110             /* First match the prefix with the current pattern with the
1111                current pattern.  */
1112             if (FCT (list->str, string, rs, no_leading_period,
1113                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1114                 /* This was successful.  Now match the rest with the rest
1115                    of the pattern.  */
1116                 && (FCT (p, rs, string_end,
1117                          rs == string
1118                          ? no_leading_period
1119                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1120                          flags & FNM_FILE_NAME
1121                          ? flags : flags & ~FNM_PERIOD) == 0
1122                     /* This didn't work.  Try the whole pattern.  */
1123                     || (rs != string
1124                         && FCT (pattern - 1, rs, string_end,
1125                                 rs == string
1126                                 ? no_leading_period
1127                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1128                                 flags & FNM_FILE_NAME
1129                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1130               /* It worked.  Signal success.  */
1131               return 0;
1132         }
1133       while ((list = list->next) != NULL);
1134
1135       /* None of the patterns lead to a match.  */
1136       return FNM_NOMATCH;
1137
1138     case L_('?'):
1139       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1140         return 0;
1141       /* FALLTHROUGH */
1142
1143     case L_('@'):
1144       do
1145         /* I cannot believe it but `strcat' is actually acceptable
1146            here.  Match the entire string with the prefix from the
1147            pattern list and the rest of the pattern following the
1148            pattern list.  */
1149         if (FCT (STRCAT (list->str, p), string, string_end,
1150                  no_leading_period,
1151                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1152           /* It worked.  Signal success.  */
1153           return 0;
1154       while ((list = list->next) != NULL);
1155
1156       /* None of the patterns lead to a match.  */
1157       return FNM_NOMATCH;
1158
1159     case L_('!'):
1160       for (rs = string; rs <= string_end; ++rs)
1161         {
1162           struct patternlist *runp;
1163
1164           for (runp = list; runp != NULL; runp = runp->next)
1165             if (FCT (runp->str, string, rs,  no_leading_period,
1166                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1167               break;
1168
1169           /* If none of the patterns matched see whether the rest does.  */
1170           if (runp == NULL
1171               && (FCT (p, rs, string_end,
1172                        rs == string
1173                        ? no_leading_period
1174                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1175                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1176                   == 0))
1177             /* This is successful.  */
1178             return 0;
1179         }
1180
1181       /* None of the patterns together with the rest of the pattern
1182          lead to a match.  */
1183       return FNM_NOMATCH;
1184
1185     default:
1186       assert (! "Invalid extended matching operator");
1187       break;
1188     }
1189
1190   return -1;
1191 }
1192
1193
/* Undefine the macros this file is parameterized by (CHAR, UCHAR, INT,
   L_, the FCT/EXT/END function names, the string helpers, ...).
   NOTE(review): presumably this lets the including file define them
   afresh and include this file again, e.g. for the wide-character
   variant -- confirm against the includer.  */
1194 #undef FOLD
1195 #undef CHAR
1196 #undef UCHAR
1197 #undef INT
1198 #undef FCT
1199 #undef EXT
1200 #undef END
1201 #undef MEMPCPY
1202 #undef MEMCHR
1203 #undef STRCOLL
1204 #undef STRLEN
1205 #undef STRCAT
1206 #undef L_
1207 #undef BTOWC