pintos-os.org Git - pspp/blob - lib/fnmatch_loop.c

   1 /* Copyright (C) 1991-1993, 1996-2006, 2009-2011 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software Foundation,
  16    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  17
  18 /* Match STRING against the file name pattern PATTERN, returning zero if
  19    it matches, nonzero if not.  */
  20 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  21                 const CHAR *string_end, bool no_leading_period, int flags)
  22      internal_function;
  23 static const CHAR *END (const CHAR *patternp) internal_function;
  24
  25 static int
  26 internal_function
  27 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
  28      bool no_leading_period, int flags)
  29 {
  30   register const CHAR *p = pattern, *n = string;
  31   register UCHAR c;
  32 #ifdef _LIBC
  33 # if WIDE_CHAR_VERSION
  34   const char *collseq = (const char *)
  35     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  36 # else
  37   const UCHAR *collseq = (const UCHAR *)
  38     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  39 # endif
  40 #endif
  41
  42   while ((c = *p++) != L_('\0'))
  43     {
  44       bool new_no_leading_period = false;
  45       c = FOLD (c);
  46
  47       switch (c)
  48         {
  49         case L_('?'):
  50           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  51             {
  52               int res;
  53
  54               res = EXT (c, p, n, string_end, no_leading_period,
  55                          flags);
  56               if (res != -1)
  57                 return res;
  58             }
  59
  60           if (n == string_end)
  61             return FNM_NOMATCH;
  62           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
  63             return FNM_NOMATCH;
  64           else if (*n == L_('.') && no_leading_period)
  65             return FNM_NOMATCH;
  66           break;
  67
  68         case L_('\\'):
  69           if (!(flags & FNM_NOESCAPE))
  70             {
  71               c = *p++;
  72               if (c == L_('\0'))
  73                 /* Trailing \ loses.  */
  74                 return FNM_NOMATCH;
  75               c = FOLD (c);
  76             }
  77           if (n == string_end || FOLD ((UCHAR) *n) != c)
  78             return FNM_NOMATCH;
  79           break;
  80
  81         case L_('*'):
  82           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  83             {
  84               int res;
  85
  86               res = EXT (c, p, n, string_end, no_leading_period,
  87                          flags);
  88               if (res != -1)
  89                 return res;
  90             }
  91
  92           if (n != string_end && *n == L_('.') && no_leading_period)
  93             return FNM_NOMATCH;
  94
  95           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
  96             {
  97               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
  98                 {
  99                   const CHAR *endp = END (p);
 100                   if (endp != p)
 101                     {
 102                       /* This is a pattern.  Skip over it.  */
 103                       p = endp;
 104                       continue;
 105                     }
 106                 }
 107
 108               if (c == L_('?'))
 109                 {
 110                   /* A ? needs to match one character.  */
 111                   if (n == string_end)
 112                     /* There isn't another character; no match.  */
 113                     return FNM_NOMATCH;
 114                   else if (*n == L_('/')
 115                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 116                     /* A slash does not match a wildcard under
 117                        FNM_FILE_NAME.  */
 118                     return FNM_NOMATCH;
 119                   else
 120                     /* One character of the string is consumed in matching
 121                        this ? wildcard, so *??? won't match if there are
 122                        less than three characters.  */
 123                     ++n;
 124                 }
 125             }
 126
 127           if (c == L_('\0'))
 128             /* The wildcard(s) is/are the last element of the pattern.
 129                If the name is a file name and contains another slash
 130                this means it cannot match, unless the FNM_LEADING_DIR
 131                flag is set.  */
 132             {
 133               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 134
 135               if (flags & FNM_FILE_NAME)
 136                 {
 137                   if (flags & FNM_LEADING_DIR)
 138                     result = 0;
 139                   else
 140                     {
 141                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
 142                         result = 0;
 143                     }
 144                 }
 145
 146               return result;
 147             }
 148           else
 149             {
 150               const CHAR *endp;
 151
 152               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
 153                              string_end - n);
 154               if (endp == NULL)
 155                 endp = string_end;
 156
 157               if (c == L_('[')
 158                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 159                       && (c == L_('@') || c == L_('+') || c == L_('!'))
 160                       && *p == L_('(')))
 161                 {
 162                   int flags2 = ((flags & FNM_FILE_NAME)
 163                                 ? flags : (flags & ~FNM_PERIOD));
 164                   bool no_leading_period2 = no_leading_period;
 165
 166                   for (--p; n < endp; ++n, no_leading_period2 = false)
 167                     if (FCT (p, n, string_end, no_leading_period2, flags2)
 168                         == 0)
 169                       return 0;
 170                 }
 171               else if (c == L_('/') && (flags & FNM_FILE_NAME))
 172                 {
 173                   while (n < string_end && *n != L_('/'))
 174                     ++n;
 175                   if (n < string_end && *n == L_('/')
 176                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
 177                           == 0))
 178                     return 0;
 179                 }
 180               else
 181                 {
 182                   int flags2 = ((flags & FNM_FILE_NAME)
 183                                 ? flags : (flags & ~FNM_PERIOD));
 184                   int no_leading_period2 = no_leading_period;
 185
 186                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
 187                     c = *p;
 188                   c = FOLD (c);
 189                   for (--p; n < endp; ++n, no_leading_period2 = false)
 190                     if (FOLD ((UCHAR) *n) == c
 191                         && (FCT (p, n, string_end, no_leading_period2, flags2)
 192                             == 0))
 193                       return 0;
 194                 }
 195             }
 196
 197           /* If we come here no match is possible with the wildcard.  */
 198           return FNM_NOMATCH;
 199
 200         case L_('['):
 201           {
 202             /* Nonzero if the sense of the character class is inverted.  */
 203             CHAR *p_init = p;
 204             CHAR *n_init = n;
 205             register bool not;
 206             CHAR cold;
 207             UCHAR fn;
 208
 209             if (posixly_correct == 0)
 210               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 211
 212             if (n == string_end)
 213               return FNM_NOMATCH;
 214
 215             if (*n == L_('.') && no_leading_period)
 216               return FNM_NOMATCH;
 217
 218             if (*n == L_('/') && (flags & FNM_FILE_NAME))
 219               /* `/' cannot be matched.  */
 220               return FNM_NOMATCH;
 221
 222             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
 223             if (not)
 224               ++p;
 225
 226             fn = FOLD ((UCHAR) *n);
 227
 228             c = *p++;
 229             for (;;)
 230               {
 231                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 232                   {
 233                     if (*p == L_('\0'))
 234                       return FNM_NOMATCH;
 235                     c = FOLD ((UCHAR) *p);
 236                     ++p;
 237
 238                     goto normal_bracket;
 239                   }
 240                 else if (c == L_('[') && *p == L_(':'))
 241                   {
 242                     /* Leave room for the null.  */
 243                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 244                     size_t c1 = 0;
 245 #if defined _LIBC || WIDE_CHAR_SUPPORT
 246                     wctype_t wt;
 247 #endif
 248                     const CHAR *startp = p;
 249
 250                     for (;;)
 251                       {
 252                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 253                           /* The name is too long and therefore the pattern
 254                              is ill-formed.  */
 255                           return FNM_NOMATCH;
 256
 257                         c = *++p;
 258                         if (c == L_(':') && p[1] == L_(']'))
 259                           {
 260                             p += 2;
 261                             break;
 262                           }
 263                         if (c < L_('a') || c >= L_('z'))
 264                           {
 265                             /* This cannot possibly be a character class name.
 266                                Match it as a normal range.  */
 267                             p = startp;
 268                             c = L_('[');
 269                             goto normal_bracket;
 270                           }
 271                         str[c1++] = c;
 272                       }
 273                     str[c1] = L_('\0');
 274
 275 #if defined _LIBC || WIDE_CHAR_SUPPORT
 276                     wt = IS_CHAR_CLASS (str);
 277                     if (wt == 0)
 278                       /* Invalid character class name.  */
 279                       return FNM_NOMATCH;
 280
 281 # if defined _LIBC && ! WIDE_CHAR_VERSION
 282                     /* The following code is glibc specific but does
 283                        there a good job in speeding up the code since
 284                        we can avoid the btowc() call.  */
 285                     if (_ISCTYPE ((UCHAR) *n, wt))
 286                       goto matched;
 287 # else
 288                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 289                       goto matched;
 290 # endif
 291 #else
 292                     if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
 293                         || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
 294                         || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
 295                         || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
 296                         || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
 297                         || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
 298                         || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
 299                         || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
 300                         || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
 301                         || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
 302                         || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
 303                         || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
 304                       goto matched;
 305 #endif
 306                     c = *p++;
 307                   }
 308 #ifdef _LIBC
 309                 else if (c == L_('[') && *p == L_('='))
 310                   {
 311                     UCHAR str[1];
 312                     uint32_t nrules =
 313                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 314                     const CHAR *startp = p;
 315
 316                     c = *++p;
 317                     if (c == L_('\0'))
 318                       {
 319                         p = startp;
 320                         c = L_('[');
 321                         goto normal_bracket;
 322                       }
 323                     str[0] = c;
 324
 325                     c = *++p;
 326                     if (c != L_('=') || p[1] != L_(']'))
 327                       {
 328                         p = startp;
 329                         c = L_('[');
 330                         goto normal_bracket;
 331                       }
 332                     p += 2;
 333
 334                     if (nrules == 0)
 335                       {
 336                         if ((UCHAR) *n == str[0])
 337                           goto matched;
 338                       }
 339                     else
 340                       {
 341                         const int32_t *table;
 342 # if WIDE_CHAR_VERSION
 343                         const int32_t *weights;
 344                         const int32_t *extra;
 345 # else
 346                         const unsigned char *weights;
 347                         const unsigned char *extra;
 348 # endif
 349                         const int32_t *indirect;
 350                         int32_t idx;
 351                         const UCHAR *cp = (const UCHAR *) str;
 352
 353                         /* This #include defines a local function!  */
 354 # if WIDE_CHAR_VERSION
 355 #  include <locale/weightwc.h>
 356 # else
 357 #  include <locale/weight.h>
 358 # endif
 359
 360 # if WIDE_CHAR_VERSION
 361                         table = (const int32_t *)
 362                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 363                         weights = (const int32_t *)
 364                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 365                         extra = (const int32_t *)
 366                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 367                         indirect = (const int32_t *)
 368                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 369 # else
 370                         table = (const int32_t *)
 371                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 372                         weights = (const unsigned char *)
 373                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 374                         extra = (const unsigned char *)
 375                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 376                         indirect = (const int32_t *)
 377                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 378 # endif
 379
 380                         idx = findidx (&cp);
 381                         if (idx != 0)
 382                           {
 383                             /* We found a table entry.  Now see whether the
 384                                character we are currently at has the same
 385                                equivalance class value.  */
 386                             int len = weights[idx & 0xffffff];
 387                             int32_t idx2;
 388                             const UCHAR *np = (const UCHAR *) n;
 389
 390                             idx2 = findidx (&np);
 391                             if (idx2 != 0
 392                                 && (idx >> 24) == (idx2 >> 24)
 393                                 && len == weights[idx2 & 0xffffff])
 394                               {
 395                                 int cnt = 0;
 396
 397                                 idx &= 0xffffff;
 398                                 idx2 &= 0xffffff;
 399
 400                                 while (cnt < len
 401                                        && (weights[idx + 1 + cnt]
 402                                            == weights[idx2 + 1 + cnt]))
 403                                   ++cnt;
 404
 405                                 if (cnt == len)
 406                                   goto matched;
 407                               }
 408                           }
 409                       }
 410
 411                     c = *p++;
 412                   }
 413 #endif
 414                 else if (c == L_('\0'))
 415                   {
 416                     /* [ unterminated, treat as normal character.  */
 417                     p = p_init;
 418                     n = n_init;
 419                     c = L_('[');
 420                     goto normal_match;
 421                   }
 422                 else
 423                   {
 424                     bool is_range = false;
 425
 426 #ifdef _LIBC
 427                     bool is_seqval = false;
 428
 429                     if (c == L_('[') && *p == L_('.'))
 430                       {
 431                         uint32_t nrules =
 432                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 433                         const CHAR *startp = p;
 434                         size_t c1 = 0;
 435
 436                         while (1)
 437                           {
 438                             c = *++p;
 439                             if (c == L_('.') && p[1] == L_(']'))
 440                               {
 441                                 p += 2;
 442                                 break;
 443                               }
 444                             if (c == '\0')
 445                               return FNM_NOMATCH;
 446                             ++c1;
 447                           }
 448
 449                         /* We have to handling the symbols differently in
 450                            ranges since then the collation sequence is
 451                            important.  */
 452                         is_range = *p == L_('-') && p[1] != L_('\0');
 453
 454                         if (nrules == 0)
 455                           {
 456                             /* There are no names defined in the collation
 457                                data.  Therefore we only accept the trivial
 458                                names consisting of the character itself.  */
 459                             if (c1 != 1)
 460                               return FNM_NOMATCH;
 461
 462                             if (!is_range && *n == startp[1])
 463                               goto matched;
 464
 465                             cold = startp[1];
 466                             c = *p++;
 467                           }
 468                         else
 469                           {
 470                             int32_t table_size;
 471                             const int32_t *symb_table;
 472 # ifdef WIDE_CHAR_VERSION
 473                             char str[c1];
 474                             size_t strcnt;
 475 # else
 476 #  define str (startp + 1)
 477 # endif
 478                             const unsigned char *extra;
 479                             int32_t idx;
 480                             int32_t elem;
 481                             int32_t second;
 482                             int32_t hash;
 483
 484 # ifdef WIDE_CHAR_VERSION
 485                             /* We have to convert the name to a single-byte
 486                                string.  This is possible since the names
 487                                consist of ASCII characters and the internal
 488                                representation is UCS4.  */
 489                             for (strcnt = 0; strcnt < c1; ++strcnt)
 490                               str[strcnt] = startp[1 + strcnt];
 491 # endif
 492
 493                             table_size =
 494                               _NL_CURRENT_WORD (LC_COLLATE,
 495                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 496                             symb_table = (const int32_t *)
 497                               _NL_CURRENT (LC_COLLATE,
 498                                            _NL_COLLATE_SYMB_TABLEMB);
 499                             extra = (const unsigned char *)
 500                               _NL_CURRENT (LC_COLLATE,
 501                                            _NL_COLLATE_SYMB_EXTRAMB);
 502
 503                             /* Locate the character in the hashing table.  */
 504                             hash = elem_hash (str, c1);
 505
 506                             idx = 0;
 507                             elem = hash % table_size;
 508                             if (symb_table[2 * elem] != 0)
 509                               {
 510                                 second = hash % (table_size - 2) + 1;
 511
 512                                 do
 513                                   {
 514                                     /* First compare the hashing value.  */
 515                                     if (symb_table[2 * elem] == hash
 516                                         && (c1
 517                                             == extra[symb_table[2 * elem + 1]])
 518                                         && memcmp (str,
 519                                                    &extra[symb_table[2 * elem
 520                                                                      + 1]
 521                                                           + 1], c1) == 0)
 522                                       {
 523                                         /* Yep, this is the entry.  */
 524                                         idx = symb_table[2 * elem + 1];
 525                                         idx += 1 + extra[idx];
 526                                         break;
 527                                       }
 528
 529                                     /* Next entry.  */
 530                                     elem += second;
 531                                   }
 532                                 while (symb_table[2 * elem] != 0);
 533                               }
 534
 535                             if (symb_table[2 * elem] != 0)
 536                               {
 537                                 /* Compare the byte sequence but only if
 538                                    this is not part of a range.  */
 539 # ifdef WIDE_CHAR_VERSION
 540                                 int32_t *wextra;
 541
 542                                 idx += 1 + extra[idx];
 543                                 /* Adjust for the alignment.  */
 544                                 idx = (idx + 3) & ~3;
 545
 546                                 wextra = (int32_t *) &extra[idx + 4];
 547 # endif
 548
 549                                 if (! is_range)
 550                                   {
 551 # ifdef WIDE_CHAR_VERSION
 552                                     for (c1 = 0;
 553                                          (int32_t) c1 < wextra[idx];
 554                                          ++c1)
 555                                       if (n[c1] != wextra[1 + c1])
 556                                         break;
 557
 558                                     if ((int32_t) c1 == wextra[idx])
 559                                       goto matched;
 560 # else
 561                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 562                                       if (n[c1] != extra[1 + c1])
 563                                         break;
 564
 565                                     if (c1 == extra[idx])
 566                                       goto matched;
 567 # endif
 568                                   }
 569
 570                                 /* Get the collation sequence value.  */
 571                                 is_seqval = true;
 572 # ifdef WIDE_CHAR_VERSION
 573                                 cold = wextra[1 + wextra[idx]];
 574 # else
 575                                 /* Adjust for the alignment.  */
 576                                 idx += 1 + extra[idx];
 577                                 idx = (idx + 3) & ~4;
 578                                 cold = *((int32_t *) &extra[idx]);
 579 # endif
 580
 581                                 c = *p++;
 582                               }
 583                             else if (c1 == 1)
 584                               {
 585                                 /* No valid character.  Match it as a
 586                                    single byte.  */
 587                                 if (!is_range && *n == str[0])
 588                                   goto matched;
 589
 590                                 cold = str[0];
 591                                 c = *p++;
 592                               }
 593                             else
 594                               return FNM_NOMATCH;
 595                           }
 596                       }
 597                     else
 598 # undef str
 599 #endif
 600                       {
 601                         c = FOLD (c);
 602                       normal_bracket:
 603
 604                         /* We have to handling the symbols differently in
 605                            ranges since then the collation sequence is
 606                            important.  */
 607                         is_range = (*p == L_('-') && p[1] != L_('\0')
 608                                     && p[1] != L_(']'));
 609
 610                         if (!is_range && c == fn)
 611                           goto matched;
 612
 613 #if _LIBC
 614                         /* This is needed if we goto normal_bracket; from
 615                            outside of is_seqval's scope.  */
 616                         is_seqval = false;
 617 #endif
 618
 619                         cold = c;
 620                         c = *p++;
 621                       }
 622
 623                     if (c == L_('-') && *p != L_(']'))
 624                       {
 625 #if _LIBC
 626                         /* We have to find the collation sequence
 627                            value for C.  Collation sequence is nothing
 628                            we can regularly access.  The sequence
 629                            value is defined by the order in which the
 630                            definitions of the collation values for the
 631                            various characters appear in the source
 632                            file.  A strange concept, nowhere
 633                            documented.  */
 634                         uint32_t fcollseq;
 635                         uint32_t lcollseq;
 636                         UCHAR cend = *p++;
 637
 638 # ifdef WIDE_CHAR_VERSION
 639                         /* Search in the `names' array for the characters.  */
 640                         fcollseq = __collseq_table_lookup (collseq, fn);
 641                         if (fcollseq == ~((uint32_t) 0))
 642                           /* XXX We don't know anything about the character
 643                              we are supposed to match.  This means we are
 644                              failing.  */
 645                           goto range_not_matched;
 646
 647                         if (is_seqval)
 648                           lcollseq = cold;
 649                         else
 650                           lcollseq = __collseq_table_lookup (collseq, cold);
 651 # else
 652                         fcollseq = collseq[fn];
 653                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 654 # endif
 655
 656                         is_seqval = false;
 657                         if (cend == L_('[') && *p == L_('.'))
 658                           {
 659                             uint32_t nrules =
 660                               _NL_CURRENT_WORD (LC_COLLATE,
 661                                                 _NL_COLLATE_NRULES);
 662                             const CHAR *startp = p;
 663                             size_t c1 = 0;
 664
 665                             while (1)
 666                               {
 667                                 c = *++p;
 668                                 if (c == L_('.') && p[1] == L_(']'))
 669                                   {
 670                                     p += 2;
 671                                     break;
 672                                   }
 673                                 if (c == '\0')
 674                                   return FNM_NOMATCH;
 675                                 ++c1;
 676                               }
 677
 678                             if (nrules == 0)
 679                               {
 680                                 /* There are no names defined in the
 681                                    collation data.  Therefore we only
 682                                    accept the trivial names consisting
 683                                    of the character itself.  */
 684                                 if (c1 != 1)
 685                                   return FNM_NOMATCH;
 686
 687                                 cend = startp[1];
 688                               }
 689                             else
 690                               {
 691                                 int32_t table_size;
 692                                 const int32_t *symb_table;
 693 # ifdef WIDE_CHAR_VERSION
 694                                 char str[c1];
 695                                 size_t strcnt;
 696 # else
 697 #  define str (startp + 1)
 698 # endif
 699                                 const unsigned char *extra;
 700                                 int32_t idx;
 701                                 int32_t elem;
 702                                 int32_t second;
 703                                 int32_t hash;
 704
 705 # ifdef WIDE_CHAR_VERSION
 706                                 /* We have to convert the name to a single-byte
 707                                    string.  This is possible since the names
 708                                    consist of ASCII characters and the internal
 709                                    representation is UCS4.  */
 710                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 711                                   str[strcnt] = startp[1 + strcnt];
 712 # endif
 713
 714                                 table_size =
 715                                   _NL_CURRENT_WORD (LC_COLLATE,
 716                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 717                                 symb_table = (const int32_t *)
 718                                   _NL_CURRENT (LC_COLLATE,
 719                                                _NL_COLLATE_SYMB_TABLEMB);
 720                                 extra = (const unsigned char *)
 721                                   _NL_CURRENT (LC_COLLATE,
 722                                                _NL_COLLATE_SYMB_EXTRAMB);
 723
 724                                 /* Locate the character in the hashing
 725                                    table.  */
 726                                 hash = elem_hash (str, c1);
 727
 728                                 idx = 0;
 729                                 elem = hash % table_size;
 730                                 if (symb_table[2 * elem] != 0)
 731                                   {
 732                                     second = hash % (table_size - 2) + 1;
 733
 734                                     do
 735                                       {
 736                                         /* First compare the hashing value.  */
 737                                         if (symb_table[2 * elem] == hash
 738                                             && (c1
 739                                                 == extra[symb_table[2 * elem + 1]])
 740                                             && memcmp (str,
 741                                                        &extra[symb_table[2 * elem + 1]
 742                                                               + 1], c1) == 0)
 743                                           {
 744                                             /* Yep, this is the entry.  */
 745                                             idx = symb_table[2 * elem + 1];
 746                                             idx += 1 + extra[idx];
 747                                             break;
 748                                           }
 749
 750                                         /* Next entry.  */
 751                                         elem += second;
 752                                       }
 753                                     while (symb_table[2 * elem] != 0);
 754                                   }
 755
 756                                 if (symb_table[2 * elem] != 0)
 757                                   {
 758                                     /* Compare the byte sequence but only if
 759                                        this is not part of a range.  */
 760 # ifdef WIDE_CHAR_VERSION
 761                                     int32_t *wextra;
 762
 763                                     idx += 1 + extra[idx];
 764                                     /* Adjust for the alignment.  */
 765                                     idx = (idx + 3) & ~4;
 766
 767                                     wextra = (int32_t *) &extra[idx + 4];
 768 # endif
 769                                     /* Get the collation sequence value.  */
 770                                     is_seqval = true;
 771 # ifdef WIDE_CHAR_VERSION
 772                                     cend = wextra[1 + wextra[idx]];
 773 # else
 774                                     /* Adjust for the alignment.  */
 775                                     idx += 1 + extra[idx];
 776                                     idx = (idx + 3) & ~4;
 777                                     cend = *((int32_t *) &extra[idx]);
 778 # endif
 779                                   }
 780                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 781                                   {
 782                                     cend = str[0];
 783                                     c = *p++;
 784                                   }
 785                                 else
 786                                   return FNM_NOMATCH;
 787                               }
 788 # undef str
 789                           }
 790                         else
 791                           {
 792                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 793                               cend = *p++;
 794                             if (cend == L_('\0'))
 795                               return FNM_NOMATCH;
 796                             cend = FOLD (cend);
 797                           }
 798
 799                         /* XXX It is not entirely clear to me how to handle
 800                            characters which are not mentioned in the
 801                            collation specification.  */
 802                         if (
 803 # ifdef WIDE_CHAR_VERSION
 804                             lcollseq == 0xffffffff ||
 805 # endif
 806                             lcollseq <= fcollseq)
 807                           {
 808                             /* We have to look at the upper bound.  */
 809                             uint32_t hcollseq;
 810
 811                             if (is_seqval)
 812                               hcollseq = cend;
 813                             else
 814                               {
 815 # ifdef WIDE_CHAR_VERSION
 816                                 hcollseq =
 817                                   __collseq_table_lookup (collseq, cend);
 818                                 if (hcollseq == ~((uint32_t) 0))
 819                                   {
 820                                     /* Hum, no information about the upper
 821                                        bound.  The matching succeeds if the
 822                                        lower bound is matched exactly.  */
 823                                     if (lcollseq != fcollseq)
 824                                       goto range_not_matched;
 825
 826                                     goto matched;
 827                                   }
 828 # else
 829                                 hcollseq = collseq[cend];
 830 # endif
 831                               }
 832
 833                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 834                               goto matched;
 835                           }
 836 # ifdef WIDE_CHAR_VERSION
 837                       range_not_matched:
 838 # endif
 839 #else
 840                         /* We use a boring value comparison of the character
 841                            values.  This is better than comparing using
 842                            `strcoll' since the latter would have surprising
 843                            and sometimes fatal consequences.  */
 844                         UCHAR cend = *p++;
 845
 846                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 847                           cend = *p++;
 848                         if (cend == L_('\0'))
 849                           return FNM_NOMATCH;
 850
 851                         /* It is a range.  */
 852                         if (cold <= fn && fn <= cend)
 853                           goto matched;
 854 #endif
 855
 856                         c = *p++;
 857                       }
 858                   }
 859
 860                 if (c == L_(']'))
 861                   break;
 862               }
 863
 864             if (!not)
 865               return FNM_NOMATCH;
 866             break;
 867
 868           matched:
 869             /* Skip the rest of the [...] that already matched.  */
 870             do
 871               {
 872               ignore_next:
 873                 c = *p++;
 874
 875                 if (c == L_('\0'))
 876                   /* [... (unterminated) loses.  */
 877                   return FNM_NOMATCH;
 878
 879                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 880                   {
 881                     if (*p == L_('\0'))
 882                       return FNM_NOMATCH;
 883                     /* XXX 1003.2d11 is unclear if this is right.  */
 884                     ++p;
 885                   }
 886                 else if (c == L_('[') && *p == L_(':'))
 887                   {
 888                     int c1 = 0;
 889                     const CHAR *startp = p;
 890
 891                     while (1)
 892                       {
 893                         c = *++p;
 894                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 895                           return FNM_NOMATCH;
 896
 897                         if (*p == L_(':') && p[1] == L_(']'))
 898                           break;
 899
 900                         if (c < L_('a') || c >= L_('z'))
 901                           {
 902                             p = startp;
 903                             goto ignore_next;
 904                           }
 905                       }
 906                     p += 2;
 907                     c = *p++;
 908                   }
 909                 else if (c == L_('[') && *p == L_('='))
 910                   {
 911                     c = *++p;
 912                     if (c == L_('\0'))
 913                       return FNM_NOMATCH;
 914                     c = *++p;
 915                     if (c != L_('=') || p[1] != L_(']'))
 916                       return FNM_NOMATCH;
 917                     p += 2;
 918                     c = *p++;
 919                   }
 920                 else if (c == L_('[') && *p == L_('.'))
 921                   {
 922                     ++p;
 923                     while (1)
 924                       {
 925                         c = *++p;
 926                         if (c == '\0')
 927                           return FNM_NOMATCH;
 928
 929                         if (*p == L_('.') && p[1] == L_(']'))
 930                           break;
 931                       }
 932                     p += 2;
 933                     c = *p++;
 934                   }
 935               }
 936             while (c != L_(']'));
 937             if (not)
 938               return FNM_NOMATCH;
 939           }
 940           break;
 941
 942         case L_('+'):
 943         case L_('@'):
 944         case L_('!'):
 945           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 946             {
 947               int res;
 948
 949               res = EXT (c, p, n, string_end, no_leading_period, flags);
 950               if (res != -1)
 951                 return res;
 952             }
 953           goto normal_match;
 954
 955         case L_('/'):
 956           if (NO_LEADING_PERIOD (flags))
 957             {
 958               if (n == string_end || c != (UCHAR) *n)
 959                 return FNM_NOMATCH;
 960
 961               new_no_leading_period = true;
 962               break;
 963             }
 964           /* FALLTHROUGH */
 965         default:
 966         normal_match:
 967           if (n == string_end || c != FOLD ((UCHAR) *n))
 968             return FNM_NOMATCH;
 969         }
 970
 971       no_leading_period = new_no_leading_period;
 972       ++n;
 973     }
 974
 975   if (n == string_end)
 976     return 0;
 977
 978   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
 979     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 980     return 0;
 981
 982   return FNM_NOMATCH;
 983 }
 984
 985
 986 static const CHAR *
 987 internal_function
 988 END (const CHAR *pattern)
 989 {
 990   const CHAR *p = pattern;
 991
 992   while (1)
 993     if (*++p == L_('\0'))
 994       /* This is an invalid pattern.  */
 995       return pattern;
 996     else if (*p == L_('['))
 997       {
 998         /* Handle brackets special.  */
 999         if (posixly_correct == 0)
1000           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1001
1002         /* Skip the not sign.  We have to recognize it because of a possibly
1003            following ']'.  */
1004         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1005           ++p;
1006         /* A leading ']' is recognized as such.  */
1007         if (*p == L_(']'))
1008           ++p;
1009         /* Skip over all characters of the list.  */
1010         while (*p != L_(']'))
1011           if (*p++ == L_('\0'))
1012             /* This is no valid pattern.  */
1013             return pattern;
1014       }
1015     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1016               || *p == L_('!')) && p[1] == L_('('))
1017       p = END (p + 1);
1018     else if (*p == L_(')'))
1019       break;
1020
1021   return p + 1;
1022 }
1023
1024
1025 static int
1026 internal_function
1027 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1028      bool no_leading_period, int flags)
1029 {
1030   const CHAR *startp;
1031   size_t level;
1032   struct patternlist
1033   {
1034     struct patternlist *next;
1035     CHAR str[1];
1036   } *list = NULL;
1037   struct patternlist **lastp = &list;
1038   size_t pattern_len = STRLEN (pattern);
1039   const CHAR *p;
1040   const CHAR *rs;
1041   enum { ALLOCA_LIMIT = 8000 };
1042
1043   /* Parse the pattern.  Store the individual parts in the list.  */
1044   level = 0;
1045   for (startp = p = pattern + 1; ; ++p)
1046     if (*p == L_('\0'))
1047       /* This is an invalid pattern.  */
1048       return -1;
1049     else if (*p == L_('['))
1050       {
1051         /* Handle brackets special.  */
1052         if (posixly_correct == 0)
1053           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1054
1055         /* Skip the not sign.  We have to recognize it because of a possibly
1056            following ']'.  */
1057         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1058           ++p;
1059         /* A leading ']' is recognized as such.  */
1060         if (*p == L_(']'))
1061           ++p;
1062         /* Skip over all characters of the list.  */
1063         while (*p != L_(']'))
1064           if (*p++ == L_('\0'))
1065             /* This is no valid pattern.  */
1066             return -1;
1067       }
1068     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1069               || *p == L_('!')) && p[1] == L_('('))
1070       /* Remember the nesting level.  */
1071       ++level;
1072     else if (*p == L_(')'))
1073       {
1074         if (level-- == 0)
1075           {
1076             /* This means we found the end of the pattern.  */
1077 #define NEW_PATTERN \
1078             struct patternlist *newp;                                         \
1079             size_t plen;                                                      \
1080             size_t plensize;                                                  \
1081             size_t newpsize;                                                  \
1082                                                                               \
1083             plen = (opt == L_('?') || opt == L_('@')                          \
1084                     ? pattern_len                                             \
1085                     : p - startp + 1UL);                                      \
1086             plensize = plen * sizeof (CHAR);                                  \
1087             newpsize = offsetof (struct patternlist, str) + plensize;         \
1088             if ((size_t) -1 / sizeof (CHAR) < plen                            \
1089                 || newpsize < offsetof (struct patternlist, str)              \
1090                 || ALLOCA_LIMIT <= newpsize)                                  \
1091               return -1;                                                      \
1092             newp = (struct patternlist *) alloca (newpsize);                  \
1093             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');    \
1094             newp->next = NULL;                                                \
1095             *lastp = newp;                                                    \
1096             lastp = &newp->next
1097             NEW_PATTERN;
1098             break;
1099           }
1100       }
1101     else if (*p == L_('|'))
1102       {
1103         if (level == 0)
1104           {
1105             NEW_PATTERN;
1106             startp = p + 1;
1107           }
1108       }
1109   assert (list != NULL);
1110   assert (p[-1] == L_(')'));
1111 #undef NEW_PATTERN
1112
1113   switch (opt)
1114     {
1115     case L_('*'):
1116       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1117         return 0;
1118       /* FALLTHROUGH */
1119
1120     case L_('+'):
1121       do
1122         {
1123           for (rs = string; rs <= string_end; ++rs)
1124             /* First match the prefix with the current pattern with the
1125                current pattern.  */
1126             if (FCT (list->str, string, rs, no_leading_period,
1127                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1128                 /* This was successful.  Now match the rest with the rest
1129                    of the pattern.  */
1130                 && (FCT (p, rs, string_end,
1131                          rs == string
1132                          ? no_leading_period
1133                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1134                          flags & FNM_FILE_NAME
1135                          ? flags : flags & ~FNM_PERIOD) == 0
1136                     /* This didn't work.  Try the whole pattern.  */
1137                     || (rs != string
1138                         && FCT (pattern - 1, rs, string_end,
1139                                 rs == string
1140                                 ? no_leading_period
1141                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1142                                 flags & FNM_FILE_NAME
1143                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1144               /* It worked.  Signal success.  */
1145               return 0;
1146         }
1147       while ((list = list->next) != NULL);
1148
1149       /* None of the patterns lead to a match.  */
1150       return FNM_NOMATCH;
1151
1152     case L_('?'):
1153       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1154         return 0;
1155       /* FALLTHROUGH */
1156
1157     case L_('@'):
1158       do
1159         /* I cannot believe it but `strcat' is actually acceptable
1160            here.  Match the entire string with the prefix from the
1161            pattern list and the rest of the pattern following the
1162            pattern list.  */
1163         if (FCT (STRCAT (list->str, p), string, string_end,
1164                  no_leading_period,
1165                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1166           /* It worked.  Signal success.  */
1167           return 0;
1168       while ((list = list->next) != NULL);
1169
1170       /* None of the patterns lead to a match.  */
1171       return FNM_NOMATCH;
1172
1173     case L_('!'):
1174       for (rs = string; rs <= string_end; ++rs)
1175         {
1176           struct patternlist *runp;
1177
1178           for (runp = list; runp != NULL; runp = runp->next)
1179             if (FCT (runp->str, string, rs,  no_leading_period,
1180                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1181               break;
1182
1183           /* If none of the patterns matched see whether the rest does.  */
1184           if (runp == NULL
1185               && (FCT (p, rs, string_end,
1186                        rs == string
1187                        ? no_leading_period
1188                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1189                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1190                   == 0))
1191             /* This is successful.  */
1192             return 0;
1193         }
1194
1195       /* None of the patterns together with the rest of the pattern
1196          lead to a match.  */
1197       return FNM_NOMATCH;
1198
1199     default:
1200       assert (! "Invalid extended matching operator");
1201       break;
1202     }
1203
1204   return -1;
1205 }
1206
1207
1208 #undef FOLD
1209 #undef CHAR
1210 #undef UCHAR
1211 #undef INT
1212 #undef FCT
1213 #undef EXT
1214 #undef END
1215 #undef MEMPCPY
1216 #undef MEMCHR
1217 #undef STRCOLL
1218 #undef STRLEN
1219 #undef STRCAT
1220 #undef L_
1221 #undef BTOWC