utimens: cache whether utimensat syscall works

[pspp] / lib / regex_internal.c
diff --git a/lib/regex_internal.c b/lib/regex_internal.c

index 633db7a7fb4e4a213184b034cce5a361a92fa6f8..904b88ed934a2a19ab53db2fe2fcbe74b58a3d97 100644 (file)
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -1,5 +1,6 @@
  /* Extended regular expression matching and search library.
-   Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+   Free Software Foundation, Inc.
     This file is part of the GNU C Library.
     Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
  
@@ -235,7 +236,7 @@ build_wcs_buffer (re_string_t *pstr)
         }
        else
         p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
-      mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+      mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
        if (BE (mbclen == (size_t) -2, 0))
         {
           /* The buffer doesn't have enough space, finish to build.  */
@@ -305,9 +306,9 @@ build_wcs_upper_buffer (re_string_t *pstr)
  
           remain_len = end_idx - byte_idx;
           prev_st = pstr->cur_state;
-         mbclen = mbrtowc (&wc,
-                           ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
-                            + byte_idx), remain_len, &pstr->cur_state);
+         mbclen = __mbrtowc (&wc,
+                             ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+                              + byte_idx), remain_len, &pstr->cur_state);
           if (BE (mbclen < (size_t) -2, 1))
             {
               wchar_t wcu = wc;
@@ -375,7 +376,7 @@ build_wcs_upper_buffer (re_string_t *pstr)
           }
         else
           p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
-       mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+       mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
         if (BE (mbclen < (size_t) -2, 1))
           {
             wchar_t wcu = wc;
@@ -498,8 +499,8 @@ re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
        Idx remain_len;
        remain_len = pstr->len - rawbuf_idx;
        prev_st = pstr->cur_state;
-      mbclen = mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
-                       remain_len, &pstr->cur_state);
+      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
+                         remain_len, &pstr->cur_state);
        if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
         {
           /* We treat these cases as a single byte character.  */
@@ -597,35 +598,99 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
  
    if (BE (offset != 0, 1))
      {
-      /* Are the characters which are already checked remain?  */
-      if (BE (offset < pstr->valid_raw_len, 1)
-#ifdef RE_ENABLE_I18N
-         /* Handling this would enlarge the code too much.
-            Accept a slowdown in that case.  */
-         && pstr->offsets_needed == 0
-#endif
-        )
+      /* Should the already checked characters be kept?  */
+      if (BE (offset < pstr->valid_raw_len, 1))
         {
           /* Yes, move them to the front of the buffer.  */
-         pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
  #ifdef RE_ENABLE_I18N
-         if (pstr->mb_cur_max > 1)
-           memmove (pstr->wcs, pstr->wcs + offset,
-                    (pstr->valid_len - offset) * sizeof (wint_t));
+         if (BE (pstr->offsets_needed, 0))
+           {
+             Idx low = 0, high = pstr->valid_len, mid;
+             do
+               {
+                 mid = (high + low) / 2;
+                 if (pstr->offsets[mid] > offset)
+                   high = mid;
+                 else if (pstr->offsets[mid] < offset)
+                   low = mid + 1;
+                 else
+                   break;
+               }
+             while (low < high);
+             if (pstr->offsets[mid] < offset)
+               ++mid;
+             pstr->tip_context = re_string_context_at (pstr, mid - 1,
+                                                       eflags);
+             /* This can be quite complicated, so handle specially
+                only the common and easy case where the character with
+                different length representation of lower and upper
+                case is present at or after offset.  */
+             if (pstr->valid_len > offset
+                 && mid == offset && pstr->offsets[mid] == offset)
+               {
+                 memmove (pstr->wcs, pstr->wcs + offset,
+                          (pstr->valid_len - offset) * sizeof (wint_t));
+                 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
+                 pstr->valid_len -= offset;
+                 pstr->valid_raw_len -= offset;
+                 for (low = 0; low < pstr->valid_len; low++)
+                   pstr->offsets[low] = pstr->offsets[low + offset] - offset;
+               }
+             else
+               {
+                 /* Otherwise, just find out how long the partial multibyte
+                    character at offset is and fill it with WEOF/255.  */
+                 pstr->len = pstr->raw_len - idx + offset;
+                 pstr->stop = pstr->raw_stop - idx + offset;
+                 pstr->offsets_needed = 0;
+                 while (mid > 0 && pstr->offsets[mid - 1] == offset)
+                   --mid;
+                 while (mid < pstr->valid_len)
+                   if (pstr->wcs[mid] != WEOF)
+                     break;
+                   else
+                     ++mid;
+                 if (mid == pstr->valid_len)
+                   pstr->valid_len = 0;
+                 else
+                   {
+                     pstr->valid_len = pstr->offsets[mid] - offset;
+                     if (pstr->valid_len)
+                       {
+                         for (low = 0; low < pstr->valid_len; ++low)
+                           pstr->wcs[low] = WEOF;
+                         memset (pstr->mbs, 255, pstr->valid_len);
+                       }
+                   }
+                 pstr->valid_raw_len = pstr->valid_len;
+               }
+           }
+         else
+#endif
+           {
+             pstr->tip_context = re_string_context_at (pstr, offset - 1,
+                                                       eflags);
+#ifdef RE_ENABLE_I18N
+             if (pstr->mb_cur_max > 1)
+               memmove (pstr->wcs, pstr->wcs + offset,
+                        (pstr->valid_len - offset) * sizeof (wint_t));
  #endif /* RE_ENABLE_I18N */
-         if (BE (pstr->mbs_allocated, 0))
-           memmove (pstr->mbs, pstr->mbs + offset,
-                    pstr->valid_len - offset);
-         pstr->valid_len -= offset;
-         pstr->valid_raw_len -= offset;
+             if (BE (pstr->mbs_allocated, 0))
+               memmove (pstr->mbs, pstr->mbs + offset,
+                        pstr->valid_len - offset);
+             pstr->valid_len -= offset;
+             pstr->valid_raw_len -= offset;
  #if DEBUG
-         assert (pstr->valid_len > 0);
+             assert (pstr->valid_len > 0);
  #endif
+           }
         }
        else
         {
-         /* No, skip all characters until IDX.  */
  #ifdef RE_ENABLE_I18N
+         /* No, skip all characters until IDX.  */
+         Idx prev_valid_len = pstr->valid_len;
+
           if (BE (pstr->offsets_needed, 0))
             {
               pstr->len = pstr->raw_len - idx + offset;
@@ -642,12 +707,14 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
  
               if (pstr->is_utf8)
                 {
-                 const unsigned char *raw, *p, *q, *end;
+                 const unsigned char *raw, *p, *end;
  
                   /* Special case UTF-8.  Multi-byte chars start with any
                      byte other than 0x80 - 0xbf.  */
                   raw = pstr->raw_mbs + pstr->raw_mbs_idx;
                   end = raw + (offset - pstr->mb_cur_max);
+                 if (end < pstr->raw_mbs)
+                   end = pstr->raw_mbs;
                   p = raw + offset - 1;
  #ifdef _LIBC
                   /* We know the wchar_t encoding is UCS4, so for the simple
@@ -655,7 +722,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
                   if (isascii (*p) && BE (pstr->trans == NULL, 1))
                     {
                       memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
-                     pstr->valid_len = 0;
+                     /* pstr->valid_len = 0; */
                       wc = (wchar_t) *p;
                     }
                   else
@@ -669,19 +736,17 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
                           unsigned char buf[6];
                           size_t mbclen;
  
-                         q = p;
                           if (BE (pstr->trans != NULL, 0))
                             {
                               int i = mlen < 6 ? mlen : 6;
                               while (--i >= 0)
                                 buf[i] = pstr->trans[p[i]];
-                             q = buf;
                             }
                           /* XXX Don't use mbrtowc, we know which conversion
                              to use (UTF-8 -> UCS4).  */
                           memset (&cur_state, 0, sizeof (cur_state));
-                         mbclen = mbrtowc (&wc2, (const char *) p, mlen,
-                                           &cur_state);
+                         mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
+                                             &cur_state);
                           if (raw + offset - p <= mbclen
                               && mbclen < (size_t) -2)
                             {
@@ -698,7 +763,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
                 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
               if (wc == WEOF)
                 pstr->tip_context
-                 = re_string_context_at (pstr, pstr->valid_raw_len - 1, eflags);
+                 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
               else
                 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
                                       && IS_WIDE_WORD_CHAR (wc))
@@ -711,7 +776,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
                   for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
                     pstr->wcs[wcs_idx] = WEOF;
                   if (pstr->mbs_allocated)
-                   memset (pstr->mbs, -1, pstr->valid_len);
+                   memset (pstr->mbs, 255, pstr->valid_len);
                 }
               pstr->valid_raw_len = pstr->valid_len;
             }
@@ -1340,7 +1405,6 @@ static Idx
  internal_function
  re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
  {
-  int type = token.type;
    if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
      {
        size_t new_nodes_alloc = dfa->nodes_alloc * 2;
@@ -1376,8 +1440,11 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
    dfa->nodes[dfa->nodes_len] = token;
    dfa->nodes[dfa->nodes_len].constraint = 0;
  #ifdef RE_ENABLE_I18N
+  {
+  int type = token.type;
    dfa->nodes[dfa->nodes_len].accept_mb =
      (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
+  }
  #endif
    dfa->nexts[dfa->nodes_len] = REG_MISSING;
    re_node_set_init_empty (dfa->edests + dfa->nodes_len);
@@ -1622,11 +1689,9 @@ create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
  
    for (i = 0 ; i < nodes->nelem ; i++)
      {
-      unsigned int constraint = 0;
        re_token_t *node = dfa->nodes + nodes->elems[i];
        re_token_type_t type = node->type;
-      if (node->constraint)
-       constraint = node->constraint;
+      unsigned int constraint = node->constraint;
  
        if (type == CHARACTER && !constraint)
         continue;
@@ -1639,8 +1704,6 @@ create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
         newstate->halt = 1;
        else if (type == OP_BACK_REF)
         newstate->has_backref = 1;
-      else if (type == ANCHOR)
-       constraint = node->opr.ctx_type;
  
        if (constraint)
         {