2005-08-31 Paul Eggert <eggert@cs.ucla.edu>
+ * regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
+ (optimize_subexps, lower_subexp):
+ Don't assume 1<<31 has defined behavior on hosts with 32-bit int,
+ since the signed shift might overflow. Use 1u<<31 instead.
+ * regex_internal.h (bitset_set, bitset_clear, bitset_contain): Likewise.
+ * regexec.c (check_dst_limits_calc_pos_1, check_subexp_matching_top):
+ Likewise.
+ * regcomp.c (optimize_subexps, lower_subexp):
+ Use CHAR_BIT rather than 8, for clarity.
+ * regexec.c (check_dst_limits_calc_pos_1):
+ (check_subexp_matching_top): Likewise.
+ * regcomp.c (init_dfa): Make table_size unsigned, so that we don't
+ have to worry about portability issues when shifting it left.
+ Remove no-longer-needed test for table_size > 0.
+ * regcomp.c (parse_sub_exp): Do not shift more bits than there are
+ in a word, as the resulting behavior is undefined.
+ * regexec.c (check_dst_limits_calc_pos_1): Likewise;
+ in one case, a <= should have been an <, and in another case the
+ whole test was missing.
+ * regex_internal.h (BYTE_BITS): Remove. All uses changed to
+ the standard name CHAR_BIT.
+ * regexec.c (match_ctx_add_entry): Don't assume that ~0 == -1;
+ this is not true on one's complement and signed-magnitude hosts.
+
* regex_internal.h (re_sub_match_top_t): Remove unused member
next_last_offset.
(struct re_dfa_t): Remove unused member states_alloc.
int i, j, ch;
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
for (j = 0; j < UINT_BITS; ++j, ++ch)
- if (dfa->nodes[node].opr.sbcset[i] & (1 << j))
+ if (dfa->nodes[node].opr.sbcset[i] & (1u << j))
re_set_fastmap (fastmap, icase, ch);
}
#ifdef RE_ENABLE_I18N
static reg_errcode_t
init_dfa (re_dfa_t *dfa, int pat_len)
{
- int table_size;
+ unsigned int table_size;
#ifndef _LIBC
char *codeset_name;
#endif
dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
/* table_size = 2 ^ ceil(log pat_len) */
- for (table_size = 1; table_size > 0; table_size <<= 1)
+ for (table_size = 1; ; table_size <<= 1)
if (table_size > pat_len)
break;
{
wint_t wch = __btowc (ch);
if (wch != WEOF)
- dfa->sb_char[i] |= 1 << j;
+ dfa->sb_char[i] |= 1u << j;
# ifndef _LIBC
if (isascii (ch) && wch != ch)
dfa->map_notascii = 1;
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
for (j = 0; j < UINT_BITS; ++j, ++ch)
if (isalnum (ch) || ch == '_')
- dfa->word_char[i] |= 1 << j;
+ dfa->word_char[i] |= 1u << j;
}
/* Free the work area which are only used while compiling. */
node->left->parent = node;
dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
- if (other_idx < 8 * sizeof (dfa->used_bkref_map))
- dfa->used_bkref_map &= ~(1 << other_idx);
+ if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map)
+ dfa->used_bkref_map &= ~(1u << other_idx);
}
return REG_NOERROR;
very common, so we do not lose much. An example that triggers
this case is the sed "script" /\(\)/x. */
&& node->left != NULL
- && (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map)
- || !(dfa->used_bkref_map & (1 << node->token.opr.idx))))
+ && (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map
+ || !(dfa->used_bkref_map & (1u << node->token.opr.idx))))
return node->left;
/* Convert the SUBEXP node to the concatenation of an
if (BE (*err != REG_NOERROR, 0))
return NULL;
}
- dfa->completed_bkref_map |= 1 << cur_nsub;
+
+ if (cur_nsub <= '9' - '1')
+ dfa->completed_bkref_map |= 1 << cur_nsub;
tree = create_tree (dfa, tree, NULL, SUBEXP);
if (BE (tree == NULL, 0))
# define inline
#endif
-/* Number of bits in a byte. */
-#define BYTE_BITS 8
/* Number of single-byte characters. */
#define SBC_MAX 256
extern const size_t __re_error_msgid_idx[] attribute_hidden;
/* Number of bits in an unsigned int. */
-#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS)
+#define UINT_BITS (sizeof (unsigned int) * CHAR_BIT)
/* Number of unsigned ints in a bit_set. */
#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
typedef unsigned int bitset[BITSET_UINTS];
typedef unsigned int *re_bitset_ptr_t;
typedef const unsigned int *re_const_bitset_ptr_t;
-#define bitset_set(set,i) (set[i / UINT_BITS] |= 1 << i % UINT_BITS)
-#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1 << i % UINT_BITS))
-#define bitset_contain(set,i) (set[i / UINT_BITS] & (1 << i % UINT_BITS))
+#define bitset_set(set,i) (set[i / UINT_BITS] |= 1u << i % UINT_BITS)
+#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1u << i % UINT_BITS))
+#define bitset_contain(set,i) (set[i / UINT_BITS] & (1u << i % UINT_BITS))
#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
#define bitset_set_all(set) \
memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
if (ent->node != node)
continue;
- if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map)
- && !(ent->eps_reachable_subexps_map & (1 << subexp_idx)))
+ if (subexp_idx
+ < CHAR_BIT * sizeof ent->eps_reachable_subexps_map
+ && !(ent->eps_reachable_subexps_map & (1u << subexp_idx)))
continue;
/* Recurse trying to reach the OP_OPEN_SUBEXP and
if (cpos == 0 && (boundaries & 2))
return 0;
- ent->eps_reachable_subexps_map &= ~(1 << subexp_idx);
+ if (subexp_idx
+ < CHAR_BIT * sizeof ent->eps_reachable_subexps_map)
+ ent->eps_reachable_subexps_map &= ~(1u << subexp_idx);
}
while (ent++->more);
}
{
int node = cur_nodes->elems[node_idx];
if (dfa->nodes[node].type == OP_OPEN_SUBEXP
- && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
- && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
+ && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map
+ && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx))
{
err = match_ctx_add_subtop (mctx, node, str_idx);
if (BE (err != REG_NOERROR, 0))
A backreference does not epsilon-transition unless it is empty, so set
to all zeros if FROM != TO. */
mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
- = (from == to ? ~0 : 0);
+ = (from == to ? -1 : 0);
mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
if (mctx->max_mb_elem_len < to - from)