as this has better performance on typical 64-bit hosts.
Port bitset code to hosts with unusual word sizes.
* lib/regcomp.c (build_equiv_class, build_charclass):
(build_range_exp, build_collating_symbol):
Prefer bitset to re_bitset_ptr_t in prototypes, when the actual
argument is a bitset. This is merely a style issue, but it makes
it clearer that an entire array is expected.
(re_compile_fastmap_iter, init_dfa, init_word_char, optimize_subexps):
* lib/regcomp.c (lower_subexp, parse_bracket_exp):
(built_charclass_op):
Port to the case where bitset_word is not the same as unsigned int.
* lib/regex_internal.h (bitset_set, bitset_clear, bitset_contain):
(bitset_not, bitset_merge, bitset_set_all, bitset_mask):
Likewise.
* lib/regexec.c (check_dst_limits_calc_pos_1):
(check_subexp_matching_top):
(build_trtable, group_nodes_into_DFAstates):
Likewise.
* lib/regcomp.c (re_compile_fastmap_iter, utf8_sb_map):
(optimize_utf8):
Don't assume that SBC_MAX is a multiple of BITSET_WORD_BITS.
* lib/regex_internal.h (bitset_set_all, bitset_not): Likewise.
* lib/regexec.c (group_nodes_into_DFAstates): Likewise.
* lib/regcomp.c (utf8_sb_map): Don't assume UINT_MAX == 0xffffffff.
* lib/regcomp.c (optimize_subexps, lower_subexp):
Work even if bitset_word has holes in its bitwise representation.
* lib/regex_internal.h (BITSET_WORD_BITS): Likewise.
* lib/regexec.c (check_dst_limits_calc_pos_1):
(heck_subexp_matching_top): Likewise.
* lib/regex_internal.c (re_string_reconstruct):
Don't assume UCHAR_MAX == 255.
* lib/regex_internal.h (bitset_set_all): Likewise.
* lib/regex_internal.h (BITSET_WORD_BITS): Renamed from UINT_BITS.
All uses changed.
(BITSET_WORDS): Renamed from BITSET_UINTS. All uses changed.
(bitset_word): New type, replacing 'unsigned int' for bitset uses.
All uses changed.
(BITSET_WORD_MAX): New macro.
(bitset_set, bitset_clear, bitset_contain, bitset_empty):
(bitset_set_all, bitset_copy): Now inline functions, not macros.
(bitset_empty, bitset_copy):
Prefer sizeof (bitset) to multiplying it out ourselves.
(bitset_not_merge): Remove; unused.
(bitset_contain): Return bool, not unsigned int with one bit on.
All callers changed.
* lib/regexec.c (build_trtable): Don't assume bitset has no stricter
alignment than re_node_set; do this by defining a new internal
type struct dests_alloc and using it to allocate memory.
* config/srclist.txt: Add glibc bug 1302.
+2005-09-06 Paul Eggert <eggert@cs.ucla.edu>
+
+ * srclist.txt: Add glibc bug 1302.
+
2005-09-03 Simon Josefsson <jas@extundo.com>
- * srclist.txt: Add glibc bugs 1293.
+ * srclist.txt: Add glibc bug 1293.
2005-09-02 Bruno Haible <bruno@clisp.org>
-# $Id: srclist.txt,v 1.102 2005-09-03 09:05:55 jas Exp $
+# $Id: srclist.txt,v 1.103 2005-09-06 07:36:48 eggert Exp $
# Files for which we are not the source. See ./srclistvars.sh for the
# variable definitions.
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1282
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1285
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1291
+# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1302
#$LIBCSRC/posix/regcomp.c lib gpl
#
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1238
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1286
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1287
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1291
+# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1302
#$LIBCSRC/posix/regex_internal.c lib gpl
#
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1054
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1281
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1285
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1291
+# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1302
#$LIBCSRC/posix/regex_internal.h lib gpl
#
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1216
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1282
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1284
# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1285
+# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1302
#$LIBCSRC/posix/regexec.c lib gpl
#
# c89 changes $LIBCSRC/string/strdup.c lib gpl
+2005-09-05 Paul Eggert <eggert@cs.ucla.edu>
+
+ Change bitset word type from unsigned int to unsigned long int,
+ as this has better performance on typical 64-bit hosts.
+ Port bitset code to hosts with unusual word sizes.
+ * regcomp.c (build_equiv_class, build_charclass, build_range_exp):
+ (build_collating_symbol):
+ Prefer bitset to re_bitset_ptr_t in prototypes, when the actual
+ argument is a bitset. This is merely a style issue, but it makes
+ it clearer that an entire array is expected.
+ (re_compile_fastmap_iter, init_dfa, init_word_char, optimize_subexps):
+ * regcomp.c (lower_subexp, parse_bracket_exp, built_charclass_op):
+ Port to the case where bitset_word is not the same as unsigned int.
+ * regex_internal.h (bitset_set, bitset_clear, bitset_contain):
+ (bitset_not, bitset_merge, bitset_set_all, bitset_mask):
+ Likewise.
+ * regexec.c (check_dst_limits_calc_pos_1, check_subexp_matching_top):
+ (build_trtable, group_nodes_into_DFAstates):
+ Likewise.
+ * regcomp.c (re_compile_fastmap_iter, utf8_sb_map, optimize_utf8):
+ Don't assume that SBC_MAX is a multiple of BITSET_WORD_BITS.
+ * regex_internal.h (bitset_set_all, bitset_not): Likewise.
+ * regexec.c (group_nodes_into_DFAstates): Likewise.
+ * regcomp.c (utf8_sb_map): Don't assume UINT_MAX == 0xffffffff.
+ * regcomp.c (optimize_subexps, lower_subexp):
+ Work even if bitset_word has holes in its bitwise representation.
+ * regex_internal.h (BITSET_WORD_BITS): Likewise.
+ * regexec.c (check_dst_limits_calc_pos_1, check_subexp_matching_top):
+ Likewise.
+ * regex_internal.c (re_string_reconstruct):
+ Don't assume UCHAR_MAX == 255.
+ * regex_internal.h (bitset_set_all): Likewise.
+ * regex_internal.h (BITSET_WORD_BITS): Renamed from UINT_BITS.
+ All uses changed.
+ (BITSET_WORDS): Renamed from BITSET_UINTS. All uses changed.
+ (bitset_word): New type, replacing 'unsigned int' for bitset uses.
+ All uses changed.
+ (BITSET_WORD_MAX): New macro.
+ (bitset_set, bitset_clear, bitset_contain, bitset_empty):
+ (bitset_set_all, bitset_copy): Now inline functions, not macros.
+ (bitset_empty, bitset_copy):
+ Prefer sizeof (bitset) to multiplying it out ourselves.
+ (bitset_not_merge): Remove; unused.
+ (bitset_contain): Return bool, not unsigned int with one bit on.
+ All callers changed.
+ * regexec.c (build_trtable): Don't assume bitset has no stricter
+ alignment than re_node_set; do this by defining a new internal
+ type struct dests_alloc and using it to allocate memory.
+
2005-09-02 Paul Eggert <eggert@cs.ucla.edu>
Check for arithmetic overflow when calculating sizes, to prevent
re_string_t *regexp,
re_token_t *token);
#ifdef RE_ENABLE_I18N
-static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+static reg_errcode_t build_equiv_class (bitset sbcset,
re_charset_t *mbcset,
Idx *equiv_class_alloc,
const unsigned char *name);
static reg_errcode_t build_charclass (unsigned REG_TRANSLATE_TYPE trans,
- re_bitset_ptr_t sbcset,
+ bitset sbcset,
re_charset_t *mbcset,
Idx *char_class_alloc,
const unsigned char *class_name,
reg_syntax_t syntax);
#else /* not RE_ENABLE_I18N */
-static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+static reg_errcode_t build_equiv_class (bitset sbcset,
const unsigned char *name);
static reg_errcode_t build_charclass (unsigned REG_TRANSLATE_TYPE trans,
- re_bitset_ptr_t sbcset,
+ bitset sbcset,
const unsigned char *class_name,
reg_syntax_t syntax);
#endif /* not RE_ENABLE_I18N */
else if (type == SIMPLE_BRACKET)
{
int i, j, ch;
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
- if (dfa->nodes[node].opr.sbcset[i] & (1u << j))
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+ if (dfa->nodes[node].opr.sbcset[i] & ((bitset_word) 1 << j))
re_set_fastmap (fastmap, icase, ch);
}
#ifdef RE_ENABLE_I18N
is a valid collation element, and don't catch
'b' since 'b' is the only collation element
which starts from 'b'. */
- int j, ch;
const int32_t *table = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
- if (table[ch] < 0)
- re_set_fastmap (fastmap, icase, ch);
+ for (i = 0; i < SBC_MAX; ++i)
+ if (table[i] < 0)
+ re_set_fastmap (fastmap, icase, i);
}
# else
if (dfa->mb_cur_max > 1)
static const bitset utf8_sb_map =
{
/* Set the first 128 bits. */
-# if UINT_MAX == 0xffffffff
- 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
-# else
-# error "Add case for new unsigned int size"
+# if 2 < BITSET_WORDS
+ BITSET_WORD_MAX,
+# endif
+# if 4 < BITSET_WORDS
+ BITSET_WORD_MAX,
+# endif
+# if 6 < BITSET_WORDS
+ BITSET_WORD_MAX,
# endif
+# if 8 < BITSET_WORDS
+# error "Invalid BITSET_WORDS"
+# endif
+ (BITSET_WORD_MAX
+ >> (SBC_MAX % BITSET_WORD_BITS == 0
+ ? 0
+ : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
};
#endif
{
int i, j, ch;
- dfa->sb_char = re_calloc (unsigned int, BITSET_UINTS);
+ dfa->sb_char = re_calloc (bitset_word, BITSET_WORDS);
if (BE (dfa->sb_char == NULL, 0))
return REG_ESPACE;
- /* Clear all bits by, then set those corresponding to single
- byte chars. */
- bitset_empty (dfa->sb_char);
-
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
+ /* Set the bits corresponding to single byte chars. */
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
{
wint_t wch = __btowc (ch);
if (wch != WEOF)
- dfa->sb_char[i] |= 1u << j;
+ dfa->sb_char[i] |= (bitset_word) 1 << j;
# ifndef _LIBC
if (isascii (ch) && wch != ch)
dfa->map_notascii = 1;
{
int i, j, ch;
dfa->word_ops_used = 1;
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
if (isalnum (ch) || ch == '_')
- dfa->word_char[i] |= 1u << j;
+ dfa->word_char[i] |= (bitset_word) 1 << j;
}
/* Free the work area which are only used while compiling. */
return;
case SIMPLE_BRACKET:
/* Just double check. */
- for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i)
- if (dfa->nodes[node].opr.sbcset[i])
- return;
+ {
+ int rshift =
+ (SBC_MAX / 2 % BITSET_WORD_BITS == 0
+ ? 0
+ : BITSET_WORD_BITS - SBC_MAX / 2 % BITSET_WORD_BITS);
+ for (i = SBC_MAX / 2 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
+ {
+ if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)
+ return;
+ rshift = 0;
+ }
+ }
break;
default:
abort ();
node->left->parent = node;
dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
- if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map)
- dfa->used_bkref_map &= ~(1u << other_idx);
+ if (other_idx < BITSET_WORD_BITS)
+ dfa->used_bkref_map &= ~ ((bitset_word) 1 << other_idx);
}
return REG_NOERROR;
very common, so we do not lose much. An example that triggers
this case is the sed "script" /\(\)/x. */
&& node->left != NULL
- && (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map
- || !(dfa->used_bkref_map & (1u << node->token.opr.idx))))
+ && ! (node->token.opr.idx < BITSET_WORD_BITS
+ && dfa->used_bkref_map & ((bitset_word) 1 << node->token.opr.idx)))
return node->left;
/* Convert the SUBEXP node to the concatenation of an
update it. */
static reg_errcode_t
-build_range_exp (re_bitset_ptr_t sbcset,
+build_range_exp (bitset sbcset,
# ifdef RE_ENABLE_I18N
re_charset_t *mbcset, Idx *range_alloc,
# endif
pointer argument since we may update it. */
static reg_errcode_t
-build_collating_symbol (re_bitset_ptr_t sbcset,
+build_collating_symbol (bitset sbcset,
# ifdef RE_ENABLE_I18N
re_charset_t *mbcset, Idx *coll_sym_alloc,
# endif
auto inline reg_errcode_t
__attribute ((always_inline))
- build_range_exp (re_bitset_ptr_t sbcset, re_charset_t *mbcset,
+ build_range_exp (bitset sbcset, re_charset_t *mbcset,
Idx *range_alloc,
bracket_elem_t *start_elem, bracket_elem_t *end_elem)
{
auto inline reg_errcode_t
__attribute ((always_inline))
- build_collating_symbol (re_bitset_ptr_t sbcset, re_charset_t *mbcset,
+ build_collating_symbol (bitset sbcset, re_charset_t *mbcset,
Idx *coll_sym_alloc, const unsigned char *name)
{
int32_t elem, idx;
_NL_COLLATE_SYMB_EXTRAMB);
}
#endif
- sbcset = re_calloc (unsigned int, BITSET_UINTS);
+ sbcset = re_calloc (bitset_word, BITSET_WORDS);
#ifdef RE_ENABLE_I18N
mbcset = re_calloc (re_charset_t, 1);
#endif /* RE_ENABLE_I18N */
mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
if (BE (mbc_tree == NULL, 0))
goto parse_bracket_exp_espace;
- for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx)
+ for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
if (sbcset[sbc_idx])
break;
/* If there are no bits set in sbcset, there is no point
of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
- if (sbc_idx < BITSET_UINTS)
+ if (sbc_idx < BITSET_WORDS)
{
/* Build a tree for simple bracket. */
br_token.type = SIMPLE_BRACKET;
is a pointer argument sinse we may update it. */
static reg_errcode_t
-build_equiv_class (re_bitset_ptr_t sbcset,
+build_equiv_class (bitset sbcset,
#ifdef RE_ENABLE_I18N
re_charset_t *mbcset, Idx *equiv_class_alloc,
#endif
is a pointer argument sinse we may update it. */
static reg_errcode_t
-build_charclass (unsigned REG_TRANSLATE_TYPE trans, re_bitset_ptr_t sbcset,
+build_charclass (unsigned REG_TRANSLATE_TYPE trans, bitset sbcset,
#ifdef RE_ENABLE_I18N
re_charset_t *mbcset, Idx *char_class_alloc,
#endif
re_token_t br_token;
bin_tree_t *tree;
- sbcset = re_calloc (unsigned int, BITSET_UINTS);
+ sbcset = re_calloc (bitset_word, BITSET_WORDS);
#ifdef RE_ENABLE_I18N
mbcset = re_calloc (re_charset_t, 1);
#endif /* RE_ENABLE_I18N */
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
pstr->wcs[wcs_idx] = WEOF;
if (pstr->mbs_allocated)
- memset (pstr->mbs, 255, pstr->valid_len);
+ memset (pstr->mbs, -1, pstr->valid_len);
}
pstr->valid_raw_len = pstr->valid_len;
pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
/* A hash value, suitable for computing hash tables. */
typedef __re_size_t re_hashval_t;
-/* Number of bits in an unsinged int. */
-#define UINT_BITS (sizeof (unsigned int) * CHAR_BIT)
-/* Number of unsigned int in an bit_set. */
-#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
-typedef unsigned int bitset[BITSET_UINTS];
-typedef unsigned int *re_bitset_ptr_t;
-typedef const unsigned int *re_const_bitset_ptr_t;
-
-#define bitset_set(set,i) (set[i / UINT_BITS] |= 1u << i % UINT_BITS)
-#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1u << i % UINT_BITS))
-#define bitset_contain(set,i) (set[i / UINT_BITS] & (1u << i % UINT_BITS))
-#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
-#define bitset_set_all(set) \
- memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
-#define bitset_copy(dest,src) \
- memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS)
+/* An integer used to represent a set of bits. It must be unsigned,
+ and must be at least as wide as unsigned int. */
+typedef unsigned long int bitset_word;
+
+/* Maximum value of a bitset word. It must be useful in preprocessor
+ contexts, and must be consistent with bitset_word. */
+#define BITSET_WORD_MAX ULONG_MAX
+
+/* Number of bits in a bitset word. Avoid greater-than-32-bit
+ integers and unconditional shifts by more than 31 bits, as they're
+ not portable. */
+#if BITSET_WORD_MAX == 0xffffffff
+# define BITSET_WORD_BITS 32
+#elif BITSET_WORD_MAX >> 31 >> 5 == 1
+# define BITSET_WORD_BITS 36
+#elif BITSET_WORD_MAX >> 31 >> 16 == 1
+# define BITSET_WORD_BITS 48
+#elif BITSET_WORD_MAX >> 31 >> 28 == 1
+# define BITSET_WORD_BITS 60
+#elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1
+# define BITSET_WORD_BITS 64
+#elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1
+# define BITSET_WORD_BITS 72
+#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1
+# define BITSET_WORD_BITS 128
+#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1
+# define BITSET_WORD_BITS 256
+#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1
+# define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */
+# if BITSET_WORD_BITS <= SBC_MAX
+# error "Invalid SBC_MAX"
+# endif
+#else
+# error "Add case for new bitset_word size"
+#endif
+
+/* Number of bitset words in a bitset. */
+#define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS)
+
+typedef bitset_word bitset[BITSET_WORDS];
+typedef bitset_word *re_bitset_ptr_t;
+typedef const bitset_word *re_const_bitset_ptr_t;
#define PREV_WORD_CONSTRAINT 0x0001
#define PREV_NOTWORD_CONSTRAINT 0x0002
Idx nbackref; /* The number of backreference in this dfa. */
/* Bitmap expressing which backreference is used. */
- unsigned int used_bkref_map;
- unsigned int completed_bkref_map;
+ bitset_word used_bkref_map;
+ bitset_word completed_bkref_map;
unsigned int has_plural_match : 1;
/* If this dfa has "multibyte node", which is a backreference or
/* Inline functions for bitset operation. */
+
static inline void
-bitset_not (bitset set)
+bitset_set (bitset set, Idx i)
{
- int bitset_i;
- for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
- set[bitset_i] = ~set[bitset_i];
+ set[i / BITSET_WORD_BITS] |= (bitset_word) 1 << i % BITSET_WORD_BITS;
}
static inline void
-bitset_merge (bitset dest, const bitset src)
+bitset_clear (bitset set, Idx i)
+{
+ set[i / BITSET_WORD_BITS] &= ~ ((bitset_word) 1 << i % BITSET_WORD_BITS);
+}
+
+static inline bool
+bitset_contain (const bitset set, Idx i)
+{
+ return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1;
+}
+
+static inline void
+bitset_empty (bitset set)
{
- int bitset_i;
- for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
- dest[bitset_i] |= src[bitset_i];
+ memset (set, 0, sizeof (bitset));
}
static inline void
-bitset_not_merge (bitset dest, const bitset src)
+bitset_set_all (bitset set)
+{
+ memset (set, -1, sizeof (bitset_word) * (SBC_MAX / BITSET_WORD_BITS));
+ if (SBC_MAX % BITSET_WORD_BITS != 0)
+ set[BITSET_WORDS - 1] =
+ ((bitset_word) 1 << SBC_MAX % BITSET_WORD_BITS) - 1;
+}
+
+static inline void
+bitset_copy (bitset dest, const bitset src)
+{
+ memcpy (dest, src, sizeof (bitset));
+}
+
+static inline void
+bitset_not (bitset set)
+{
+ int i;
+ for (i = 0; i < SBC_MAX / BITSET_WORD_BITS; ++i)
+ set[i] = ~set[i];
+ if (SBC_MAX % BITSET_WORD_BITS != 0)
+ set[BITSET_WORDS - 1] =
+ (((bitset_word) 1 << SBC_MAX % BITSET_WORD_BITS) - 1
+ & ~set[BITSET_WORDS - 1]);
+}
+
+static inline void
+bitset_merge (bitset dest, const bitset src)
{
int i;
- for (i = 0; i < BITSET_UINTS; ++i)
- dest[i] |= ~src[i];
+ for (i = 0; i < BITSET_WORDS; ++i)
+ dest[i] |= src[i];
}
static inline void
bitset_mask (bitset dest, const bitset src)
{
- int bitset_i;
- for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
- dest[bitset_i] &= src[bitset_i];
+ int i;
+ for (i = 0; i < BITSET_WORDS; ++i)
+ dest[i] &= src[i];
}
#if defined RE_ENABLE_I18N
if (ent->node != node)
continue;
- if (subexp_idx
- < CHAR_BIT * sizeof ent->eps_reachable_subexps_map
- && !(ent->eps_reachable_subexps_map & (1u << subexp_idx)))
+ if (subexp_idx < BITSET_WORD_BITS
+ && !(ent->eps_reachable_subexps_map
+ & ((bitset_word) 1 << subexp_idx)))
continue;
/* Recurse trying to reach the OP_OPEN_SUBEXP and
if (cpos == 0 && (boundaries & 2))
return 0;
- if (subexp_idx
- < CHAR_BIT * sizeof ent->eps_reachable_subexps_map)
- ent->eps_reachable_subexps_map &= ~(1u << subexp_idx);
+ if (subexp_idx < BITSET_WORD_BITS)
+ ent->eps_reachable_subexps_map &=
+ ~ ((bitset_word) 1 << subexp_idx);
}
while (ent++->more);
}
{
Idx node = cur_nodes->elems[node_idx];
if (dfa->nodes[node].type == OP_OPEN_SUBEXP
- && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map
- && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx))
+ && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
+ && (dfa->used_bkref_map
+ & ((bitset_word) 1 << dfa->nodes[node].opr.idx)))
{
err = match_ctx_add_subtop (mctx, node, str_idx);
if (BE (err != REG_NOERROR, 0))
Idx i, j;
int ch;
bool need_word_trtable = false;
- unsigned int elem, mask;
+ bitset_word elem, mask;
bool dests_node_malloced = false, dest_states_malloced = false;
Idx ndests; /* Number of the destination states from `state'. */
re_dfastate_t **trtable;
bitset *dests_ch;
bitset acceptable;
+ struct dests_alloc
+ {
+ re_node_set dests_node[SBC_MAX];
+ bitset dests_ch[SBC_MAX];
+ } *dests_alloc;
+
/* We build DFA states which corresponds to the destination nodes
from `state'. `dests_node[i]' represents the nodes which i-th
destination state contains, and `dests_ch[i]' represents the
characters which i-th destination state accepts. */
- if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
- dests_node = (re_node_set *)
- alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+ if (__libc_use_alloca (sizeof (struct dests_alloc)))
+ dests_alloc = (struct dests_alloc *) alloca (sizeof dests_alloc[0]);
else
{
- dests_node = (re_node_set *)
- malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
- if (BE (dests_node == NULL, 0))
+ dests_alloc = re_malloc (struct dests_alloc, 1);
+ if (BE (dests_alloc == NULL, 0))
return false;
dests_node_malloced = true;
}
- dests_ch = (bitset *) (dests_node + SBC_MAX);
+ dests_node = dests_alloc->dests_node;
+ dests_ch = dests_alloc->dests_ch;
/* Initialize transiton table. */
state->word_trtable = state->trtable = NULL;
if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
{
if (dests_node_malloced)
- free (dests_node);
+ free (dests_alloc);
if (ndests == 0)
{
state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
for (i = 0; i < ndests; ++i)
re_node_set_free (dests_node + i);
if (dests_node_malloced)
- free (dests_node);
+ free (dests_alloc);
return false;
}
dest_states_malloced = true;
goto out_free;
/* For all characters ch...: */
- for (i = 0; i < BITSET_UINTS; ++i)
- for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
+ for (i = 0; i < BITSET_WORDS; ++i)
+ for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
elem;
mask <<= 1, elem >>= 1, ++ch)
if (BE (elem & 1, 0))
goto out_free;
/* For all characters ch...: */
- for (i = 0; i < BITSET_UINTS; ++i)
- for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
+ for (i = 0; i < BITSET_WORDS; ++i)
+ for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
elem;
mask <<= 1, elem >>= 1, ++ch)
if (BE (elem & 1, 0))
re_node_set_free (dests_node + i);
if (dests_node_malloced)
- free (dests_node);
+ free (dests_alloc);
return true;
}
#ifdef RE_ENABLE_I18N
else if (type == OP_UTF8_PERIOD)
{
- memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
+ if (SBC_MAX / 2 % BITSET_WORD_BITS == 0)
+ memset (accepts, -1, sizeof accepts / 2);
+ else
+ bitset_merge (accepts, utf8_sb_map);
if (!(dfa->syntax & REG_DOT_NEWLINE))
bitset_clear (accepts, '\n');
if (dfa->syntax & REG_DOT_NOT_NULL)
{
if (constraint & NEXT_NEWLINE_CONSTRAINT)
{
- unsigned int accepts_newline =
- bitset_contain (accepts, NEWLINE_CHAR);
+ bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
bitset_empty (accepts);
if (accepts_newline)
bitset_set (accepts, NEWLINE_CHAR);
if (constraint & NEXT_WORD_CONSTRAINT)
{
- unsigned int any_set = 0;
+ bitset_word any_set = 0;
if (type == CHARACTER && !node->word_char)
{
bitset_empty (accepts);
}
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
- for (j = 0; j < BITSET_UINTS; ++j)
+ for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
else
#endif
- for (j = 0; j < BITSET_UINTS; ++j)
+ for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= dfa->word_char[j]);
if (!any_set)
continue;
}
if (constraint & NEXT_NOTWORD_CONSTRAINT)
{
- unsigned int any_set = 0;
+ bitset_word any_set = 0;
if (type == CHARACTER && node->word_char)
{
bitset_empty (accepts);
}
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
- for (j = 0; j < BITSET_UINTS; ++j)
+ for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
else
#endif
- for (j = 0; j < BITSET_UINTS; ++j)
+ for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= ~dfa->word_char[j]);
if (!any_set)
continue;
bitset intersec; /* Intersection sets, see below. */
bitset remains;
/* Flags, see below. */
- unsigned int has_intersec, not_subset, not_consumed;
+ bitset_word has_intersec, not_subset, not_consumed;
/* Optimization, skip if this state doesn't accept the character. */
if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
/* Enumerate the intersection set of this state and `accepts'. */
has_intersec = 0;
- for (k = 0; k < BITSET_UINTS; ++k)
+ for (k = 0; k < BITSET_WORDS; ++k)
has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
/* And skip if the intersection set is empty. */
if (!has_intersec)
/* Then check if this state is a subset of `accepts'. */
not_subset = not_consumed = 0;
- for (k = 0; k < BITSET_UINTS; ++k)
+ for (k = 0; k < BITSET_WORDS; ++k)
{
not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];