summaryrefslogtreecommitdiff
path: root/src/regex-emacs.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex-emacs.c')
-rw-r--r--src/regex-emacs.c1413
1 files changed, 795 insertions, 618 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index f4fe93210f7..0ec0c6eb63f 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -47,12 +47,8 @@
/* Make syntax table lookup grant data in gl_state. */
#define SYNTAX(c) syntax_property (c, 1)
-/* Convert the pointer to the char to BEG-based offset from the start. */
-#define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
-/* Strings are 0-indexed, buffers are 1-indexed; pun on the boolean
- result to get the right base index. */
-#define POS_AS_IN_BUFFER(p) \
- ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object)))
+/* Explicit syntax lookup using the buffer-local table. */
+#define BUFFER_SYNTAX(c) syntax_property (c, 0)
#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
#define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
@@ -103,7 +99,7 @@
#define IS_REAL_ASCII(c) ((c) < 0200)
/* 1 if C is a unibyte character. */
-#define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
+#define ISUNIBYTE(c) SINGLE_BYTE_CHAR_P (c)
/* The Emacs definitions should not be directly affected by locales. */
@@ -139,18 +135,22 @@
#define ISLOWER(c) lowercasep (c)
+#define ISUPPER(c) uppercasep (c)
+
+/* The following predicates use the buffer-local syntax table and
+ ignore syntax properties, for consistency with the up-front
+ assumptions made at compile time. */
+
#define ISPUNCT(c) (IS_REAL_ASCII (c) \
? ((c) > ' ' && (c) < 0177 \
&& !(((c) >= 'a' && (c) <= 'z') \
|| ((c) >= 'A' && (c) <= 'Z') \
|| ((c) >= '0' && (c) <= '9'))) \
- : SYNTAX (c) != Sword)
+ : BUFFER_SYNTAX (c) != Sword)
-#define ISSPACE(c) (SYNTAX (c) == Swhitespace)
+#define ISSPACE(c) (BUFFER_SYNTAX (c) == Swhitespace)
-#define ISUPPER(c) uppercasep (c)
-
-#define ISWORD(c) (SYNTAX (c) == Sword)
+#define ISWORD(c) (BUFFER_SYNTAX (c) == Sword)
/* Use alloca instead of malloc. This is because using malloc in
re_search* or re_match* could cause memory leaks when C-g is used
@@ -268,7 +268,9 @@ typedef enum
on_failure_jump,
/* Like on_failure_jump, but pushes a placeholder instead of the
- current string position when executed. */
+ current string position when executed. Upon failure,
+ the current string position is thus not restored.
+ Used only for single-char loops that don't require backtracking. */
on_failure_keep_string_jump,
/* Just like 'on_failure_jump', except that it checks that we
@@ -338,11 +340,12 @@ typedef enum
/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
-#define STORE_NUMBER(destination, number) \
- do { \
- (destination)[0] = (number) & 0377; \
- (destination)[1] = (number) >> 8; \
- } while (false)
+static void
+STORE_NUMBER (unsigned char *destination, int16_t number)
+{
+ (destination)[0] = (number) & 0377;
+ (destination)[1] = (number) >> 8;
+}
/* Same as STORE_NUMBER, except increment DESTINATION to
the byte after where the number is stored. Therefore, DESTINATION
@@ -367,6 +370,12 @@ extract_number (re_char *source)
return leading_byte * 256 + source[0];
}
+static re_char *
+extract_address (re_char *source)
+{
+ return source + 2 + extract_number (source);
+}
+
/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
SOURCE must be an lvalue. */
@@ -434,38 +443,27 @@ extract_number_and_incr (re_char **source)
/* If REGEX_EMACS_DEBUG is defined, print many voluminous messages
(if the variable regex_emacs_debug is positive). */
-#ifdef REGEX_EMACS_DEBUG
+#if defined REGEX_EMACS_DEBUG || ENABLE_CHECKING
/* Use standard I/O for debugging. */
# include "sysstdio.h"
-static int regex_emacs_debug = -100000;
-
-# define DEBUG_STATEMENT(e) e
-# define DEBUG_PRINT(...) \
- if (regex_emacs_debug > 0) fprintf (stderr, __VA_ARGS__)
-# define DEBUG_COMPILES_ARGUMENTS
-# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
- if (regex_emacs_debug > 0) print_partial_compiled_pattern (s, e)
-# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
- if (regex_emacs_debug > 0) print_double_string (w, s1, sz1, s2, sz2)
-
static void
-debug_putchar (int c)
+debug_putchar (FILE *dest, int c)
{
if (c >= 32 && c <= 126)
- putc (c, stderr);
+ putc (c, dest);
else
{
unsigned int uc = c;
- fprintf (stderr, "{%02x}", uc);
+ fprintf (dest, "{%02x}", uc);
}
}
/* Print the fastmap in human-readable form. */
static void
-print_fastmap (char *fastmap)
+print_fastmap (FILE *dest, char *fastmap)
{
bool was_a_range = false;
int i = 0;
@@ -475,7 +473,7 @@ print_fastmap (char *fastmap)
if (fastmap[i++])
{
was_a_range = false;
- debug_putchar (i - 1);
+ debug_putchar (dest, i - 1);
while (i < (1 << BYTEWIDTH) && fastmap[i])
{
was_a_range = true;
@@ -483,12 +481,12 @@ print_fastmap (char *fastmap)
}
if (was_a_range)
{
- debug_putchar ('-');
- debug_putchar (i - 1);
+ debug_putchar (dest, '-');
+ debug_putchar (dest, i - 1);
}
}
}
- putc ('\n', stderr);
+ putc ('\n', dest);
}
@@ -496,7 +494,7 @@ print_fastmap (char *fastmap)
the START pointer into it and ending just before the pointer END. */
static void
-print_partial_compiled_pattern (re_char *start, re_char *end)
+print_partial_compiled_pattern (FILE *dest, re_char *start, re_char *end)
{
int mcnt, mcnt2;
re_char *p = start;
@@ -504,50 +502,50 @@ print_partial_compiled_pattern (re_char *start, re_char *end)
if (start == NULL)
{
- fputs ("(null)\n", stderr);
+ fputs ("(null)\n", dest);
return;
}
/* Loop over pattern commands. */
while (p < pend)
{
- fprintf (stderr, "%td:\t", p - start);
+ fprintf (dest, "%td:\t", p - start);
switch ((re_opcode_t) *p++)
{
case no_op:
- fputs ("/no_op", stderr);
+ fputs ("/no_op", dest);
break;
case succeed:
- fputs ("/succeed", stderr);
+ fputs ("/succeed", dest);
break;
case exactn:
mcnt = *p++;
- fprintf (stderr, "/exactn/%d", mcnt);
+ fprintf (dest, "/exactn/%d", mcnt);
do
{
- debug_putchar ('/');
- debug_putchar (*p++);
+ debug_putchar (dest, '/');
+ debug_putchar (dest, *p++);
}
while (--mcnt);
break;
case start_memory:
- fprintf (stderr, "/start_memory/%d", *p++);
+ fprintf (dest, "/start_memory/%d", *p++);
break;
case stop_memory:
- fprintf (stderr, "/stop_memory/%d", *p++);
+ fprintf (dest, "/stop_memory/%d", *p++);
break;
case duplicate:
- fprintf (stderr, "/duplicate/%d", *p++);
+ fprintf (dest, "/duplicate/%d", *p++);
break;
case anychar:
- fputs ("/anychar", stderr);
+ fputs ("/anychar", dest);
break;
case charset:
@@ -558,11 +556,11 @@ print_partial_compiled_pattern (re_char *start, re_char *end)
int length = CHARSET_BITMAP_SIZE (p - 1);
bool has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
- fprintf (stderr, "/charset [%s",
+ fprintf (dest, "/charset [%s",
(re_opcode_t) *(p - 1) == charset_not ? "^" : "");
- if (p + *p >= pend)
- fputs (" !extends past end of pattern! ", stderr);
+ if (p + (*p & 0x7f) >= pend)
+ fputs (" !extends past end of pattern! ", dest);
for (c = 0; c < 256; c++)
if (c / 8 < length
@@ -571,33 +569,33 @@ print_partial_compiled_pattern (re_char *start, re_char *end)
/* Are we starting a range? */
if (last + 1 == c && ! in_range)
{
- debug_putchar ('-');
+ debug_putchar (dest, '-');
in_range = true;
}
/* Have we broken a range? */
else if (last + 1 != c && in_range)
{
- debug_putchar (last);
+ debug_putchar (dest, last);
in_range = false;
}
if (! in_range)
- debug_putchar (c);
+ debug_putchar (dest, c);
last = c;
}
if (in_range)
- debug_putchar (last);
+ debug_putchar (dest, last);
- debug_putchar (']');
+ debug_putchar (dest, ']');
p += 1 + length;
if (has_range_table)
{
int count;
- fputs ("has-range-table", stderr);
+ fputs ("has-range-table", dest);
/* ??? Should print the range table; for now, just skip it. */
p += 2; /* skip range table bits */
@@ -608,160 +606,175 @@ print_partial_compiled_pattern (re_char *start, re_char *end)
break;
case begline:
- fputs ("/begline", stderr);
+ fputs ("/begline", dest);
break;
case endline:
- fputs ("/endline", stderr);
+ fputs ("/endline", dest);
break;
case on_failure_jump:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
- fprintf (stderr, "/on_failure_jump to %td", p + mcnt - start);
+ fprintf (dest, "/on_failure_jump to %td", p + mcnt - start);
break;
case on_failure_keep_string_jump:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
- fprintf (stderr, "/on_failure_keep_string_jump to %td",
+ fprintf (dest, "/on_failure_keep_string_jump to %td",
p + mcnt - start);
break;
case on_failure_jump_nastyloop:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
- fprintf (stderr, "/on_failure_jump_nastyloop to %td",
+ fprintf (dest, "/on_failure_jump_nastyloop to %td",
p + mcnt - start);
break;
case on_failure_jump_loop:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
- fprintf (stderr, "/on_failure_jump_loop to %td",
+ fprintf (dest, "/on_failure_jump_loop to %td",
p + mcnt - start);
break;
case on_failure_jump_smart:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
- fprintf (stderr, "/on_failure_jump_smart to %td",
+ fprintf (dest, "/on_failure_jump_smart to %td",
p + mcnt - start);
break;
case jump:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
- fprintf (stderr, "/jump to %td", p + mcnt - start);
+ fprintf (dest, "/jump to %td", p + mcnt - start);
break;
case succeed_n:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
EXTRACT_NUMBER_AND_INCR (mcnt2, p);
- fprintf (stderr, "/succeed_n to %td, %d times",
+ fprintf (dest, "/succeed_n to %td, %d times",
p - 2 + mcnt - start, mcnt2);
break;
case jump_n:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
EXTRACT_NUMBER_AND_INCR (mcnt2, p);
- fprintf (stderr, "/jump_n to %td, %d times",
+ fprintf (dest, "/jump_n to %td, %d times",
p - 2 + mcnt - start, mcnt2);
break;
case set_number_at:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
EXTRACT_NUMBER_AND_INCR (mcnt2, p);
- fprintf (stderr, "/set_number_at location %td to %d",
+ fprintf (dest, "/set_number_at location %td to %d",
p - 2 + mcnt - start, mcnt2);
break;
case wordbound:
- fputs ("/wordbound", stderr);
+ fputs ("/wordbound", dest);
break;
case notwordbound:
- fputs ("/notwordbound", stderr);
+ fputs ("/notwordbound", dest);
break;
case wordbeg:
- fputs ("/wordbeg", stderr);
+ fputs ("/wordbeg", dest);
break;
case wordend:
- fputs ("/wordend", stderr);
+ fputs ("/wordend", dest);
break;
case symbeg:
- fputs ("/symbeg", stderr);
+ fputs ("/symbeg", dest);
break;
case symend:
- fputs ("/symend", stderr);
+ fputs ("/symend", dest);
break;
case syntaxspec:
- fputs ("/syntaxspec", stderr);
+ fputs ("/syntaxspec", dest);
mcnt = *p++;
- fprintf (stderr, "/%d", mcnt);
+ fprintf (dest, "/%d", mcnt);
break;
case notsyntaxspec:
- fputs ("/notsyntaxspec", stderr);
+ fputs ("/notsyntaxspec", dest);
mcnt = *p++;
- fprintf (stderr, "/%d", mcnt);
+ fprintf (dest, "/%d", mcnt);
break;
case at_dot:
- fputs ("/at_dot", stderr);
+ fputs ("/at_dot", dest);
break;
case categoryspec:
- fputs ("/categoryspec", stderr);
+ fputs ("/categoryspec", dest);
mcnt = *p++;
- fprintf (stderr, "/%d", mcnt);
+ fprintf (dest, "/%d", mcnt);
break;
case notcategoryspec:
- fputs ("/notcategoryspec", stderr);
+ fputs ("/notcategoryspec", dest);
mcnt = *p++;
- fprintf (stderr, "/%d", mcnt);
+ fprintf (dest, "/%d", mcnt);
break;
case begbuf:
- fputs ("/begbuf", stderr);
+ fputs ("/begbuf", dest);
break;
case endbuf:
- fputs ("/endbuf", stderr);
+ fputs ("/endbuf", dest);
break;
default:
- fprintf (stderr, "?%d", *(p-1));
+ fprintf (dest, "?%d", *(p-1));
}
- putc ('\n', stderr);
+ putc ('\n', dest);
}
- fprintf (stderr, "%td:\tend of pattern.\n", p - start);
+ fprintf (dest, "%td:\tend of pattern.\n", p - start);
}
-
-static void
-print_compiled_pattern (struct re_pattern_buffer *bufp)
+void
+print_compiled_pattern (FILE *dest, struct re_pattern_buffer *bufp)
{
+ if (!dest)
+ dest = stderr;
re_char *buffer = bufp->buffer;
- print_partial_compiled_pattern (buffer, buffer + bufp->used);
- fprintf (stderr, "%td bytes used/%td bytes allocated.\n",
+ print_partial_compiled_pattern (dest, buffer, buffer + bufp->used);
+ fprintf (dest, "%td bytes used/%td bytes allocated.\n",
bufp->used, bufp->allocated);
if (bufp->fastmap_accurate && bufp->fastmap)
{
- fputs ("fastmap: ", stderr);
- print_fastmap (bufp->fastmap);
+ fputs ("fastmap: ", dest);
+ print_fastmap (dest, bufp->fastmap);
}
- fprintf (stderr, "re_nsub: %td\t", bufp->re_nsub);
- fprintf (stderr, "regs_alloc: %d\t", bufp->regs_allocated);
- fprintf (stderr, "can_be_null: %d\n", bufp->can_be_null);
+ fprintf (dest, "re_nsub: %td\t", bufp->re_nsub);
+ fprintf (dest, "regs_alloc: %d\t", bufp->regs_allocated);
+ fprintf (dest, "can_be_null: %d\n", bufp->can_be_null);
/* Perhaps we should print the translate table? */
}
+#endif
+
+#ifdef REGEX_EMACS_DEBUG
+
+static int regex_emacs_debug = -100000;
+
+# define DEBUG_STATEMENT(e) e
+# define DEBUG_PRINT(...) \
+ if (regex_emacs_debug > 0) fprintf (stderr, __VA_ARGS__)
+# define DEBUG_COMPILES_ARGUMENTS
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (regex_emacs_debug > 0) print_partial_compiled_pattern (stderr, s, e)
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (regex_emacs_debug > 0) print_double_string (w, s1, sz1, s2, sz2)
static void
print_double_string (re_char *where, re_char *string1, ptrdiff_t size1,
@@ -775,12 +788,12 @@ print_double_string (re_char *where, re_char *string1, ptrdiff_t size1,
if (FIRST_STRING_P (where))
{
for (i = 0; i < string1 + size1 - where; i++)
- debug_putchar (where[i]);
+ debug_putchar (stderr, where[i]);
where = string2;
}
for (i = 0; i < string2 + size2 - where; i++)
- debug_putchar (where[i]);
+ debug_putchar (stderr, where[i]);
}
}
@@ -1166,8 +1179,8 @@ static void insert_op2 (re_opcode_t op, unsigned char *loc,
static bool at_begline_loc_p (re_char *pattern, re_char *p);
static bool at_endline_loc_p (re_char *p, re_char *pend);
static re_char *skip_one_char (re_char *p);
-static int analyze_first (re_char *p, re_char *pend,
- char *fastmap, bool multibyte);
+static bool analyze_first (struct re_pattern_buffer *bufp,
+ re_char *p, re_char *pend, char *fastmap);
/* Fetch the next character in the uncompiled pattern, with no
translation. */
@@ -1332,7 +1345,7 @@ struct range_table_work_area
/* Set a range (RANGE_START, RANGE_END) to WORK_AREA. */
#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \
do { \
- EXTEND_RANGE_TABLE ((work_area), 2); \
+ EXTEND_RANGE_TABLE (work_area, 2); \
(work_area).table[(work_area).used++] = (range_start); \
(work_area).table[(work_area).used++] = (range_end); \
} while (false)
@@ -1367,7 +1380,7 @@ struct range_table_work_area
/* Set the bit for character C in a list. */
-#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
+#define SET_LIST_BIT(c) (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
/* Store characters in the range FROM to TO in the bitmap at B (for
@@ -1390,7 +1403,7 @@ struct range_table_work_area
C1 = TRANSLATE (C0); \
if (! ASCII_CHAR_P (C1)) \
{ \
- SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1); \
+ SET_RANGE_TABLE_WORK_AREA (work_area, C1, C1); \
if ((C1 = RE_CHAR_TO_UNIBYTE (C1)) < 0) \
C1 = C0; \
} \
@@ -1433,7 +1446,7 @@ struct range_table_work_area
} \
} \
if (I < USED) \
- SET_RANGE_TABLE_WORK_AREA ((work_area), C2, C2); \
+ SET_RANGE_TABLE_WORK_AREA (work_area, C2, C2); \
} \
} \
} while (false)
@@ -1445,7 +1458,7 @@ struct range_table_work_area
do { \
int C0, C1, C2, I, USED = RANGE_TABLE_WORK_USED (work_area); \
\
- SET_RANGE_TABLE_WORK_AREA ((work_area), (FROM), (TO)); \
+ SET_RANGE_TABLE_WORK_AREA (work_area, FROM, TO); \
for (C0 = (FROM); C0 <= (TO); C0++) \
{ \
C1 = TRANSLATE (C0); \
@@ -1469,7 +1482,7 @@ struct range_table_work_area
} \
} \
if (I < USED) \
- SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1); \
+ SET_RANGE_TABLE_WORK_AREA (work_area, C1, C1); \
} \
} while (false)
@@ -1723,7 +1736,8 @@ regex_compile (re_char *pattern, ptrdiff_t size,
/* Address of start of the most recently finished expression.
This tells, e.g., postfix * where to find the start of its
- operand. Reset at the beginning of groups and alternatives. */
+ operand. Reset at the beginning of groups and alternatives,
+ and after ^ and \` for dusty-deck compatibility. */
unsigned char *laststart = 0;
/* Address of beginning of regexp, or inside of last group. */
@@ -1759,7 +1773,7 @@ regex_compile (re_char *pattern, ptrdiff_t size,
if (regex_emacs_debug > 0)
{
for (ptrdiff_t debug_count = 0; debug_count < size; debug_count++)
- debug_putchar (pattern[debug_count]);
+ debug_putchar (stderr, pattern[debug_count]);
putc ('\n', stderr);
}
#endif
@@ -1854,12 +1868,16 @@ regex_compile (re_char *pattern, ptrdiff_t size,
case '^':
if (! (p == pattern + 1 || at_begline_loc_p (pattern, p)))
goto normal_char;
+ /* Special case for compatibility: postfix ops after ^ become
+ literals. */
+ laststart = 0;
BUF_PUSH (begline);
break;
case '$':
if (! (p == pend || at_endline_loc_p (p, pend)))
goto normal_char;
+ laststart = b;
BUF_PUSH (endline);
break;
@@ -1899,7 +1917,7 @@ regex_compile (re_char *pattern, ptrdiff_t size,
/* Star, etc. applied to an empty pattern is equivalent
to an empty pattern. */
- if (!laststart || laststart == b)
+ if (laststart == b)
break;
/* Now we know whether or not zero matches is allowed
@@ -1912,18 +1930,28 @@ regex_compile (re_char *pattern, ptrdiff_t size,
ptrdiff_t startoffset = 0;
re_opcode_t ofj =
/* Check if the loop can match the empty string. */
- (simple || !analyze_first (laststart, b, NULL, false))
+ (simple || !analyze_first (bufp, laststart, b, NULL))
? on_failure_jump : on_failure_jump_loop;
eassert (skip_one_char (laststart) <= b);
if (!zero_times_ok && simple)
{ /* Since simple * loops can be made faster by using
- on_failure_keep_string_jump, we turn simple P+
- into PP* if P is simple. */
+ on_failure_keep_string_jump, we turn P+ into PP*
+ if P is simple.
+ We can't use `top: <BODY>; OFJS exit; J top; exit:`
+ because the OFJS needs to be at the beginning
+ so we can replace
+ top: OFJS exit; <BODY>; J top; exit
+ with
+ OFKSJ exit; loop: <BODY>; J loop; exit
+ i.e. a single OFJ at the beginning of the loop
+ rather than once per iteration. */
unsigned char *p1, *p2;
startoffset = b - laststart;
GET_BUFFER_SPACE (startoffset);
p1 = b; p2 = laststart;
+ /* We presume that the code skipped
+ by `skip_one_char` is position-independent. */
while (p2 < p1)
*b++ = *p2++;
zero_times_ok = 1;
@@ -1959,7 +1987,7 @@ regex_compile (re_char *pattern, ptrdiff_t size,
GET_BUFFER_SPACE (7); /* We might use less. */
if (many_times_ok)
{
- bool emptyp = !!analyze_first (laststart, b, NULL, false);
+ bool emptyp = analyze_first (bufp, laststart, b, NULL);
/* The non-greedy multiple match looks like
a repeat..until: we only need a conditional jump
@@ -2050,13 +2078,6 @@ regex_compile (re_char *pattern, ptrdiff_t size,
is_xdigit, since they can only match ASCII characters.
We don't need to handle them for multibyte. */
- /* Setup the gl_state object to its buffer-defined value.
- This hardcodes the buffer-global syntax-table for ASCII
- chars, while the other chars will obey syntax-table
- properties. It's not ideal, but it's the way it's been
- done until now. */
- SETUP_BUFFER_SYNTAX_TABLE ();
-
for (c = 0; c < 0x80; ++c)
if (re_iswctype (c, cc))
{
@@ -2209,9 +2230,8 @@ regex_compile (re_char *pattern, ptrdiff_t size,
FALLTHROUGH;
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- if (INT_MULTIPLY_WRAPV (regnum, 10, &regnum)
- || INT_ADD_WRAPV (regnum, c - '0',
- &regnum))
+ if (ckd_mul (&regnum, regnum, 10)
+ || ckd_add (&regnum, regnum, c - '0'))
FREE_STACK_RETURN (REG_ESIZE);
break;
default:
@@ -2552,18 +2572,24 @@ regex_compile (re_char *pattern, ptrdiff_t size,
break;
case 'b':
+ laststart = b;
BUF_PUSH (wordbound);
break;
case 'B':
+ laststart = b;
BUF_PUSH (notwordbound);
break;
case '`':
+ /* Special case for compatibility: postfix ops after \` become
+ literals, as for ^ (see above). */
+ laststart = 0;
BUF_PUSH (begbuf);
break;
case '\'':
+ laststart = b;
BUF_PUSH (endbuf);
break;
@@ -2605,7 +2631,7 @@ regex_compile (re_char *pattern, ptrdiff_t size,
/* If followed by a repetition operator. */
|| (p != pend
- && (*p == '*' || *p == '+' || *p == '?' || *p == '^'))
+ && (*p == '*' || *p == '+' || *p == '?'))
|| (p + 1 < pend && p[0] == '\\' && p[1] == '{'))
{
/* Start building a new exactn. */
@@ -2667,7 +2693,7 @@ regex_compile (re_char *pattern, ptrdiff_t size,
{
re_compile_fastmap (bufp);
DEBUG_PRINT ("\nCompiled pattern:\n");
- print_compiled_pattern (bufp);
+ print_compiled_pattern (stderr, bufp);
}
regex_emacs_debug--;
#endif
@@ -2796,309 +2822,486 @@ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
return false;
}
-/* analyze_first.
- If fastmap is non-NULL, go through the pattern and fill fastmap
- with all the possible leading chars. If fastmap is NULL, don't
- bother filling it up (obviously) and only return whether the
- pattern could potentially match the empty string.
+/* Iterate through all the char-matching operations directly reachable from P.
+ This is the inner loop of `forall_firstchar`, which see.
+ LOOP_BEG..LOOP_END delimit the currently "block" of code (we assume
+ the code is made of syntactically nested loops).
+ LOOP_END is blindly assumed to be "safe".
+ To guarantee termination, at each iteration, either LOOP_BEG should
+ get bigger, or it should stay the same and P should get bigger. */
+static bool
+forall_firstchar_1 (re_char *p, re_char *pend,
+ re_char *loop_beg, re_char *loop_end,
+ bool f (const re_char *p, void *arg), void *arg)
+{
+ eassert (p >= loop_beg);
+ eassert (p <= loop_end);
- Return 1 if p..pend might match the empty string.
- Return 0 if p..pend matches at least one char.
- Return -1 if fastmap was not updated accurately. */
+ while (true)
+ {
+ re_char *newp1, *newp2, *tmp;
+ re_char *p_orig = p;
+ int offset;
-static int
-analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
-{
- int j, k;
- int nbits;
- bool not;
+ if (p == pend)
+ return false;
+ else if (p == loop_end)
+ return true;
+ else if (p > loop_end)
+ {
+#if ENABLE_CHECKING
+ fprintf (stderr, "FORALL_FIRSTCHAR: Broken assumption1!!\n");
+#endif
+ return false; /* FIXME: Broken assumption about the code shape. */
+ }
+ else
+ switch (*p)
+ {
+ case no_op:
+ p++; continue;
+
+ /* Cases which stop the iteration. */
+ case succeed:
+ case exactn:
+ case charset:
+ case charset_not:
+ case anychar:
+ case syntaxspec:
+ case notsyntaxspec:
+ case categoryspec:
+ case notcategoryspec:
+ return f (p, arg);
+
+ /* Cases which may match the empty string. */
+ case at_dot:
+ case begbuf:
+ case wordbound:
+ case notwordbound:
+ case begline:
+ case endline:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case symbeg:
+ case symend:
+ if (f (p, arg))
+ return true;
+ p++;
+ continue;
+
+ case jump:
+ case jump_n:
+ newp1 = extract_address (p + 1);
+ if (newp1 > p)
+ { /* Forward jump, boring. */
+ p = newp1;
+ continue;
+ }
+ switch (*newp1)
+ {
+ case on_failure_jump:
+ case on_failure_keep_string_jump:
+ case on_failure_jump_nastyloop:
+ case on_failure_jump_loop:
+ case on_failure_jump_smart:
+ case succeed_n:
+ newp2 = extract_address (newp1 + 1);
+ goto do_twoway_jump;
+ default:
+ newp2 = loop_end; /* "Safe" choice. */
+ goto do_jump;
+ }
- /* If all elements for base leading-codes in fastmap is set, this
- flag is set true. */
- bool match_any_multibyte_characters = false;
-
- eassert (p);
-
- /* The loop below works as follows:
- - It has a working-list kept in the PATTERN_STACK and which basically
- starts by only containing a pointer to the first operation.
- - If the opcode we're looking at is a match against some set of
- chars, then we add those chars to the fastmap and go on to the
- next work element from the worklist (done via 'break').
- - If the opcode is a control operator on the other hand, we either
- ignore it (if it's meaningless at this point, such as 'start_memory')
- or execute it (if it's a jump). If the jump has several destinations
- (i.e. 'on_failure_jump'), then we push the other destination onto the
- worklist.
- We guarantee termination by ignoring backward jumps (more or less),
- so that P is monotonically increasing. More to the point, we
- never set P (or push) anything '<= p1'. */
+ case on_failure_jump:
+ case on_failure_keep_string_jump:
+ case on_failure_jump_nastyloop:
+ case on_failure_jump_loop:
+ case on_failure_jump_smart:
+ newp1 = extract_address (p + 1);
+ newp2 = p + 3;
+ /* For `+` loops, we often have an `on_failure_jump` that skips
+ forward over a subsequent `jump`. Recognize this pattern
+ since that subsequent `jump` is the one that jumps to the
+ loop-entry. */
+ if ((re_opcode_t) *newp2 == jump)
+ {
+ re_char *p3 = extract_address (newp2 + 1);
+ /* Only recognize this pattern if one of the two destinations
+ is going forward, otherwise we'll fall into the pessimistic
+ "Both destinations go backward" below.
+ This is important if the `jump` at newp2 is the end of an
+ outer loop while the `on_failure_jump` is the end of an
+ inner loop. */
+ if (p3 > p_orig || newp1 > p_orig)
+ newp2 = p3;
+ }
- while (p < pend)
- {
- /* P1 is used as a marker of how far back a 'on_failure_jump'
- can go without being ignored. It is normally equal to P
- (which prevents any backward 'on_failure_jump') except right
- after a plain 'jump', to allow patterns such as:
- 0: jump 10
- 3..9: <body>
- 10: on_failure_jump 3
- as used for the *? operator. */
- re_char *p1 = p;
+ do_twoway_jump:
+ /* We have to check that both destinations are safe.
+ Arrange for `newp1` to be the smaller of the two. */
+ if (newp1 > newp2)
+ (tmp = newp1, newp1 = newp2, newp2 = tmp);
- switch (*p++)
- {
- case succeed:
- return 1;
+ if (newp2 <= p_orig) /* Both destinations go backward! */
+ {
+#if ENABLE_CHECKING
+ fprintf (stderr, "FORALL_FIRSTCHAR: Broken assumption2!!\n");
+#endif
+ return false;
+ }
- case duplicate:
- /* If the first character has to match a backreference, that means
- that the group was empty (since it already matched). Since this
- is the only case that interests us here, we can assume that the
- backreference must match the empty string. */
- p++;
- continue;
+ if (!forall_firstchar_1 (newp2, pend, loop_beg, loop_end, f, arg))
+ return false;
+ do_jump:
+ eassert (newp2 <= loop_end);
+ if (newp1 <= p_orig)
+ {
+ if (newp1 < loop_beg)
+ {
+#if ENABLE_CHECKING
+ fprintf (stderr, "FORALL_FIRSTCHAR: Broken assumption3!!\n");
+#endif
+ return false;
+ }
+ else if (newp1 == loop_beg)
+ /* If we jump backward to the entry point of the current loop
+ it means it's a zero-length cycle through that loop;
+ this cycle itself does not break safety. */
+ return true;
+ else
+ /* We jump backward to a new loop, nested within the current
+ one. `newp1` is the entry point and `newp2` the exit of
+ that inner loop. */
+ /* `p` gets smaller, but termination is still ensured because
+ `loop_beg` gets bigger. */
+ (loop_beg = newp1, loop_end = newp2);
+ }
+ p = newp1;
+ continue;
+
+ case succeed_n:
+ newp1 = extract_address (p + 1);
+ newp2 = p + 5; /* Skip the two bytes containing the count. */
+ goto do_twoway_jump;
+
+ case set_number_at:
+ offset = extract_number (p + 1);
+ DEBUG_STATEMENT (eassert (extract_number (p + 3)));
+ /* If we're setting the counter of an immediately following
+ `succeed_n`, then this next execution of `succeed_n` will do
+ nothing but decrement its counter and "fall through".
+ So we do the fall through here to avoid considering the
+ "on failure" part of the `succeed_n` which should only be
+ considered when coming from the `jump(_n)` at the end of
+ the loop. */
+ p += (offset == 5 && p[5] == succeed_n) ? 10 : 5;
+ continue;
+
+ case start_memory:
+ case stop_memory:
+ p += 2;
+ continue;
+
+ /* This could match the empty string, so we may need to continue,
+ but in most cases, this can match "anything", so we should
+ return `false` unless told otherwise. */
+ case duplicate:
+ if (!f (p, arg))
+ return false;
+ p += 2;
+ continue;
+
+ default:
+ abort (); /* We have listed all the cases. */
+ }
+ }
+}
- /* Following are the cases which match a character. These end
- with 'break'. */
+/* Iterate through all the char-matching operations directly reachable from P.
+ Return true if P is "safe", meaning that PEND cannot be reached directly
+ from P and all calls to F returned true.
+ Return false if PEND *may* be directly reachable from P or if one of
+ the calls to F returned false.
+ PEND can be NULL (and hence never reachable).
+
+ Call `F (POS, ARG)` for every POS directly reachable from P,
+ before reaching PEND, where POS is the position of a char-matching
+ operation (`exactn`, `charset`, ...).
+
+ For operations that match the empty string (`wordbeg`, ...), if F
+ returns true we stop going down that path immediately but if it returns
+ false we don't consider it as a failure and we simply look for the
+ next char-matching operations on that path.
+ For `duplicate`, it is the reverse: a false is an immediate failure
+ whereas a true just lets the analysis continue with the rest of the path.
+
+ This function can be used while building the bytecode (in which case
+ you should pass NULL for bufp), but if so, P and PEND need to delimit
+ a valid block such that there is not jump to a location outside
+ of [P...PEND]. */
+static bool
+forall_firstchar (struct re_pattern_buffer *bufp, re_char *p, re_char *pend,
+ bool f (re_char *p, void *arg), void *arg)
+{
+ eassert (!bufp || bufp->used);
+ eassert (pend || bufp->used);
+ return forall_firstchar_1 (p, pend,
+ bufp ? bufp->buffer - 1 : p,
+ bufp ? bufp->buffer + bufp->used + 1 : pend,
+ f, arg);
+}
- case exactn:
- if (fastmap)
- {
- /* If multibyte is nonzero, the first byte of each
- character is an ASCII or a leading code. Otherwise,
- each byte is a character. Thus, this works in both
- cases. */
- fastmap[p[1]] = 1;
- if (multibyte)
- {
- /* Cover the case of matching a raw char in a
- multibyte regexp against unibyte. */
- if (CHAR_BYTE8_HEAD_P (p[1]))
- fastmap[CHAR_TO_BYTE8 (STRING_CHAR (p + 1))] = 1;
- }
- else
- {
- /* For the case of matching this unibyte regex
- against multibyte, we must set a leading code of
- the corresponding multibyte character. */
- int c = RE_CHAR_TO_MULTIBYTE (p[1]);
+struct anafirst_data {
+ bool multibyte;
+ char *fastmap;
+ bool match_any_multibyte_characters;
+};
- fastmap[CHAR_LEADING_CODE (c)] = 1;
- }
- }
- break;
+static bool
+analyze_first_fastmap (const re_char *p, void *arg)
+{
+ struct anafirst_data *data = arg;
+ int j, k;
+ int nbits;
+ bool not;
- case anychar:
- /* We could put all the chars except for \n (and maybe \0)
- but we don't bother since it is generally not worth it. */
- if (!fastmap) break;
- return -1;
+ switch (*p)
+ {
+ case succeed:
+ return false;
+ case duplicate:
+ /* If the first character has to match a backreference, that means
+ that the group was empty (since it already matched). Since this
+ is the only case that interests us here, we can assume that the
+ backreference must match the empty string and we need to
+ build the fastmap from the rest of the path. */
+ return true;
- case charset_not:
- if (!fastmap) break;
- {
- /* Chars beyond end of bitmap are possible matches. */
- for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
- j < (1 << BYTEWIDTH); j++)
- fastmap[j] = 1;
- }
- FALLTHROUGH;
- case charset:
- if (!fastmap) break;
- not = (re_opcode_t) *(p - 1) == charset_not;
- nbits = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
- p++;
- for (j = 0; j < nbits; j++)
- if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
- fastmap[j] = 1;
-
- /* To match raw bytes (in the 80..ff range) against multibyte
- strings, add their leading bytes to the fastmap. */
- for (j = 0x80; j < nbits; j++)
- if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
- fastmap[CHAR_LEADING_CODE (BYTE8_TO_CHAR (j))] = 1;
-
- if (/* Any leading code can possibly start a character
- which doesn't match the specified set of characters. */
- not
- ||
- /* If we can match a character class, we can match any
- multibyte characters. */
- (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
- && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
+ /* Following are the cases which match a character. These end
+ with 'break'. */
- {
- if (match_any_multibyte_characters == false)
- {
- for (j = MIN_MULTIBYTE_LEADING_CODE;
- j <= MAX_MULTIBYTE_LEADING_CODE; j++)
- fastmap[j] = 1;
- match_any_multibyte_characters = true;
- }
- }
+ case exactn:
+ p++;
+ /* If multibyte is nonzero, the first byte of each
+ character is an ASCII or a leading code. Otherwise,
+ each byte is a character. Thus, this works in both
+ cases. */
+ data->fastmap[p[1]] = 1;
+ if (data->multibyte)
+ {
+ /* Cover the case of matching a raw char in a
+ multibyte regexp against unibyte. */
+ if (CHAR_BYTE8_HEAD_P (p[1]))
+ data->fastmap[CHAR_TO_BYTE8 (STRING_CHAR (p + 1))] = 1;
+ }
+ else
+ {
+ /* For the case of matching this unibyte regex
+ against multibyte, we must set a leading code of
+ the corresponding multibyte character. */
+ int c = RE_CHAR_TO_MULTIBYTE (p[1]);
- else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
- && match_any_multibyte_characters == false)
- {
- /* Set fastmap[I] to 1 where I is a leading code of each
- multibyte character in the range table. */
- int c, count;
- unsigned char lc1, lc2;
-
- /* Make P points the range table. '+ 2' is to skip flag
- bits for a character class. */
- p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
-
- /* Extract the number of ranges in range table into COUNT. */
- EXTRACT_NUMBER_AND_INCR (count, p);
- for (; count > 0; count--, p += 3)
- {
- /* Extract the start and end of each range. */
- EXTRACT_CHARACTER (c, p);
- lc1 = CHAR_LEADING_CODE (c);
- p += 3;
- EXTRACT_CHARACTER (c, p);
- lc2 = CHAR_LEADING_CODE (c);
- for (j = lc1; j <= lc2; j++)
- fastmap[j] = 1;
- }
- }
- break;
+ data->fastmap[CHAR_LEADING_CODE (c)] = 1;
+ }
+ return true;
- case syntaxspec:
- case notsyntaxspec:
- if (!fastmap) break;
- /* This match depends on text properties. These end with
- aborting optimizations. */
- return -1;
+ case anychar:
+ /* We could put all the chars except for \n (and maybe \0)
+ but we don't bother since it is generally not worth it. */
+ return false;
- case categoryspec:
- case notcategoryspec:
- if (!fastmap) break;
- not = (re_opcode_t)p[-1] == notcategoryspec;
- k = *p++;
- for (j = (1 << BYTEWIDTH); j >= 0; j--)
- if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
- fastmap[j] = 1;
-
- /* Any leading code can possibly start a character which
- has or doesn't has the specified category. */
- if (match_any_multibyte_characters == false)
+ case charset_not:
+ {
+ /* Chars beyond end of bitmap are possible matches. */
+ for (j = CHARSET_BITMAP_SIZE (p) * BYTEWIDTH;
+ j < (1 << BYTEWIDTH); j++)
+ data->fastmap[j] = 1;
+ }
+ FALLTHROUGH;
+ case charset:
+ not = (re_opcode_t) *(p) == charset_not;
+ nbits = CHARSET_BITMAP_SIZE (p) * BYTEWIDTH;
+ p += 2;
+ for (j = 0; j < nbits; j++)
+ if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
+ data->fastmap[j] = 1;
+
+ /* To match raw bytes (in the 80..ff range) against multibyte
+ strings, add their leading bytes to the fastmap. */
+ for (j = 0x80; j < nbits; j++)
+ if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
+ data->fastmap[CHAR_LEADING_CODE (BYTE8_TO_CHAR (j))] = 1;
+
+ if (/* Any leading code can possibly start a character
+ which doesn't match the specified set of characters. */
+ not
+ ||
+ /* If we can match a character class, we can match any
+ multibyte characters. */
+ (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
+ && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
+
+ {
+ if (!data->match_any_multibyte_characters)
{
for (j = MIN_MULTIBYTE_LEADING_CODE;
- j <= MAX_MULTIBYTE_LEADING_CODE; j++)
- fastmap[j] = 1;
- match_any_multibyte_characters = true;
+ j <= MAX_MULTIBYTE_LEADING_CODE; j++)
+ data->fastmap[j] = 1;
+ data->match_any_multibyte_characters = true;
}
- break;
-
- /* All cases after this match the empty string. These end with
- 'continue'. */
-
- case at_dot:
- case no_op:
- case begline:
- case endline:
- case begbuf:
- case endbuf:
- case wordbound:
- case notwordbound:
- case wordbeg:
- case wordend:
- case symbeg:
- case symend:
- continue;
-
+ }
- case jump:
- EXTRACT_NUMBER_AND_INCR (j, p);
- if (j < 0)
- /* Backward jumps can only go back to code that we've already
- visited. 're_compile' should make sure this is true. */
- break;
- p += j;
- switch (*p)
+ else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
+ && data->match_any_multibyte_characters == false)
+ {
+ /* Set fastmap[I] to 1 where I is a leading code of each
+ multibyte character in the range table. */
+ int c, count;
+ unsigned char lc1, lc2;
+
+ /* Make P points the range table. '+ 2' is to skip flag
+ bits for a character class. */
+ p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
+
+ /* Extract the number of ranges in range table into COUNT. */
+ EXTRACT_NUMBER_AND_INCR (count, p);
+ for (; count > 0; count--, p += 3)
{
- case on_failure_jump:
- case on_failure_keep_string_jump:
- case on_failure_jump_loop:
- case on_failure_jump_nastyloop:
- case on_failure_jump_smart:
- p++;
- break;
- default:
- continue;
- };
- /* Keep P1 to allow the 'on_failure_jump' we are jumping to
- to jump back to "just after here". */
- FALLTHROUGH;
- case on_failure_jump:
- case on_failure_keep_string_jump:
- case on_failure_jump_nastyloop:
- case on_failure_jump_loop:
- case on_failure_jump_smart:
- EXTRACT_NUMBER_AND_INCR (j, p);
- if (p + j <= p1)
- ; /* Backward jump to be ignored. */
- else
- { /* We have to look down both arms.
- We first go down the "straight" path so as to minimize
- stack usage when going through alternatives. */
- int r = analyze_first (p, pend, fastmap, multibyte);
- if (r) return r;
- p += j;
+ /* Extract the start and end of each range. */
+ EXTRACT_CHARACTER (c, p);
+ lc1 = CHAR_LEADING_CODE (c);
+ p += 3;
+ EXTRACT_CHARACTER (c, p);
+ lc2 = CHAR_LEADING_CODE (c);
+ for (j = lc1; j <= lc2; j++)
+ data->fastmap[j] = 1;
}
- continue;
+ }
+ return true;
+ case syntaxspec:
+ case notsyntaxspec:
+ /* This match depends on text properties. These end with
+ aborting optimizations. */
+ return false;
- case jump_n:
- /* This code simply does not properly handle forward jump_n. */
- DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); eassert (j < 0));
- p += 4;
- /* jump_n can either jump or fall through. The (backward) jump
- case has already been handled, so we only need to look at the
- fallthrough case. */
- continue;
+ case categoryspec:
+ case notcategoryspec:
+ not = (re_opcode_t)p[0] == notcategoryspec;
+ p++;
+ k = *p++;
+ for (j = (1 << BYTEWIDTH); j >= 0; j--)
+ if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
+ data->fastmap[j] = 1;
+
+ /* Any leading code can possibly start a character which
+ has or doesn't has the_malloc_fn specified category. */
+ if (!data->match_any_multibyte_characters)
+ {
+ for (j = MIN_MULTIBYTE_LEADING_CODE;
+ j <= MAX_MULTIBYTE_LEADING_CODE; j++)
+ data->fastmap[j] = 1;
+ data->match_any_multibyte_characters = true;
+ }
+ return true;
- case succeed_n:
- /* If N == 0, it should be an on_failure_jump_loop instead. */
- DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); eassert (j > 0));
- p += 4;
- /* We only care about one iteration of the loop, so we don't
- need to consider the case where this behaves like an
- on_failure_jump. */
- continue;
+ case at_dot:
+ case begbuf:
+ case wordbound:
+ case notwordbound:
+ case begline:
+ case endline:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case symbeg:
+ case symend:
+ /* This false doesn't mean failure but rather "not succeeded yet". */
+ return false;
+ default:
+#if ENABLE_CHECKING
+ abort (); /* We have listed all the cases. */
+#endif
+ return false;
+ }
+}
- case set_number_at:
- p += 4;
- continue;
+static bool
+analyze_first_null (const re_char *p, void *arg)
+{
+ switch (*p)
+ {
+ case succeed:
+ /* This is safe: we can't reach `pend` at all from here. */
+ return true;
+ case duplicate:
+ /* Either `duplicate` ends up matching a non-empty string, in which
+ case we're good, or it matches the empty string, in which case we
+ need to continue checking the rest of this path, which is exactly
+ what returning `true` does, here. */
+ return true;
- case start_memory:
- case stop_memory:
- p += 1;
- continue;
+ case exactn:
+ case anychar:
+ case charset_not:
+ case charset:
+ case syntaxspec:
+ case notsyntaxspec:
+ case categoryspec:
+ case notcategoryspec:
+ return true;
+ case at_dot:
+ case begbuf:
+ case wordbound:
+ case notwordbound:
+ case begline:
+ case endline:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case symbeg:
+ case symend:
+ /* This false doesn't mean failure but rather "not succeeded yet". */
+ return false;
- default:
- abort (); /* We have listed all the cases. */
- } /* switch *p++ */
+ default:
+#if ENABLE_CHECKING
+ abort (); /* We have listed all the cases. */
+#endif
+ return false;
+ }
+}
+
+/* analyze_first.
+ If fastmap is non-NULL, go through the pattern and fill fastmap
+ with all the possible leading chars. If fastmap is NULL, don't
+ bother filling it up (obviously) and only return whether the
+ pattern could potentially match the empty string.
- /* Getting here means we have found the possible starting
- characters for one path of the pattern -- and that the empty
- string does not match. We need not follow this path further. */
- return 0;
- } /* while p */
+ Return false if p matches at least one char before reaching pend.
+ Return true if p..pend might match the empty string
+ or if fastmap was not updated accurately. */
- /* We reached the end without matching anything. */
- return 1;
+static bool
+analyze_first (struct re_pattern_buffer *bufp,
+ re_char *p, re_char *pend, char *fastmap)
+{
+ eassert (pend);
+ struct anafirst_data data = { bufp ? bufp->multibyte : false,
+ fastmap, false };
+ bool safe = forall_firstchar (bufp->used ? bufp : NULL, p, pend,
+ fastmap ? analyze_first_fastmap
+ : analyze_first_null,
+ &data);
+ return !safe;
+}
-} /* analyze_first */
/* Compute a fastmap for the compiled pattern in BUFP.
A fastmap records which of the (1 << BYTEWIDTH) possible
@@ -3119,7 +3322,6 @@ static void
re_compile_fastmap (struct re_pattern_buffer *bufp)
{
char *fastmap = bufp->fastmap;
- int analysis;
eassert (fastmap && bufp->buffer);
@@ -3128,9 +3330,8 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
/* FIXME: Is the following assignment correct even when ANALYSIS < 0? */
bufp->fastmap_accurate = 1; /* It will be when we're done. */
- analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used,
- fastmap, RE_MULTIBYTE_P (bufp));
- bufp->can_be_null = (analysis != 0);
+ bufp->can_be_null = analyze_first (bufp, bufp->buffer,
+ bufp->buffer + bufp->used, fastmap);
} /* re_compile_fastmap */
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
@@ -3168,7 +3369,7 @@ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
/* Searching routines. */
/* Like re_search_2, below, but only one string is specified, and
- doesn't let you say where to stop matching. */
+ doesn't let you say where to stop matching. */
ptrdiff_t
re_search (struct re_pattern_buffer *bufp, const char *string, ptrdiff_t size,
@@ -3258,12 +3459,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ptrdiff_t size1,
/* See whether the pattern is anchored. */
anchored_start = (bufp->buffer[0] == begline);
- gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
- {
- ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
-
- SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
- }
+ RE_SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, startpos);
/* Loop through the string, looking for a place to start matching. */
for (;;)
@@ -3550,33 +3746,6 @@ skip_one_char (re_char *p)
}
-/* Jump over non-matching operations. */
-static re_char *
-skip_noops (re_char *p, re_char *pend)
-{
- int mcnt;
- while (p < pend)
- {
- switch (*p)
- {
- case start_memory:
- case stop_memory:
- p += 2; break;
- case no_op:
- p += 1; break;
- case jump:
- p += 1;
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
- p += mcnt;
- break;
- default:
- return p;
- }
- }
- eassert (p == pend);
- return p;
-}
-
/* Test if C matches charset op. *PP points to the charset or charset_not
opcode. When the function finishes, *PP will be advanced past that opcode.
C is character to test (possibly after translations) and CORIG is original
@@ -3596,7 +3765,7 @@ execute_charset (re_char **pp, int c, int corig, bool unibyte,
int count;
rtp = CHARSET_RANGE_TABLE (p);
EXTRACT_NUMBER_AND_INCR (count, rtp);
- *pp = CHARSET_RANGE_TABLE_END ((rtp), (count));
+ *pp = CHARSET_RANGE_TABLE_END (rtp, count);
}
else
*pp += 2 + CHARSET_BITMAP_SIZE (p);
@@ -3645,92 +3814,55 @@ execute_charset (re_char **pp, int c, int corig, bool unibyte,
return not;
}
-/* True if "p1 matches something" implies "p2 fails". */
+/* Case where `p2` points to an `exactn` or `endline`. */
static bool
-mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
- re_char *p2)
+mutually_exclusive_exactn (struct re_pattern_buffer *bufp, re_char *p1,
+ re_char *p2)
{
- re_opcode_t op2;
bool multibyte = RE_MULTIBYTE_P (bufp);
- unsigned char *pend = bufp->buffer + bufp->used;
- re_char *p2_orig = p2;
+ int c
+ = (re_opcode_t) *p2 == endline ? '\n'
+ : RE_STRING_CHAR (p2 + 2, multibyte);
- eassert (p1 >= bufp->buffer && p1 < pend
- && p2 >= bufp->buffer && p2 <= pend);
-
- /* Skip over open/close-group commands.
- If what follows this loop is a ...+ construct,
- look at what begins its body, since we will have to
- match at least one of that. */
- p2 = skip_noops (p2, pend);
- /* The same skip can be done for p1, except that this function
- is only used in the case where p1 is a simple match operator. */
- /* p1 = skip_noops (p1, pend); */
-
- eassert (p1 >= bufp->buffer && p1 < pend
- && p2 >= bufp->buffer && p2 <= pend);
-
- op2 = p2 == pend ? succeed : *p2;
-
- switch (op2)
+ if ((re_opcode_t) *p1 == exactn)
{
- case succeed:
- case endbuf:
- /* If we're at the end of the pattern, we can change. */
- if (skip_one_char (p1))
+ if (c != RE_STRING_CHAR (p1 + 2, multibyte))
{
- DEBUG_PRINT (" End of pattern: fast loop.\n");
+ DEBUG_PRINT (" '%c' != '%c' => fast loop.\n", c, p1[2]);
return true;
}
- break;
-
- case endline:
- case exactn:
- {
- int c
- = (re_opcode_t) *p2 == endline ? '\n'
- : RE_STRING_CHAR (p2 + 2, multibyte);
-
- if ((re_opcode_t) *p1 == exactn)
- {
- if (c != RE_STRING_CHAR (p1 + 2, multibyte))
- {
- DEBUG_PRINT (" '%c' != '%c' => fast loop.\n", c, p1[2]);
- return true;
- }
- }
-
- else if ((re_opcode_t) *p1 == charset
- || (re_opcode_t) *p1 == charset_not)
- {
- if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c),
- Qnil))
- {
- DEBUG_PRINT (" No match => fast loop.\n");
- return true;
- }
- }
- else if ((re_opcode_t) *p1 == anychar
- && c == '\n')
- {
- DEBUG_PRINT (" . != \\n => fast loop.\n");
- return true;
- }
- }
- break;
+ }
- case charset:
- {
- if ((re_opcode_t) *p1 == exactn)
- /* Reuse the code above. */
- return mutually_exclusive_p (bufp, p2, p1);
-
- /* It is hard to list up all the character in charset
- P2 if it includes multibyte character. Give up in
- such case. */
- else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
+ else if ((re_opcode_t) *p1 == charset
+ || (re_opcode_t) *p1 == charset_not)
+ {
+ if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c),
+ Qnil))
{
- /* Now, we are sure that P2 has no range table.
+ DEBUG_PRINT (" No match => fast loop.\n");
+ return true;
+ }
+ }
+ else if ((re_opcode_t) *p1 == anychar
+ && c == '\n')
+ {
+ DEBUG_PRINT (" . != \\n => fast loop.\n");
+ return true;
+ }
+ return false;
+}
+
+/* Case where `p2` points to an `charset`. */
+static bool
+mutually_exclusive_charset (struct re_pattern_buffer *bufp, re_char *p1,
+ re_char *p2)
+{
+ /* It is hard to list up all the character in charset
+ P2 if it includes multibyte character. Give up in
+ such case. */
+ if (!RE_MULTIBYTE_P (bufp) || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
+ {
+ /* Now, we are sure that P2 has no range table.
So, for the size of bitmap in P2, 'p2[1]' is
enough. But P1 may have range table, so the
size of bitmap table of P1 is extracted by
@@ -3741,113 +3873,161 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
bitmap table. So, in both cases, it is enough to test
only the bitmap table of P1. */
- if ((re_opcode_t) *p1 == charset)
- {
- int idx;
- /* We win if the charset inside the loop
+ if ((re_opcode_t) *p1 == charset)
+ {
+ int idx;
+ /* We win if the charset inside the loop
has no overlap with the one after the loop. */
- for (idx = 0;
- (idx < (int) p2[1]
- && idx < CHARSET_BITMAP_SIZE (p1));
- idx++)
- if ((p2[2 + idx] & p1[2 + idx]) != 0)
- break;
+ for (idx = 0;
+ (idx < (int) p2[1]
+ && idx < CHARSET_BITMAP_SIZE (p1));
+ idx++)
+ if ((p2[2 + idx] & p1[2 + idx]) != 0)
+ break;
- if (idx == p2[1]
- || idx == CHARSET_BITMAP_SIZE (p1))
- {
- DEBUG_PRINT (" No match => fast loop.\n");
- return true;
- }
- }
- else if ((re_opcode_t) *p1 == charset_not)
+ if (idx == p2[1]
+ || idx == CHARSET_BITMAP_SIZE (p1))
{
- int idx;
- /* We win if the charset_not inside the loop lists
+ DEBUG_PRINT (" No match => fast loop.\n");
+ return true;
+ }
+ }
+ else if ((re_opcode_t) *p1 == charset_not)
+ {
+ int idx;
+ /* We win if the charset_not inside the loop lists
every character listed in the charset after. */
- for (idx = 0; idx < (int) p2[1]; idx++)
- if (! (p2[2 + idx] == 0
- || (idx < CHARSET_BITMAP_SIZE (p1)
- && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
- break;
+ for (idx = 0; idx < (int) p2[1]; idx++)
+ if (! (p2[2 + idx] == 0
+ || (idx < CHARSET_BITMAP_SIZE (p1)
+ && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
+ break;
- if (idx == p2[1])
- {
- DEBUG_PRINT (" No match => fast loop.\n");
- return true;
- }
- }
- }
+ if (idx == p2[1])
+ {
+ DEBUG_PRINT (" No match => fast loop.\n");
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+struct mutexcl_data {
+ struct re_pattern_buffer *bufp;
+ re_char *p1;
+ bool unconstrained;
+};
+
+static bool
+mutually_exclusive_one (re_char *p2, void *arg)
+{
+ struct mutexcl_data *data = arg;
+ switch (*p2)
+ {
+ case succeed:
+ /* If `p1` matches, `succeed` can still match, so we should return
+ `false`. *BUT* when N iterations of `p1` and N+1 iterations of `p1`
+ match, the `succeed` that comes after N+1 always takes precedence
+ over the one after N because we always prefer a longer match, so
+ the succeed after N can actually be replaced by a "fail" without
+ changing the end result.
+ In this sense, "if `p1` matches, `succeed` can't match".
+ So we can return `true`.
+ *BUT* this only holds if we're sure that the N+1 will indeed succeed,
+ so we need to make sure there is no other matching operator between
+ the exit of the iteration and the `succeed`. */
+ return data->unconstrained;
+
+/* Remember that there may be an empty matching operator on the way.
+ If we return true, this is the "end" of this control flow path,
+ so it can't get in the way of a subsequent `succeed. */
+#define RETURN_CONSTRAIN(v) \
+ { bool tmp = (v); \
+ if (!tmp) \
+ data->unconstrained = false; \
+ return tmp; \
+ }
+
+ case endline:
+ RETURN_CONSTRAIN (mutually_exclusive_exactn (data->bufp, data->p1, p2));
+ case exactn:
+ return mutually_exclusive_exactn (data->bufp, data->p1, p2);
+ case charset:
+ {
+ if (*data->p1 == exactn)
+ return mutually_exclusive_exactn (data->bufp, p2, data->p1);
+ else
+ return mutually_exclusive_charset (data->bufp, data->p1, p2);
}
- break;
case charset_not:
- switch (*p1)
+ switch (*data->p1)
{
case exactn:
+ return mutually_exclusive_exactn (data->bufp, p2, data->p1);
case charset:
- /* Reuse the code above. */
- return mutually_exclusive_p (bufp, p2, p1);
+ return mutually_exclusive_charset (data->bufp, p2, data->p1);
case charset_not:
/* When we have two charset_not, it's very unlikely that
they don't overlap. The union of the two sets of excluded
chars should cover all possible chars, which, as a matter of
fact, is virtually impossible in multibyte buffers. */
- break;
+ return false;
}
- break;
-
- case wordend:
- return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
- case symend:
- return ((re_opcode_t) *p1 == syntaxspec
- && (p1[1] == Ssymbol || p1[1] == Sword));
+ return false;
+ case anychar:
+ return false; /* FIXME: exactn \n ? */
+ case syntaxspec:
+ return (*data->p1 == notsyntaxspec && data->p1[1] == p2[1]);
case notsyntaxspec:
- return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
+ return (*data->p1 == syntaxspec && data->p1[1] == p2[1]);
+ case categoryspec:
+ return (*data->p1 == notcategoryspec && data->p1[1] == p2[1]);
+ case notcategoryspec:
+ return (*data->p1 == categoryspec && data->p1[1] == p2[1]);
+ case endbuf:
+ return true;
case wordbeg:
- return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
+ RETURN_CONSTRAIN (*data->p1 == notsyntaxspec && data->p1[1] == Sword);
+ case wordend:
+ RETURN_CONSTRAIN (*data->p1 == syntaxspec && data->p1[1] == Sword);
case symbeg:
- return ((re_opcode_t) *p1 == notsyntaxspec
- && (p1[1] == Ssymbol || p1[1] == Sword));
- case syntaxspec:
- return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
+ RETURN_CONSTRAIN (*data->p1 == notsyntaxspec
+ && (data->p1[1] == Ssymbol || data->p1[1] == Sword));
+ case symend:
+ RETURN_CONSTRAIN (*data->p1 == syntaxspec
+ && (data->p1[1] == Ssymbol || data->p1[1] == Sword));
+ case at_dot:
+ case begbuf:
case wordbound:
- return (((re_opcode_t) *p1 == notsyntaxspec
- || (re_opcode_t) *p1 == syntaxspec)
- && p1[1] == Sword);
-
- case categoryspec:
- return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
- case notcategoryspec:
- return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
+ case notwordbound:
+ case begline:
+ RETURN_CONSTRAIN (false);
- case on_failure_jump_nastyloop:
- case on_failure_jump_smart:
- case on_failure_jump_loop:
- case on_failure_keep_string_jump:
- case on_failure_jump:
- {
- int mcnt;
- p2++;
- EXTRACT_NUMBER_AND_INCR (mcnt, p2);
- /* Don't just test `mcnt > 0` because non-greedy loops have
- their test at the end with an unconditional jump at the start. */
- if (p2 > p2_orig && mcnt >= 0) /* Ensure forward progress. */
- return (mutually_exclusive_p (bufp, p1, p2)
- && mutually_exclusive_p (bufp, p1, p2 + mcnt));
- break;
- }
+ case duplicate:
+ /* At this point, we know nothing about what this can match, sadly. */
+ return false;
default:
- ;
+#if ENABLE_CHECKING
+ abort (); /* We have listed all the cases. */
+#endif
+ return false;
}
-
- /* Safe default. */
- return false;
}
+/* True if "p1 matches something" implies "p2 fails". */
+
+static bool
+mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
+ re_char *p2)
+{
+ struct mutexcl_data data = { bufp, p1, true };
+ return forall_firstchar (bufp, p2, NULL, mutually_exclusive_one, &data);
+}
/* Matching routines. */
@@ -3871,10 +4051,7 @@ re_match_2 (struct re_pattern_buffer *bufp,
{
ptrdiff_t result;
- ptrdiff_t charpos;
- gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
- charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
- SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
+ RE_SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, pos);
result = re_match_2_internal (bufp, (re_char *) string1, size1,
(re_char *) string2, size2,
@@ -3979,6 +4156,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
/* This keeps track of how many buffer/string positions we examined. */
ptrdiff_t nchars = 0;
+ /* Final return value of the function. */
+ ptrdiff_t retval = -1; /* Presumes failure to match for now. */
+
#ifdef DEBUG_COMPILES_ARGUMENTS
/* Counts the total number of registers pushed. */
ptrdiff_t num_regs_pushed = 0;
@@ -4233,15 +4413,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt);
- unbind_to (count, Qnil);
- SAFE_FREE ();
- /* The factor of 50 below is a heuristic that needs to be tuned. It
- means we consider 50 buffer positions examined by this function
- roughly equivalent to the display engine iterating over a single
- buffer position. */
- if (max_redisplay_ticks > 0 && nchars > 0)
- update_redisplay_ticks (nchars / 50 + 1, NULL);
- return dcnt;
+ retval = dcnt;
+ goto endof_re_match;
}
/* Otherwise match next pattern command. */
@@ -4734,7 +4907,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
/* Have to succeed matching what follows at least n times.
- After that, handle like 'on_failure_jump'. */
+ After that, handle like 'on_failure_jump_loop'. */
case succeed_n:
/* Signedness doesn't matter since we only compare MCNT to 0. */
EXTRACT_NUMBER (mcnt, p + 2);
@@ -4806,8 +4979,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
int c1, c2;
int s1, s2;
int dummy;
- ptrdiff_t offset = PTR_TO_OFFSET (d);
- ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
+ ptrdiff_t offset = POINTER_TO_OFFSET (d);
+ ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
UPDATE_SYNTAX_TABLE (charpos);
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
nchars++;
@@ -4846,8 +5019,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
int c1, c2;
int s1, s2;
int dummy;
- ptrdiff_t offset = PTR_TO_OFFSET (d);
- ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ptrdiff_t offset = POINTER_TO_OFFSET (d);
+ ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
PREFETCH ();
GET_CHAR_AFTER (c2, d, dummy);
@@ -4889,8 +5062,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
int c1, c2;
int s1, s2;
int dummy;
- ptrdiff_t offset = PTR_TO_OFFSET (d);
- ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
+ ptrdiff_t offset = POINTER_TO_OFFSET (d);
+ ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
UPDATE_SYNTAX_TABLE (charpos);
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
nchars++;
@@ -4931,8 +5104,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
is the character at D, and S2 is the syntax of C2. */
int c1, c2;
int s1, s2;
- ptrdiff_t offset = PTR_TO_OFFSET (d);
- ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ptrdiff_t offset = POINTER_TO_OFFSET (d);
+ ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
PREFETCH ();
c2 = RE_STRING_CHAR (d, target_multibyte);
@@ -4972,8 +5145,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
is the character at D, and S2 is the syntax of C2. */
int c1, c2;
int s1, s2;
- ptrdiff_t offset = PTR_TO_OFFSET (d);
- ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
+ ptrdiff_t offset = POINTER_TO_OFFSET (d);
+ ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
UPDATE_SYNTAX_TABLE (charpos);
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
nchars++;
@@ -5008,8 +5181,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
mcnt);
PREFETCH ();
{
- ptrdiff_t offset = PTR_TO_OFFSET (d);
- ptrdiff_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ptrdiff_t offset = POINTER_TO_OFFSET (d);
+ ptrdiff_t pos1 = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (pos1);
}
{
@@ -5082,8 +5255,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
case succeed_n:
d = str;
continue_failure_jump:
- EXTRACT_NUMBER_AND_INCR (mcnt, pat);
- p = pat + mcnt;
+ p = extract_address (pat);
break;
case no_op:
@@ -5106,13 +5278,18 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
if (best_regs_set)
goto restore_best_regs;
+endof_re_match:
unbind_to (count, Qnil);
SAFE_FREE ();
+ /* The factor of 50 below is a heuristic that needs to be tuned.
+ It means we consider 50 buffer positions examined by this function
+ roughly equivalent to the display engine iterating over a single
+ buffer position. */
if (max_redisplay_ticks > 0 && nchars > 0)
update_redisplay_ticks (nchars / 50 + 1, NULL);
- return -1; /* Failure to match. */
+ return retval;
}
/* Subroutine definitions for re_match_2. */