summaryrefslogtreecommitdiff
path: root/src/regex-emacs.c
diff options
context:
space:
mode:
author Mattias Engdegård <mattiase@acm.org> 2020-12-08 12:47:58 +0100
committer Mattias Engdegård <mattiase@acm.org> 2020-12-09 10:35:13 +0100
commitbe4d6b043fa79e2d9a9911ca1c48bdcc84e3bba9 (patch)
treec44d668b1703733302d0c8131b4f86c55cf88124 /src/regex-emacs.c
parent22caab8bacf76ae439f8b647218b37334bfd87bd (diff)
downloademacs-be4d6b043fa79e2d9a9911ca1c48bdcc84e3bba9.tar.gz
Fix [:upper:] and [:lower:] for Unicode characters (bug#11309)
* src/regex-emacs.c (execute_charset): Add canon_table argument to
allow expression of a correct predicate for [:upper:] and [:lower:].
(mutually_exclusive_p, re_match_2_internal): Pass extra argument.
* test/src/regex-emacs-tests.el (regexp-case-fold, regexp-eszett):
New tests.  Parts of regexp-eszett still fail and are commented out.
Diffstat (limited to 'src/regex-emacs.c')
-rw-r--r-- src/regex-emacs.c 17
1 files changed, 9 insertions, 8 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index 971a5f63749..904ca0c7b95 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -3575,9 +3575,11 @@ skip_noops (re_char *p, re_char *pend)
opcode. When the function finishes, *PP will be advanced past that opcode.
C is character to test (possibly after translations) and CORIG is original
character (i.e. without any translations). UNIBYTE denotes whether c is
- unibyte or multibyte character. */
+ unibyte or multibyte character.
+ CANON_TABLE is the canonicalisation table for case folding or Qnil. */
static bool
-execute_charset (re_char **pp, int c, int corig, bool unibyte)
+execute_charset (re_char **pp, int c, int corig, bool unibyte,
+ Lisp_Object canon_table)
{
eassume (0 <= c && 0 <= corig);
re_char *p = *pp, *rtp = NULL;
@@ -3617,11 +3619,9 @@ execute_charset (re_char **pp, int c, int corig, bool unibyte)
(class_bits & BIT_BLANK && ISBLANK (c)) ||
(class_bits & BIT_WORD && ISWORD (c)) ||
((class_bits & BIT_UPPER) &&
- (ISUPPER (c) || (corig != c &&
- c == downcase (corig) && ISLOWER (c)))) ||
+ (ISUPPER (corig) || (!NILP (canon_table) && ISLOWER (corig)))) ||
((class_bits & BIT_LOWER) &&
- (ISLOWER (c) || (corig != c &&
- c == upcase (corig) && ISUPPER(c)))) ||
+ (ISLOWER (corig) || (!NILP (canon_table) && ISUPPER (corig)))) ||
(class_bits & BIT_PUNCT && ISPUNCT (c)) ||
(class_bits & BIT_GRAPH && ISGRAPH (c)) ||
(class_bits & BIT_PRINT && ISPRINT (c)))
@@ -3696,7 +3696,8 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
else if ((re_opcode_t) *p1 == charset
|| (re_opcode_t) *p1 == charset_not)
{
- if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c)))
+ if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c),
+ Qnil))
{
DEBUG_PRINT (" No match => fast loop.\n");
return true;
@@ -4367,7 +4368,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
}
p -= 1;
- if (!execute_charset (&p, c, corig, unibyte_char))
+ if (!execute_charset (&p, c, corig, unibyte_char, translate))
goto fail;
d += len;