diff options
author | Eli Zaretskii <eliz@gnu.org> | 2016-03-06 18:14:46 +0200 |
---|---|---|
committer | Eli Zaretskii <eliz@gnu.org> | 2016-03-06 18:14:46 +0200 |
commit | cc057e43130780e566f4b901a59a88edb3f5130d (patch) | |
tree | 085a5bb85eb5a019f8d17a837a6110ff58a3ab55 | |
parent | e51b27ec2ce214ef3d8377a27ee9d857bcc66afc (diff) | |
download | emacs-cc057e4.tar.gz |
Speed up redisplay of binary files with long series of nulls
* src/bidi.c (bidi_resolve_weak): Avoid entering a loop searching
for a character needed for resolving the type of a series of BN
and ET characters, as required by rule W5 of UAX#9, if the results
of the resolution are known in advance, because we are at level
zero, and the previous strong character was L.
(bidi_resolve_neutral): Partially resurrect the optimization for a
long series of control characters in an otherwise strictly L2R
text.
(bidi_level_of_next_char): Don't enter the loop that searches for
a paragraph separator if the current character is already at base
embedding level. (Bug#22739)
-rw-r--r-- | src/bidi.c | 40 |
1 file changed, 34 insertions, 6 deletions
```diff
diff --git a/src/bidi.c b/src/bidi.c
index e7787054e23..9797517e420 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -2318,7 +2318,31 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
                   if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
                     type = WEAK_EN;
                 }
-              else if (bidi_it->next_en_pos >=0)
+              else if (type == WEAK_BN
+                       /* This condition is for the following important case:
+
+                          . we are at level zero
+                          . either previous strong character was L,
+                             or we've seen no strong characters since sos
+                             and the base paragraph direction is L2R
+                          . this BN is NOT a bidi directional control
+
+                          For such a situation, either this BN will be
+                          converted to EN per W5, and then to L by virtue
+                          of W7; or it will become ON per W6, and then L
+                          because of N1/N2. So we take a shortcut here
+                          and make it L right away, to avoid the
+                          potentially costly loop below. This is
+                          important when the buffer has a long series of
+                          control characters, like binary nulls, and no
+                          R2L characters at all. */
+                       && new_level == 0
+                       && !bidi_explicit_dir_char (bidi_it->ch)
+                       && ((bidi_it->last_strong.type == STRONG_L)
+                           || (bidi_it->last_strong.type == UNKNOWN_BT
+                               && bidi_it->sos == L2R)))
+                type = STRONG_L;
+              else if (bidi_it->next_en_pos >= 0)
                 {
                   /* We overstepped the last known position for ET
                      resolution but there could be other such characters
@@ -2981,9 +3005,10 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
          entering the expensive loop in the "else" clause. */
       else if (current_level == 0
                && bidi_it->prev_for_neutral.type == STRONG_L
-               && type != WEAK_BN
-               && !bidi_explicit_dir_char (bidi_it->ch)
-               && !bidi_isolate_fmt_char (type))
+               && (ASCII_CHAR_P (bidi_it->ch)
+                   || (type != WEAK_BN
+                       && !bidi_explicit_dir_char (bidi_it->ch)
+                       && !bidi_isolate_fmt_char (type))))
         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
                                        STRONG_L, current_level);
       else if (/* current level is 1 */
@@ -3163,7 +3188,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
         }
     }
 
-  /* Perhaps the character we want is already cached s fully resolved.
+  /* Perhaps the character we want is already cached as fully resolved.
      If it is, the call to bidi_cache_find below will return a type
      other than UNKNOWN_BT. */
   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
@@ -3223,7 +3248,10 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
   if ((bidi_it->orig_type == NEUTRAL_WS
        || bidi_it->orig_type == WEAK_BN
        || bidi_isolate_fmt_char (bidi_it->orig_type))
-      && bidi_it->next_for_ws.charpos < bidi_it->charpos)
+      && bidi_it->next_for_ws.charpos < bidi_it->charpos
+      /* If this character is already at base level, we don't need to
+         reset it, so avoid the potentially costly loop below. */
+      && level != bidi_it->level_stack[0].level)
     {
       int ch;
       ptrdiff_t clen = bidi_it->ch_len;
```