summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eli Zaretskii <eliz@gnu.org> 2016-03-06 18:14:46 +0200
committer: Eli Zaretskii <eliz@gnu.org> 2016-03-06 18:14:46 +0200
commit: cc057e43130780e566f4b901a59a88edb3f5130d (patch)
tree: 085a5bb85eb5a019f8d17a837a6110ff58a3ab55
parent: e51b27ec2ce214ef3d8377a27ee9d857bcc66afc (diff)
download: emacs-cc057e4.tar.gz
Speed up redisplay of binary files with long series of nulls
* src/bidi.c (bidi_resolve_weak): Avoid entering a loop searching for a character needed for resolving the type of a series of BN and ET characters, as required by rule W5 of UAX#9, if the results of the resolution are known in advance, because we are at level zero, and the previous strong character was L.
(bidi_resolve_neutral): Partially resurrect the optimization for a long series of control characters in an otherwise strictly L2R text.
(bidi_level_of_next_char): Don't enter the loop that searches for a paragraph separator if the current character is already at base embedding level. (Bug#22739)
-rw-r--r-- src/bidi.c 40
1 files changed, 34 insertions, 6 deletions
diff --git a/src/bidi.c b/src/bidi.c
index e7787054e23..9797517e420 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -2318,7 +2318,31 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
type = WEAK_EN;
}
- else if (bidi_it->next_en_pos >=0)
+ else if (type == WEAK_BN
+ /* This condition is for the following important case:
+
+ . we are at level zero
+ . either previous strong character was L,
+ or we've seen no strong characters since sos
+ and the base paragraph direction is L2R
+ . this BN is NOT a bidi directional control
+
+ For such a situation, either this BN will be
+ converted to EN per W5, and then to L by virtue
+ of W7; or it will become ON per W6, and then L
+ because of N1/N2. So we take a shortcut here
+ and make it L right away, to avoid the
+ potentially costly loop below. This is
+ important when the buffer has a long series of
+ control characters, like binary nulls, and no
+ R2L characters at all. */
+ && new_level == 0
+ && !bidi_explicit_dir_char (bidi_it->ch)
+ && ((bidi_it->last_strong.type == STRONG_L)
+ || (bidi_it->last_strong.type == UNKNOWN_BT
+ && bidi_it->sos == L2R)))
+ type = STRONG_L;
+ else if (bidi_it->next_en_pos >= 0)
{
/* We overstepped the last known position for ET
resolution but there could be other such characters
@@ -2981,9 +3005,10 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
entering the expensive loop in the "else" clause. */
else if (current_level == 0
&& bidi_it->prev_for_neutral.type == STRONG_L
- && type != WEAK_BN
- && !bidi_explicit_dir_char (bidi_it->ch)
- && !bidi_isolate_fmt_char (type))
+ && (ASCII_CHAR_P (bidi_it->ch)
+ || (type != WEAK_BN
+ && !bidi_explicit_dir_char (bidi_it->ch)
+ && !bidi_isolate_fmt_char (type))))
type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
STRONG_L, current_level);
else if (/* current level is 1 */
@@ -3163,7 +3188,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
}
}
- /* Perhaps the character we want is already cached s fully resolved.
+ /* Perhaps the character we want is already cached as fully resolved.
If it is, the call to bidi_cache_find below will return a type
other than UNKNOWN_BT. */
if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
@@ -3223,7 +3248,10 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
if ((bidi_it->orig_type == NEUTRAL_WS
|| bidi_it->orig_type == WEAK_BN
|| bidi_isolate_fmt_char (bidi_it->orig_type))
- && bidi_it->next_for_ws.charpos < bidi_it->charpos)
+ && bidi_it->next_for_ws.charpos < bidi_it->charpos
+ /* If this character is already at base level, we don't need to
+ reset it, so avoid the potentially costly loop below. */
+ && level != bidi_it->level_stack[0].level)
{
int ch;
ptrdiff_t clen = bidi_it->ch_len;