From cabddfd1bd29060a914a62f201d6bbb050528407 Mon Sep 17 00:00:00 2001 From: Basile Henry Date: Sun, 7 Mar 2021 14:10:30 +0100 Subject: [PATCH 1/3] Use Unicode words for movement This also skips multiple consecutive non-word characters (word boundaries) as part of the movement in a similar way to how bash/readline do it. --- src/line_buffer.rs | 69 +++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/src/line_buffer.rs b/src/line_buffer.rs index 68c2e019..ff36cdcb 100644 --- a/src/line_buffer.rs +++ b/src/line_buffer.rs @@ -100,38 +100,63 @@ impl LineBuffer { } pub fn move_word_left(&mut self) -> usize { - match self - .buffer - .rmatch_indices(&[' ', '\t'][..]) - .find(|(index, _)| index < &(self.insertion_point - 1)) - { - Some((index, _)) => { - self.insertion_point = index + 1; - } - None => { - self.insertion_point = 0; + let mut words = self.buffer[..self.insertion_point - 1] // valid UTF-8 slice when insertion_point at grapheme boundary + .split_word_bound_indices() + .rev(); + + loop { + match words.next() { + Some((_, word)) if is_word_boundary(word) => { + // This is a word boundary, go to the next one + continue; + } + Some((index, _)) => { + self.insertion_point = index; + } + None => { + self.insertion_point = 0; + } } + + return self.insertion_point; } - self.insertion_point } pub fn move_word_right(&mut self) -> usize { - match self - .buffer - .match_indices(&[' ', '\t'][..]) - .find(|(index, _)| index > &(self.insertion_point)) - { - Some((index, _)) => { - self.insertion_point = index + 1; - } - None => { - self.insertion_point = self.get_buffer_len(); + let mut words = self.buffer[self.insertion_point..] + .split_word_bound_indices(); + + let mut word_found = false; + + loop { + match words.next() { + Some((offset, word)) => { + if word_found { + self.insertion_point += offset; + } else { + // If the current word isn't a word boundary we have found the word to move + // past + word_found = !is_word_boundary(word); + + // Go to the next word + continue; + } + } + None => { + self.insertion_point = self.buffer.len(); + } } + + return self.insertion_point; } - self.insertion_point } } +/// Match any sequence of characters that are considered a word boundary +fn is_word_boundary(s: &str) -> bool { + !s.chars().any(char::is_alphanumeric) +} + #[test] fn emoji_test() { //TODO From 2e3a95503b0b3f1e6fc40132ea7bb78593105593 Mon Sep 17 00:00:00 2001 From: Basile Henry Date: Sun, 7 Mar 2021 23:12:20 +0100 Subject: [PATCH 2/3] Simplify logic --- src/line_buffer.rs | 55 ++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 38 deletions(-) diff --git a/src/line_buffer.rs b/src/line_buffer.rs index ff36cdcb..59d7c4ca 100644 --- a/src/line_buffer.rs +++ b/src/line_buffer.rs @@ -104,51 +104,30 @@ impl LineBuffer { .split_word_bound_indices() .rev(); - loop { - match words.next() { - Some((_, word)) if is_word_boundary(word) => { - // This is a word boundary, go to the next one - continue; - } - Some((index, _)) => { - self.insertion_point = index; - } - None => { - self.insertion_point = 0; - } + while let Some((index, word)) = words.next() { + if !is_word_boundary(word) { + self.insertion_point = index; + return self.insertion_point; } - - return self.insertion_point; } + + self.insertion_point = 0; + self.insertion_point } pub fn move_word_right(&mut self) -> usize { - let mut words = self.buffer[self.insertion_point..] - .split_word_bound_indices(); - - let mut word_found = false; - - loop { - match words.next() { - Some((offset, word)) => { - if word_found { - self.insertion_point += offset; - } else { - // If the current word isn't a word boundary we have found the word to move - // past - word_found = !is_word_boundary(word); - - // Go to the next word - continue; - } - } - None => { - self.insertion_point = self.buffer.len(); - } - } + let mut words = self.buffer[self.insertion_point..].split_word_bound_indices(); - return self.insertion_point; + while let Some((offset, word)) = words.next() { + if !is_word_boundary(word) { + // Move the insertion point just past the end of the next word + self.insertion_point += offset + word.len(); + return self.insertion_point; + } } + + self.insertion_point = self.buffer.len(); + self.insertion_point } } From 713c9f4060ea79f100e7f298f6691ccec767f18a Mon Sep 17 00:00:00 2001 From: Basile Henry Date: Sun, 7 Mar 2021 23:32:26 +0100 Subject: [PATCH 3/3] Fix off by one slicing issue and simplify logic further --- src/line_buffer.rs | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/line_buffer.rs b/src/line_buffer.rs index 59d7c4ca..f7dca189 100644 --- a/src/line_buffer.rs +++ b/src/line_buffer.rs @@ -100,33 +100,37 @@ impl LineBuffer { } pub fn move_word_left(&mut self) -> usize { - let mut words = self.buffer[..self.insertion_point - 1] // valid UTF-8 slice when insertion_point at grapheme boundary + let mut words = self.buffer[..self.insertion_point] .split_word_bound_indices() - .rev(); + .filter(|(_, word)| !is_word_boundary(word)); - while let Some((index, word)) = words.next() { - if !is_word_boundary(word) { + match words.next_back() { + Some((index, _)) => { self.insertion_point = index; - return self.insertion_point; + } + None => { + self.insertion_point = 0; } } - self.insertion_point = 0; self.insertion_point } pub fn move_word_right(&mut self) -> usize { - let mut words = self.buffer[self.insertion_point..].split_word_bound_indices(); + let mut words = self.buffer[self.insertion_point..] + .split_word_bound_indices() + .filter(|(_, word)| !is_word_boundary(word)); - while let Some((offset, word)) = words.next() { - if !is_word_boundary(word) { + match words.next() { + Some((offset, word)) => { // Move the insertion point just past the end of the next word self.insertion_point += offset + word.len(); - return self.insertion_point; + } + None => { + self.insertion_point = self.buffer.len(); } } - self.insertion_point = self.buffer.len(); self.insertion_point } }