diff --git a/lsp/src/Language/LSP/VFS.hs b/lsp/src/Language/LSP/VFS.hs index ecf09599..a26c6be2 100644 --- a/lsp/src/Language/LSP/VFS.hs +++ b/lsp/src/Language/LSP/VFS.hs @@ -81,10 +81,11 @@ import Data.Row import Data.Text (Text) import Data.Text qualified as T import Data.Text.IO qualified as T +import Data.Text.Lines as Char (Position (..)) import Data.Text.Prettyprint.Doc hiding (line) -import Data.Text.Rope qualified as URope -import Data.Text.Utf16.Rope (Rope) -import Data.Text.Utf16.Rope qualified as Rope +import Data.Text.Utf16.Lines as Utf16 (Position (..)) +import Data.Text.Utf16.Rope.Mixed (Rope) +import Data.Text.Utf16.Rope.Mixed qualified as Rope import Language.LSP.Protocol.Lens qualified as J import Language.LSP.Protocol.Message qualified as J import Language.LSP.Protocol.Types qualified as J @@ -115,7 +116,7 @@ data VFS = VFS deriving (Show) data VfsLog - = SplitInsideCodePoint Rope.Position Rope + = SplitInsideCodePoint Utf16.Position Rope | URINotFound J.NormalizedUri | Opening J.NormalizedUri | Closing J.NormalizedUri @@ -350,7 +351,7 @@ applyChange :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> J.TextD applyChange logger str (J.TextDocumentContentChangeEvent (J.InL e)) | J.Range (J.Position sl sc) (J.Position fl fc) <- e .! #range , txt <- e .! #text = - changeChars logger str (Rope.Position (fromIntegral sl) (fromIntegral sc)) (Rope.Position (fromIntegral fl) (fromIntegral fc)) txt + changeChars logger str (Utf16.Position (fromIntegral sl) (fromIntegral sc)) (Utf16.Position (fromIntegral fl) (fromIntegral fc)) txt applyChange _ _ (J.TextDocumentContentChangeEvent (J.InR e)) = pure $ Rope.fromText $ e .! #text @@ -360,11 +361,11 @@ applyChange _ _ (J.TextDocumentContentChangeEvent (J.InR e)) = the given range with the new text. If the given positions lie within a code point then this does nothing (returns the original 'Rope') and logs. 
-} -changeChars :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> Rope.Position -> Rope.Position -> Text -> m Rope +changeChars :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> Utf16.Position -> Utf16.Position -> Text -> m Rope changeChars logger str start finish new = do - case Rope.splitAtPosition finish str of + case Rope.utf16SplitAtPosition finish str of Nothing -> logger <& SplitInsideCodePoint finish str `WithSeverity` Warning >> pure str - Just (before, after) -> case Rope.splitAtPosition start before of + Just (before, after) -> case Rope.utf16SplitAtPosition start before of Nothing -> logger <& SplitInsideCodePoint start before `WithSeverity` Warning >> pure str Just (before', _) -> pure $ mconcat [before', Rope.fromText new, after] @@ -402,11 +403,14 @@ In particular, we use the good asymptotics of 'Rope' to our advantage: - We then split the line at the given position, and check how long the prefix is, which takes linear time in the length of the (single) line. -We also may need to convert the line back and forth between ropes with different indexing. Again -this is linear time in the length of the line. - So the overall process is logarithmic in the number of lines, and linear in the length of the specific line. Which is okay-ish, so long as we don't have very long lines. + +We are not able to use `Rope.splitAtPosition` here, +because it gives the same result when the column index is out of range and when the column +index points at the newline character: in both cases the prefix wraps over to the next line, +yielding (nextLineNum, 0), so we could not distinguish the two. The first case should return +`Nothing`, while the second should return `Just (CurrentLineNum, columnNumberConverted)`. -} {- | Extracts a specific line from a 'Rope.Rope'. @@ -415,41 +419,12 @@ line. Which is okay-ish, so long as we don't have very long lines. 
extractLine :: Rope.Rope -> Word -> Maybe Rope.Rope extractLine rope l = do -- Check for the line being out of bounds - let lastLine = Rope.posLine $ Rope.lengthAsPosition rope + let lastLine = Utf16.posLine $ Rope.utf16LengthAsPosition rope guard $ l <= lastLine - let (_, suffix) = Rope.splitAtLine l rope (prefix, _) = Rope.splitAtLine 1 suffix pure prefix -{- | Translate a code-point offset into a code-unit offset. - Linear in the length of the rope. --} -codePointOffsetToCodeUnitOffset :: URope.Rope -> Word -> Maybe Word -codePointOffsetToCodeUnitOffset rope offset = do - -- Check for the position being out of bounds - guard $ offset <= URope.length rope - -- Split at the given position in *code points* - let (prefix, _) = URope.splitAt offset rope - -- Convert the prefix to a rope using *code units* - utf16Prefix = Rope.fromText $ URope.toText prefix - -- Get the length of the prefix in *code units* - pure $ Rope.length utf16Prefix - -{- | Translate a UTF-16 code-unit offset into a code-point offset. - Linear in the length of the rope. --} -codeUnitOffsetToCodePointOffset :: Rope.Rope -> Word -> Maybe Word -codeUnitOffsetToCodePointOffset rope offset = do - -- Check for the position being out of bounds - guard $ offset <= Rope.length rope - -- Split at the given position in *code units* - (prefix, _) <- Rope.splitAt offset rope - -- Convert the prefix to a rope using *code points* - let utfPrefix = URope.fromText $ Rope.toText prefix - -- Get the length of the prefix in *code points* - pure $ URope.length utfPrefix - {- | Given a virtual file, translate a 'CodePointPosition' in that file into a 'J.Position' in that file. Will return 'Nothing' if the requested position is out of bounds of the document. @@ -458,15 +433,12 @@ codeUnitOffsetToCodePointOffset rope offset = do the position. 
-} codePointPositionToPosition :: VirtualFile -> CodePointPosition -> Maybe J.Position -codePointPositionToPosition vFile (CodePointPosition l cpc) = do +codePointPositionToPosition vFile (CodePointPosition l c) = do -- See Note [Converting between code points and code units] let text = _file_text vFile - utf16Line <- extractLine text (fromIntegral l) - -- Convert the line a rope using *code points* - let utfLine = URope.fromText $ Rope.toText utf16Line - - cuc <- codePointOffsetToCodeUnitOffset utfLine (fromIntegral cpc) - pure $ J.Position l (fromIntegral cuc) + lineRope <- extractLine text $ fromIntegral l + guard $ c <= fromIntegral (Rope.charLength lineRope) + return $ J.Position l (fromIntegral $ Rope.utf16Length $ fst $ Rope.charSplitAt (fromIntegral c) lineRope) {- | Given a virtual file, translate a 'CodePointRange' in that file into a 'J.Range' in that file. @@ -487,13 +459,12 @@ codePointRangeToRange vFile (CodePointRange b e) = the position. -} positionToCodePointPosition :: VirtualFile -> J.Position -> Maybe CodePointPosition -positionToCodePointPosition vFile (J.Position l cuc) = do +positionToCodePointPosition vFile (J.Position l c) = do -- See Note [Converting between code points and code units] let text = _file_text vFile - utf16Line <- extractLine text (fromIntegral l) - - cpc <- codeUnitOffsetToCodePointOffset utf16Line (fromIntegral cuc) - pure $ CodePointPosition l (fromIntegral cpc) + lineRope <- extractLine text $ fromIntegral l + guard $ c <= fromIntegral (Rope.utf16Length lineRope) + CodePointPosition l . fromIntegral . Rope.charLength . fst <$> Rope.utf16SplitAt (fromIntegral c) lineRope {- | Given a virtual file, translate a 'J.Range' in that file into a 'CodePointRange' in that file. @@ -535,7 +506,7 @@ getCompletionPrefix pos@(J.Position l c) (VirtualFile _ _ ropetext) = lastMaybe xs = Just $ last xs let curRope = fst $ Rope.splitAtLine 1 $ snd $ Rope.splitAtLine (fromIntegral l) ropetext - beforePos <- Rope.toText . 
fst <$> Rope.splitAt (fromIntegral c) curRope + beforePos <- Rope.toText . fst <$> Rope.utf16SplitAt (fromIntegral c) curRope curWord <- if | T.null beforePos -> Just "" diff --git a/lsp/test/VspSpec.hs b/lsp/test/VspSpec.hs index 335620ec..8167553e 100644 --- a/lsp/test/VspSpec.hs +++ b/lsp/test/VspSpec.hs @@ -6,7 +6,7 @@ module VspSpec where import Data.Row import Data.String import Data.Text qualified as T -import Data.Text.Utf16.Rope qualified as Rope +import Data.Text.Utf16.Rope.Mixed qualified as Rope import Language.LSP.Protocol.Types qualified as J import Language.LSP.VFS