Skip to content

Commit 955e7f4

Browse files
committed
internal/lsp/fscache: Create CUECacheFS and OverlayFS
Our module-loading code has all been built around fs.FS and related interfaces. The LSP speaks only of DocumentURIs. CUECacheFS and OverlayFS exist to bridge these different views of file systems. CUECacheFS is built around similar concepts to the upstream gopls fs_memoized. To that is added: - the ability to parse cue files, caching the resulting ast.File - support for fs.FS and related APIs OverlayFS then extends CUECacheFS to support a mutable overlay. The mutation is provided by a transactional-style API (Update/View). OverlayFS also then supports all the necessary fs.FS interfaces, successfully combining overlays with content from the underlying CUECacheFS. Signed-off-by: Matthew Sackman <[email protected]> Change-Id: I1ba741a037759b116c52de5fbfe1a301058b9968 Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1217916 TryBot-Result: CUEcueckoo <[email protected]> Unity-Result: CUE porcuepine <[email protected]> Reviewed-by: Roger Peppe <[email protected]>
1 parent 3b10030 commit 955e7f4

File tree

3 files changed

+1424
-0
lines changed

3 files changed

+1424
-0
lines changed

internal/lsp/fscache/fs_cache.go

Lines changed: 358 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,358 @@
1+
package fscache
2+
3+
import (
4+
"errors"
5+
iofs "io/fs"
6+
"os"
7+
"path/filepath"
8+
"slices"
9+
"strings"
10+
"sync"
11+
"time"
12+
13+
"cuelang.org/go/cue/ast"
14+
"cuelang.org/go/cue/build"
15+
"cuelang.org/go/cue/parser"
16+
"cuelang.org/go/internal/filetypes"
17+
"cuelang.org/go/internal/golangorgx/gopls/protocol"
18+
"cuelang.org/go/internal/golangorgx/tools/robustio"
19+
"cuelang.org/go/mod/module"
20+
)
21+
22+
// A FileHandle represents the URI, content (including parsed CUE),
23+
// and optional version of a file tracked by the LSP session.
24+
//
25+
// FileHandle content may be provided by the file system or from an
26+
// overlay, for open files.
27+
type FileHandle interface {
28+
// URI is the URI for this file handle.
29+
URI() protocol.DocumentURI
30+
// ReadCUE attempts to parse the file content as CUE, using the
31+
// provided parser config. Note that config is only used if there
32+
// is no existing cached [ast.File] value within the
33+
// File. Therefore, it is the user's responsibility to ensure that
34+
// only one config value is used for each file: if you change the
35+
// config value and re-read the file, you will not receive back an
36+
// updated [ast.File].
37+
ReadCUE(config parser.Config) (*ast.File, error)
38+
// Version returns the file version, as defined by the LSP client.
39+
Version() int32
40+
// Content returns the contents of a file. The byte slice returned
41+
// is a copy of the underlying file content, and thus safe to be
42+
// mutated. This matches the behaviour of [iofs.ReadFileFS].
43+
Content() []byte
44+
}
45+
46+
type diskFileEntry struct {
47+
uri protocol.DocumentURI
48+
modTime time.Time
49+
50+
// TODO: will need to add the means to get the buildFile out. And
51+
// probably refine the behavioul of err too.
52+
content []byte
53+
buildFile *build.File
54+
55+
mu sync.Mutex
56+
ast *ast.File
57+
}
58+
59+
var _ FileHandle = (*diskFileEntry)(nil)
60+
61+
// URI implements [FileHandle]
62+
func (entry *diskFileEntry) URI() protocol.DocumentURI { return entry.uri }
63+
64+
// ReadFileFS implements [FileHandle]
65+
func (entry *diskFileEntry) ReadCUE(config parser.Config) (*ast.File, error) {
66+
entry.mu.Lock()
67+
defer entry.mu.Unlock()
68+
69+
if entry.ast != nil {
70+
return entry.ast, nil
71+
}
72+
73+
bf := entry.buildFile
74+
if !(bf != nil && bf.Encoding == build.CUE && bf.Form == "" && bf.Interpretation == "") {
75+
return nil, nil
76+
}
77+
78+
ast, err := parser.ParseFile(bf.Filename, bf.Source, config, parser.ParseComments)
79+
if err != nil {
80+
return nil, err
81+
}
82+
entry.ast = ast
83+
84+
return entry.ast, nil
85+
}
86+
87+
// Version implements [FileHandle]
88+
func (entry *diskFileEntry) Version() int32 { return -1 }
89+
90+
// Content implements [FileHandle]
91+
func (entry *diskFileEntry) Content() []byte { return slices.Clone(entry.content) }
92+
93+
func (entry *diskFileEntry) clone() *diskFileEntry {
94+
// copy everything apart from the mutex
95+
return &diskFileEntry{
96+
uri: entry.uri,
97+
modTime: entry.modTime,
98+
content: entry.content,
99+
buildFile: entry.buildFile,
100+
ast: entry.ast,
101+
}
102+
}
103+
104+
// CUECacheFS exists to cache [ast.File] values and thus amortize the
105+
// cost of parsing cue files. It is not an overlay in any way. Its
106+
// design is influenced by gopls's similar fs caching layer
107+
// (cache/fs_memoized.go in the gopls repo). CUECacheFS is also
108+
// designed to bridge the API gap between LSP, in which everything is
109+
// a URI, and our own module code (e.g. modpkgload) which is built
110+
// around [iofs.FS] and related interfaces.
111+
//
112+
// Note that CUECacheFS will return errors when reading files which
113+
// are not understood by [filetypes.ParseFileAndType].
114+
type CUECacheFS struct {
115+
mu sync.Mutex
116+
// Due to symlinks etc, multiple uris/paths may map to the same
117+
// file. A diskFileEntry has a specific URI, but cueFilesByID
118+
// allows us to group them together by file node id, which we then
119+
// use to amortize reading from disk.
120+
cueFilesByID map[robustio.FileID][]*diskFileEntry
121+
}
122+
123+
var _ RootableFS = (*CUECacheFS)(nil)
124+
125+
func NewCUECachedFS() *CUECacheFS {
126+
return &CUECacheFS{
127+
cueFilesByID: make(map[robustio.FileID][]*diskFileEntry),
128+
}
129+
}
130+
131+
// purgeCacheUnder removes from the cache entries that match or are
132+
// enclosed by uri. It is allowed that uri here is a directory.
133+
func (fs *CUECacheFS) purgeCacheUnder(uri protocol.DocumentURI) {
134+
fs.mu.Lock()
135+
defer fs.mu.Unlock()
136+
137+
for id, files := range fs.cueFilesByID {
138+
kept := slices.DeleteFunc(files, func(file *diskFileEntry) bool {
139+
return uri.Encloses(file.uri)
140+
})
141+
if len(kept) == len(files) { // no files were dropped
142+
// noop
143+
} else if len(kept) == 0 { // all files were dropped
144+
delete(fs.cueFilesByID, id)
145+
} else {
146+
fs.cueFilesByID[id] = kept
147+
}
148+
}
149+
}
150+
151+
// ReadFile stats and (maybe) reads the file, updates the cache, and
152+
// returns it. If uri does not exist, the error will be
153+
// [iofs.ErrNotExist]. If uri is a directory, the error will be
154+
// [iofs.PathError].
155+
func (fs *CUECacheFS) ReadFile(uri protocol.DocumentURI) (FileHandle, error) {
156+
id, mtime, err := robustio.GetFileID(uri.Path())
157+
if err != nil {
158+
if errors.Is(err, iofs.ErrNotExist) {
159+
// URI could have been a file, or a directory. In both cases
160+
// it's not on disk now, so we need to purge the cache of
161+
// everything enclosed by uri.
162+
fs.purgeCacheUnder(uri)
163+
}
164+
return nil, err
165+
}
166+
167+
// The following comment taken from gopls's cache/fs_memoized.go file:
168+
//
169+
// We check if the file has changed by comparing modification times. Notably,
170+
// this is an imperfect heuristic as various systems have low resolution
171+
// mtimes (as much as 1s on WSL or s390x builders), so we only cache
172+
// filehandles if mtime is old enough to be reliable, meaning that we don't
173+
// expect a subsequent write to have the same mtime.
174+
//
175+
// The coarsest mtime precision we've seen in practice is 1s, so consider
176+
// mtime to be unreliable if it is less than 2s old. Capture this before
177+
// doing anything else.
178+
recentlyModified := time.Since(mtime) < 2*time.Second
179+
180+
fs.mu.Lock()
181+
files, ok := fs.cueFilesByID[id]
182+
if ok && files[0].modTime.Equal(mtime) {
183+
var entry *diskFileEntry
184+
// We have already seen this file and it has not changed.
185+
for _, fh := range files {
186+
if fh.uri == uri {
187+
entry = fh
188+
break
189+
}
190+
}
191+
// No file handle for this exact URI. Create an alias, but share content.
192+
if entry == nil {
193+
entry := files[0].clone()
194+
entry.uri = uri
195+
files = append(files, entry)
196+
fs.cueFilesByID[id] = files
197+
}
198+
fs.mu.Unlock()
199+
return entry, nil
200+
}
201+
fs.mu.Unlock()
202+
203+
// Unknown file, or file has changed. Read (or re-read) it.
204+
//
205+
// The following comment taken from gopls's cache/fs_memoized.go file:
206+
//
207+
// It is possible that a race causes us to read a file with
208+
// different file ID, or whose mtime differs from our
209+
// mtime. However, in these cases we expect the client to notify of
210+
// a subsequent file change, and the file content should be
211+
// eventually consistent.
212+
df, err := readFile(uri, mtime)
213+
214+
fs.mu.Lock()
215+
// Only cache it if it's not been recentlyModified and it has no errors.
216+
if !recentlyModified && err == nil {
217+
// It's possible that two goroutines attempt to read the same
218+
// file at the same time, and both find the cache for the id
219+
// either empty or invalid. They will both proceed and perform
220+
// the read from disk. At this point, they will race and one
221+
// will overwrite and throw away the cache content from the
222+
// other.
223+
//
224+
// However, any subsequent re-read of the file will make use of
225+
// the cache, and the benefit is that we allow concurrent reads
226+
// from disk: keeping the mutex whilst we do the readFile call
227+
// would prevent any concurrency when reading from disk. Thus we
228+
// make the argument that this is more important than rare
229+
// amounts of duplicated disk-reads.
230+
fs.cueFilesByID[id] = []*diskFileEntry{df}
231+
} else {
232+
delete(fs.cueFilesByID, id)
233+
}
234+
fs.mu.Unlock()
235+
236+
if err != nil {
237+
return nil, err
238+
}
239+
return df, nil
240+
}
241+
242+
func readFile(uri protocol.DocumentURI, mtime time.Time) (*diskFileEntry, error) {
243+
// NB filePath is GOOS-appropriate (uri.Path() calls [filepath.FromSlash])
244+
filePath := uri.Path()
245+
content, err := os.ReadFile(filePath)
246+
if err != nil {
247+
return nil, err
248+
}
249+
entry := &diskFileEntry{
250+
modTime: mtime,
251+
uri: uri,
252+
content: content,
253+
}
254+
255+
bf, err := filetypes.ParseFileAndType(filePath, "", filetypes.Input)
256+
if err != nil {
257+
return nil, err
258+
}
259+
bf.Source = content
260+
entry.buildFile = bf
261+
262+
return entry, nil
263+
}
264+
265+
// IoFS implements [RootableFS]
266+
func (fs *CUECacheFS) IoFS(root string) CUEDirFS {
267+
root = strings.TrimRight(root, string(os.PathSeparator))
268+
return &rootedCUECacheFS{
269+
cuecachefs: fs,
270+
delegatefs: os.DirFS(root).(DirFS),
271+
root: root,
272+
}
273+
}
274+
275+
type RootableFS interface {
276+
// IoFS creates a CUEDirFS, for the tree of files rooted at the
277+
// directory root. Note the root is GOOS-appropriate.
278+
IoFS(root string) CUEDirFS
279+
}
280+
281+
// DirFS combines the [iofs] interfaces for opening files, reading
// directories, reading whole files, and stat-ing paths.
type DirFS interface {
	iofs.FS
	iofs.ReadDirFS
	iofs.ReadFileFS
	iofs.StatFS
}
287+
288+
type CUEDirFS interface {
289+
DirFS
290+
module.OSRootFS
291+
module.ReadCUEFS
292+
}
293+
294+
// rootedCUECacheFS is a wrapper over [CUECacheFS] that implements
295+
// [iofs.FS], [iofs.ReadDirFS], [iofs.ReadFileFS], [iofs.StatFS],
296+
// [module.OSRootFS], and [module.ReadCUEFS]
297+
type rootedCUECacheFS struct {
298+
cuecachefs *CUECacheFS
299+
delegatefs DirFS
300+
// NB root is GOOS-appropriate
301+
root string
302+
}
303+
304+
var _ CUEDirFS = (*rootedCUECacheFS)(nil)
305+
306+
// OSRoot implements [module.OSRootFS]
307+
func (fs *rootedCUECacheFS) OSRoot() string {
308+
return fs.root
309+
}
310+
311+
// Open implements [iofs.FS]
312+
func (fs *rootedCUECacheFS) Open(name string) (iofs.File, error) { return fs.delegatefs.Open(name) }
313+
314+
// ReadCUEFile implements [module.ReadCUEFS]
315+
func (fs *rootedCUECacheFS) ReadCUEFile(name string, config parser.Config) (*ast.File, error) {
316+
if !iofs.ValidPath(name) {
317+
return nil, &iofs.PathError{Op: "ReadCUEFile", Path: name, Err: iofs.ErrInvalid}
318+
}
319+
name, err := filepath.Localize(name)
320+
if err != nil {
321+
return nil, &iofs.PathError{Op: "ReadCUEFile", Path: name, Err: err}
322+
}
323+
324+
uri := protocol.URIFromPath(filepath.Join(fs.root, name))
325+
fh, err := fs.cuecachefs.ReadFile(uri)
326+
if err != nil {
327+
return nil, err
328+
}
329+
return fh.ReadCUE(config)
330+
}
331+
332+
// ReadDir implements [iofs.ReadDirFS]
333+
func (fs *rootedCUECacheFS) ReadDir(name string) ([]iofs.DirEntry, error) {
334+
return fs.delegatefs.ReadDir(name)
335+
}
336+
337+
// ReadFile implements [iofs.ReadFileFS]
338+
func (fs *rootedCUECacheFS) ReadFile(name string) ([]byte, error) {
339+
if !iofs.ValidPath(name) {
340+
return nil, &iofs.PathError{Op: "ReadFile", Path: name, Err: iofs.ErrInvalid}
341+
}
342+
name, err := filepath.Localize(name)
343+
if err != nil {
344+
return nil, &iofs.PathError{Op: "ReadFile", Path: name, Err: err}
345+
}
346+
uri := protocol.URIFromPath(filepath.Join(fs.root, name))
347+
fh, err := fs.cuecachefs.ReadFile(uri)
348+
if err != nil {
349+
return nil, err
350+
}
351+
352+
return fh.Content(), nil
353+
}
354+
355+
// Stat implements [iofs.StatFS]
356+
func (fs *rootedCUECacheFS) Stat(name string) (iofs.FileInfo, error) {
357+
return fs.delegatefs.Stat(name)
358+
}

0 commit comments

Comments
 (0)