|
| 1 | +package fscache |
| 2 | + |
| 3 | +import ( |
| 4 | + "errors" |
| 5 | + iofs "io/fs" |
| 6 | + "os" |
| 7 | + "path/filepath" |
| 8 | + "slices" |
| 9 | + "strings" |
| 10 | + "sync" |
| 11 | + "time" |
| 12 | + |
| 13 | + "cuelang.org/go/cue/ast" |
| 14 | + "cuelang.org/go/cue/build" |
| 15 | + "cuelang.org/go/cue/parser" |
| 16 | + "cuelang.org/go/internal/filetypes" |
| 17 | + "cuelang.org/go/internal/golangorgx/gopls/protocol" |
| 18 | + "cuelang.org/go/internal/golangorgx/tools/robustio" |
| 19 | + "cuelang.org/go/mod/module" |
| 20 | +) |
| 21 | + |
// A FileHandle represents the URI, content (including parsed CUE),
// and optional version of a file tracked by the LSP session.
//
// FileHandle content may be provided by the file system or from an
// overlay, for open files.
type FileHandle interface {
	// URI is the URI for this file handle.
	URI() protocol.DocumentURI
	// ReadCUE attempts to parse the file content as CUE, using the
	// provided parser config. Note that config is only used if there
	// is no existing cached [ast.File] value within the file
	// handle. Therefore, it is the user's responsibility to ensure that
	// only one config value is used for each file: if you change the
	// config value and re-read the file, you will not receive back an
	// updated [ast.File].
	//
	// Callers must be prepared for a nil [ast.File] together with a
	// nil error: the disk-backed implementation in this file returns
	// that pair when the file is not a plain CUE file.
	ReadCUE(config parser.Config) (*ast.File, error)
	// Version returns the file version, as defined by the LSP client.
	// The disk-backed implementation in this file always returns -1.
	Version() int32
	// Content returns the contents of a file. The byte slice returned
	// is a copy of the underlying file content, and thus safe to be
	// mutated. This matches the behaviour of [iofs.ReadFileFS].
	Content() []byte
}
| 45 | + |
// diskFileEntry is the [FileHandle] implementation for content that
// was read from disk. An entry is tied to one specific URI; several
// entries may exist for the same underlying file (see
// CUECacheFS.cueFilesByID).
type diskFileEntry struct {
	// uri is the URI this entry is returned for.
	uri protocol.DocumentURI
	// modTime is the modification time that was observed for the file
	// when this entry was created. CUECacheFS.ReadFile compares it
	// with the current modification time to decide whether the cached
	// entry can be reused.
	modTime time.Time

	// TODO: will need to add the means to get the buildFile out. And
	// probably refine the behaviour of err too.

	// content is the raw file content as read from disk.
	content []byte
	// buildFile is the result of [filetypes.ParseFileAndType] for the
	// file path, with its Source set to content.
	buildFile *build.File

	// mu is held by ReadCUE while it reads and writes ast.
	mu sync.Mutex
	// ast caches the result of the first successful parse performed
	// by ReadCUE. It is nil until then.
	ast *ast.File
}

// Compile-time check that *diskFileEntry implements FileHandle.
var _ FileHandle = (*diskFileEntry)(nil)
| 60 | + |
| 61 | +// URI implements [FileHandle] |
| 62 | +func (entry *diskFileEntry) URI() protocol.DocumentURI { return entry.uri } |
| 63 | + |
| 64 | +// ReadFileFS implements [FileHandle] |
| 65 | +func (entry *diskFileEntry) ReadCUE(config parser.Config) (*ast.File, error) { |
| 66 | + entry.mu.Lock() |
| 67 | + defer entry.mu.Unlock() |
| 68 | + |
| 69 | + if entry.ast != nil { |
| 70 | + return entry.ast, nil |
| 71 | + } |
| 72 | + |
| 73 | + bf := entry.buildFile |
| 74 | + if !(bf != nil && bf.Encoding == build.CUE && bf.Form == "" && bf.Interpretation == "") { |
| 75 | + return nil, nil |
| 76 | + } |
| 77 | + |
| 78 | + ast, err := parser.ParseFile(bf.Filename, bf.Source, config, parser.ParseComments) |
| 79 | + if err != nil { |
| 80 | + return nil, err |
| 81 | + } |
| 82 | + entry.ast = ast |
| 83 | + |
| 84 | + return entry.ast, nil |
| 85 | +} |
| 86 | + |
| 87 | +// Version implements [FileHandle] |
| 88 | +func (entry *diskFileEntry) Version() int32 { return -1 } |
| 89 | + |
| 90 | +// Content implements [FileHandle] |
| 91 | +func (entry *diskFileEntry) Content() []byte { return slices.Clone(entry.content) } |
| 92 | + |
| 93 | +func (entry *diskFileEntry) clone() *diskFileEntry { |
| 94 | + // copy everything apart from the mutex |
| 95 | + return &diskFileEntry{ |
| 96 | + uri: entry.uri, |
| 97 | + modTime: entry.modTime, |
| 98 | + content: entry.content, |
| 99 | + buildFile: entry.buildFile, |
| 100 | + ast: entry.ast, |
| 101 | + } |
| 102 | +} |
| 103 | + |
// CUECacheFS exists to cache [ast.File] values and thus amortize the
// cost of parsing cue files. It is not an overlay in any way. Its
// design is influenced by gopls's similar fs caching layer
// (cache/fs_memoized.go in the gopls repo). CUECacheFS is also
// designed to bridge the API gap between LSP, in which everything is
// a URI, and our own module code (e.g. modpkgload) which is built
// around [iofs.FS] and related interfaces.
//
// Note that CUECacheFS will return errors when reading files which
// are not understood by [filetypes.ParseFileAndType].
//
// Use NewCUECachedFS to create one: the map inside must be
// initialised before entries can be stored in it.
type CUECacheFS struct {
	// mu is held whenever cueFilesByID is read or modified.
	mu sync.Mutex
	// Due to symlinks etc, multiple uris/paths may map to the same
	// file. A diskFileEntry has a specific URI, but cueFilesByID
	// allows us to group them together by file node id, which we then
	// use to amortize reading from disk.
	cueFilesByID map[robustio.FileID][]*diskFileEntry
}

// Compile-time check that *CUECacheFS implements RootableFS.
var _ RootableFS = (*CUECacheFS)(nil)
| 124 | + |
| 125 | +func NewCUECachedFS() *CUECacheFS { |
| 126 | + return &CUECacheFS{ |
| 127 | + cueFilesByID: make(map[robustio.FileID][]*diskFileEntry), |
| 128 | + } |
| 129 | +} |
| 130 | + |
| 131 | +// purgeCacheUnder removes from the cache entries that match or are |
| 132 | +// enclosed by uri. It is allowed that uri here is a directory. |
| 133 | +func (fs *CUECacheFS) purgeCacheUnder(uri protocol.DocumentURI) { |
| 134 | + fs.mu.Lock() |
| 135 | + defer fs.mu.Unlock() |
| 136 | + |
| 137 | + for id, files := range fs.cueFilesByID { |
| 138 | + kept := slices.DeleteFunc(files, func(file *diskFileEntry) bool { |
| 139 | + return uri.Encloses(file.uri) |
| 140 | + }) |
| 141 | + if len(kept) == len(files) { // no files were dropped |
| 142 | + // noop |
| 143 | + } else if len(kept) == 0 { // all files were dropped |
| 144 | + delete(fs.cueFilesByID, id) |
| 145 | + } else { |
| 146 | + fs.cueFilesByID[id] = kept |
| 147 | + } |
| 148 | + } |
| 149 | +} |
| 150 | + |
| 151 | +// ReadFile stats and (maybe) reads the file, updates the cache, and |
| 152 | +// returns it. If uri does not exist, the error will be |
| 153 | +// [iofs.ErrNotExist]. If uri is a directory, the error will be |
| 154 | +// [iofs.PathError]. |
| 155 | +func (fs *CUECacheFS) ReadFile(uri protocol.DocumentURI) (FileHandle, error) { |
| 156 | + id, mtime, err := robustio.GetFileID(uri.Path()) |
| 157 | + if err != nil { |
| 158 | + if errors.Is(err, iofs.ErrNotExist) { |
| 159 | + // URI could have been a file, or a directory. In both cases |
| 160 | + // it's not on disk now, so we need to purge the cache of |
| 161 | + // everything enclosed by uri. |
| 162 | + fs.purgeCacheUnder(uri) |
| 163 | + } |
| 164 | + return nil, err |
| 165 | + } |
| 166 | + |
| 167 | + // The following comment taken from gopls's cache/fs_memoized.go file: |
| 168 | + // |
| 169 | + // We check if the file has changed by comparing modification times. Notably, |
| 170 | + // this is an imperfect heuristic as various systems have low resolution |
| 171 | + // mtimes (as much as 1s on WSL or s390x builders), so we only cache |
| 172 | + // filehandles if mtime is old enough to be reliable, meaning that we don't |
| 173 | + // expect a subsequent write to have the same mtime. |
| 174 | + // |
| 175 | + // The coarsest mtime precision we've seen in practice is 1s, so consider |
| 176 | + // mtime to be unreliable if it is less than 2s old. Capture this before |
| 177 | + // doing anything else. |
| 178 | + recentlyModified := time.Since(mtime) < 2*time.Second |
| 179 | + |
| 180 | + fs.mu.Lock() |
| 181 | + files, ok := fs.cueFilesByID[id] |
| 182 | + if ok && files[0].modTime.Equal(mtime) { |
| 183 | + var entry *diskFileEntry |
| 184 | + // We have already seen this file and it has not changed. |
| 185 | + for _, fh := range files { |
| 186 | + if fh.uri == uri { |
| 187 | + entry = fh |
| 188 | + break |
| 189 | + } |
| 190 | + } |
| 191 | + // No file handle for this exact URI. Create an alias, but share content. |
| 192 | + if entry == nil { |
| 193 | + entry := files[0].clone() |
| 194 | + entry.uri = uri |
| 195 | + files = append(files, entry) |
| 196 | + fs.cueFilesByID[id] = files |
| 197 | + } |
| 198 | + fs.mu.Unlock() |
| 199 | + return entry, nil |
| 200 | + } |
| 201 | + fs.mu.Unlock() |
| 202 | + |
| 203 | + // Unknown file, or file has changed. Read (or re-read) it. |
| 204 | + // |
| 205 | + // The following comment taken from gopls's cache/fs_memoized.go file: |
| 206 | + // |
| 207 | + // It is possible that a race causes us to read a file with |
| 208 | + // different file ID, or whose mtime differs from our |
| 209 | + // mtime. However, in these cases we expect the client to notify of |
| 210 | + // a subsequent file change, and the file content should be |
| 211 | + // eventually consistent. |
| 212 | + df, err := readFile(uri, mtime) |
| 213 | + |
| 214 | + fs.mu.Lock() |
| 215 | + // Only cache it if it's not been recentlyModified and it has no errors. |
| 216 | + if !recentlyModified && err == nil { |
| 217 | + // It's possible that two goroutines attempt to read the same |
| 218 | + // file at the same time, and both find the cache for the id |
| 219 | + // either empty or invalid. They will both proceed and perform |
| 220 | + // the read from disk. At this point, they will race and one |
| 221 | + // will overwrite and throw away the cache content from the |
| 222 | + // other. |
| 223 | + // |
| 224 | + // However, any subsequent re-read of the file will make use of |
| 225 | + // the cache, and the benefit is that we allow concurrent reads |
| 226 | + // from disk: keeping the mutex whilst we do the readFile call |
| 227 | + // would prevent any concurrency when reading from disk. Thus we |
| 228 | + // make the argument that this is more important than rare |
| 229 | + // amounts of duplicated disk-reads. |
| 230 | + fs.cueFilesByID[id] = []*diskFileEntry{df} |
| 231 | + } else { |
| 232 | + delete(fs.cueFilesByID, id) |
| 233 | + } |
| 234 | + fs.mu.Unlock() |
| 235 | + |
| 236 | + if err != nil { |
| 237 | + return nil, err |
| 238 | + } |
| 239 | + return df, nil |
| 240 | +} |
| 241 | + |
| 242 | +func readFile(uri protocol.DocumentURI, mtime time.Time) (*diskFileEntry, error) { |
| 243 | + // NB filePath is GOOS-appropriate (uri.Path() calls [filepath.FromSlash]) |
| 244 | + filePath := uri.Path() |
| 245 | + content, err := os.ReadFile(filePath) |
| 246 | + if err != nil { |
| 247 | + return nil, err |
| 248 | + } |
| 249 | + entry := &diskFileEntry{ |
| 250 | + modTime: mtime, |
| 251 | + uri: uri, |
| 252 | + content: content, |
| 253 | + } |
| 254 | + |
| 255 | + bf, err := filetypes.ParseFileAndType(filePath, "", filetypes.Input) |
| 256 | + if err != nil { |
| 257 | + return nil, err |
| 258 | + } |
| 259 | + bf.Source = content |
| 260 | + entry.buildFile = bf |
| 261 | + |
| 262 | + return entry, nil |
| 263 | +} |
| 264 | + |
| 265 | +// IoFS implements [RootableFS] |
| 266 | +func (fs *CUECacheFS) IoFS(root string) CUEDirFS { |
| 267 | + root = strings.TrimRight(root, string(os.PathSeparator)) |
| 268 | + return &rootedCUECacheFS{ |
| 269 | + cuecachefs: fs, |
| 270 | + delegatefs: os.DirFS(root).(DirFS), |
| 271 | + root: root, |
| 272 | + } |
| 273 | +} |
| 274 | + |
// RootableFS is implemented by file systems that can produce a
// CUEDirFS view of a directory tree. CUECacheFS implements it.
type RootableFS interface {
	// IoFS creates a CUEDirFS, for the tree of files rooted at the
	// directory root. Note the root is GOOS-appropriate.
	IoFS(root string) CUEDirFS
}
| 280 | + |
// DirFS groups the standard library file system interfaces for
// opening, listing, reading and stat-ing files. CUECacheFS.IoFS
// asserts that the value returned by [os.DirFS] satisfies it.
type DirFS interface {
	iofs.FS
	iofs.ReadDirFS
	iofs.ReadFileFS
	iofs.StatFS
}
| 287 | + |
// CUEDirFS is a DirFS which additionally reports its root directory
// on the host file system ([module.OSRootFS]) and can return parsed
// CUE files ([module.ReadCUEFS]).
type CUEDirFS interface {
	DirFS
	module.OSRootFS
	module.ReadCUEFS
}
| 293 | + |
// rootedCUECacheFS is a wrapper over [CUECacheFS] that implements
// [iofs.FS], [iofs.ReadDirFS], [iofs.ReadFileFS], [iofs.StatFS],
// [module.OSRootFS], and [module.ReadCUEFS]
type rootedCUECacheFS struct {
	// cuecachefs is the shared cache used by ReadFile and ReadCUEFile.
	cuecachefs *CUECacheFS
	// delegatefs serves Open, ReadDir and Stat. It is the [os.DirFS]
	// for root.
	delegatefs DirFS
	// NB root is GOOS-appropriate
	root string
}

// Compile-time check that *rootedCUECacheFS implements CUEDirFS.
var _ CUEDirFS = (*rootedCUECacheFS)(nil)
| 305 | + |
| 306 | +// OSRoot implements [module.OSRootFS] |
| 307 | +func (fs *rootedCUECacheFS) OSRoot() string { |
| 308 | + return fs.root |
| 309 | +} |
| 310 | + |
| 311 | +// Open implements [iofs.FS] |
| 312 | +func (fs *rootedCUECacheFS) Open(name string) (iofs.File, error) { return fs.delegatefs.Open(name) } |
| 313 | + |
| 314 | +// ReadCUEFile implements [module.ReadCUEFS] |
| 315 | +func (fs *rootedCUECacheFS) ReadCUEFile(name string, config parser.Config) (*ast.File, error) { |
| 316 | + if !iofs.ValidPath(name) { |
| 317 | + return nil, &iofs.PathError{Op: "ReadCUEFile", Path: name, Err: iofs.ErrInvalid} |
| 318 | + } |
| 319 | + name, err := filepath.Localize(name) |
| 320 | + if err != nil { |
| 321 | + return nil, &iofs.PathError{Op: "ReadCUEFile", Path: name, Err: err} |
| 322 | + } |
| 323 | + |
| 324 | + uri := protocol.URIFromPath(filepath.Join(fs.root, name)) |
| 325 | + fh, err := fs.cuecachefs.ReadFile(uri) |
| 326 | + if err != nil { |
| 327 | + return nil, err |
| 328 | + } |
| 329 | + return fh.ReadCUE(config) |
| 330 | +} |
| 331 | + |
| 332 | +// ReadDir implements [iofs.ReadDirFS] |
| 333 | +func (fs *rootedCUECacheFS) ReadDir(name string) ([]iofs.DirEntry, error) { |
| 334 | + return fs.delegatefs.ReadDir(name) |
| 335 | +} |
| 336 | + |
| 337 | +// ReadFile implements [iofs.ReadFileFS] |
| 338 | +func (fs *rootedCUECacheFS) ReadFile(name string) ([]byte, error) { |
| 339 | + if !iofs.ValidPath(name) { |
| 340 | + return nil, &iofs.PathError{Op: "ReadFile", Path: name, Err: iofs.ErrInvalid} |
| 341 | + } |
| 342 | + name, err := filepath.Localize(name) |
| 343 | + if err != nil { |
| 344 | + return nil, &iofs.PathError{Op: "ReadFile", Path: name, Err: err} |
| 345 | + } |
| 346 | + uri := protocol.URIFromPath(filepath.Join(fs.root, name)) |
| 347 | + fh, err := fs.cuecachefs.ReadFile(uri) |
| 348 | + if err != nil { |
| 349 | + return nil, err |
| 350 | + } |
| 351 | + |
| 352 | + return fh.Content(), nil |
| 353 | +} |
| 354 | + |
| 355 | +// Stat implements [iofs.StatFS] |
| 356 | +func (fs *rootedCUECacheFS) Stat(name string) (iofs.FileInfo, error) { |
| 357 | + return fs.delegatefs.Stat(name) |
| 358 | +} |
0 commit comments