diff --git a/cabal-install/Distribution/Client/FileMonitor.hs b/cabal-install/Distribution/Client/FileMonitor.hs new file mode 100644 index 00000000000..4715f849551 --- /dev/null +++ b/cabal-install/Distribution/Client/FileMonitor.hs @@ -0,0 +1,888 @@ +{-# LANGUAGE CPP, DeriveGeneric, DeriveFunctor, GeneralizedNewtypeDeriving, + NamedFieldPuns, BangPatterns #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +-- | An abstraction to help with re-running actions when files or other +-- input values they depend on have changed. +-- +module Distribution.Client.FileMonitor ( + + -- * Declaring files to monitor + MonitorFilePath(..), + FilePathGlob(..), + monitorFileSearchPath, + monitorFileHashedSearchPath, + + -- * Creating and checking sets of monitored files + FileMonitor(..), + newFileMonitor, + MonitorChanged(..), + MonitorChangedReason(..), + checkFileMonitorChanged, + updateFileMonitor, + + matchFileGlob, + ) where + + +#if MIN_VERSION_containers(0,5,0) +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +#else +import Data.Map (Map) +import qualified Data.Map as Map +#endif +import qualified Data.ByteString.Lazy as BS +import Distribution.Compat.Binary +import qualified Distribution.Compat.Binary as Binary +#if !MIN_VERSION_base(4,8,0) +import Data.Traversable (traverse) +#endif +import qualified Data.Hashable as Hashable +import Data.List (sort) +#if MIN_VERSION_directory(1,2,0) +import Data.Time (UTCTime(..), Day(..)) +#else +import System.Time (ClockTime(..)) +#endif + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif +import Control.Monad +import Control.Monad.Trans (MonadIO, liftIO) +import Control.Monad.State (StateT) +import qualified Control.Monad.State as State +import Control.Monad.Except (ExceptT, runExceptT, throwError) +import Control.Exception + +import Distribution.Text +import Distribution.Compat.ReadP ((<++)) +import qualified Distribution.Compat.ReadP as ReadP +import qualified Text.PrettyPrint as Disp + +import Distribution.Client.Glob +import Distribution.Simple.Utils (writeFileAtomic) +import Distribution.Client.Utils (mergeBy, MergeResult(..)) + +import System.FilePath +import System.Directory +import System.IO +import System.IO.Error +import GHC.Generics (Generic) + + +------------------------------------------------------------------------------ +-- Types for specifying files to monitor +-- + + +-- | A description of a file (or set of files) to monitor for changes. +-- +-- All file paths here are relative to a common directory (e.g. project root). +-- +data MonitorFilePath = + + -- | Monitor a single file for changes, based on its modification time. + -- The monitored file is considered to have changed if it no longer + -- exists or if its modification time has changed. + -- + MonitorFile !FilePath + + -- | Monitor a single file for changes, based on its modification time + -- and content hash. The monitored file is considered to have changed if + -- it no longer exists or if its modification time and content hash have + -- changed. + -- + | MonitorFileHashed !FilePath + + -- | Monitor a single non-existent file for changes. The monitored file + -- is considered to have changed if it exists. + -- + | MonitorNonExistentFile !FilePath + + -- | Monitor a set of files identified by a file glob. The monitored glob + -- is considered to have changed if the set of files matching the glob + -- changes (i.e. creations or deletions), or if the modification time and + -- content hash of any matching file has changed. + -- + | MonitorFileGlob !FilePathGlob + -- Note: currently file globs always use mtime+hash, so they're the + -- equivalent of MonitorFileHashed above. If we need globed files with + -- only mtime then it's perfectly ok to add it. + + deriving (Eq, Show, Generic) + +instance Binary MonitorFilePath + +-- | A file path specified by globbing +-- +data FilePathGlob + = GlobDir !Glob !FilePathGlob + | GlobFile !Glob + deriving (Eq, Show, Generic) + +instance Binary FilePathGlob + +-- | Creates a list of files to monitor when you search for a file which +-- unsuccessfully looked in @notFoundAtPaths@ before finding it at +-- @foundAtPath@. +monitorFileSearchPath :: [FilePath] -> FilePath -> [MonitorFilePath] +monitorFileSearchPath notFoundAtPaths foundAtPath = + MonitorFile foundAtPath + : map MonitorNonExistentFile notFoundAtPaths + +-- | Similar to 'monitorFileSearchPath', but also instructs us to +-- monitor the hash of the found file. +monitorFileHashedSearchPath :: [FilePath] -> FilePath -> [MonitorFilePath] +monitorFileHashedSearchPath notFoundAtPaths foundAtPath = + MonitorFileHashed foundAtPath + : map MonitorNonExistentFile notFoundAtPaths + + +------------------------------------------------------------------------------ +-- Implementation types, files status +-- + +-- | The state necessary to determine whether a set of monitored +-- files has changed. It consists of two parts: a set of specific +-- files to be monitored (index by their path), and a list of +-- globs, which monitor may files at once. +data MonitorStateFileSet + = MonitorStateFileSet !(Map FilePath MonitorStateFile) + ![MonitorStateGlob] + deriving Show + +type Hash = Int +#if MIN_VERSION_directory(1,2,0) +type ModTime = UTCTime +#else +type ModTime = ClockTime +#endif + +-- | The state necessary to determine whether a monitored file has changed. +-- +-- This covers all the cases of 'MonitorFilePath' except for globs which is +-- covered separately by 'MonitorStateGlob'. +-- +data MonitorStateFile + = MonitorStateFile !ModTime -- ^ cached file mtime + | MonitorStateFileHashed !ModTime !Hash -- ^ cached mtime and content hash + | MonitorStateFileNonExistent + + -- | These two are to deal with the situation where we've been asked + -- to monitor a file that's expected to exist, but when we come to + -- check it's status, it no longer exists. + | MonitorStateFileGone + | MonitorStateFileHashGone + deriving (Show, Generic) + +instance Binary MonitorStateFile + +-- | The state necessary to determine whether the files matched by a globbing +-- match have changed. +-- +data MonitorStateGlob + = MonitorStateGlobDirs + !Glob !FilePathGlob + !ModTime + ![(FilePath, MonitorStateGlob)] -- invariant: sorted + + | MonitorStateGlobFiles + !Glob + !ModTime + ![(FilePath, ModTime, Hash)] -- invariant: sorted + deriving (Show, Generic) + +instance Binary MonitorStateGlob + +-- | We can build a 'MonitorStateFileSet' from a set of 'MonitorFilePath' by +-- inspecting the state of the file system, and we can go in the reverse +-- direction by just forgetting the extra info. +-- +reconstructMonitorFilePaths :: MonitorStateFileSet -> [MonitorFilePath] +reconstructMonitorFilePaths (MonitorStateFileSet singlePaths globPaths) = + Map.foldrWithKey (\k x r -> getSinglePath k x : r) + (map getGlobPath globPaths) + singlePaths + where + getSinglePath filepath monitorState = + case monitorState of + MonitorStateFile{} -> MonitorFile filepath + MonitorStateFileHashed{} -> MonitorFileHashed filepath + MonitorStateFileNonExistent -> MonitorNonExistentFile filepath + MonitorStateFileGone -> MonitorFile filepath + MonitorStateFileHashGone -> MonitorFileHashed filepath + + getGlobPath (MonitorStateGlobDirs glob globs _ _) = + MonitorFileGlob (GlobDir glob globs) + getGlobPath (MonitorStateGlobFiles glob _ _) = + MonitorFileGlob (GlobFile glob) + +------------------------------------------------------------------------------ +-- Checking the status of monitored files +-- + +-- | A monitor for detecting changes to a set of files. It can be used to +-- efficiently test if any of a set of files (specified individually or by +-- glob patterns) has changed since some snapshot. In addition, it also checks +-- for changes in a value (of type @a@), and when there are no changes in +-- either it returns a saved value (of type @b@). +-- +-- The main use case looks like this: suppose we have some expensive action +-- that depends on certain pure inputs and reads some set of files, and +-- produces some pure result. We want to avoid re-running this action when it +-- would produce the same result. So we need to monitor the files the action +-- looked at, the other pure input values, and we need to cache the result. +-- Then at some later point, if the input value didn't change, and none of the +-- files changed, then we can re-use the cached result rather than re-running +-- the action. +-- +-- This can be achieved using a 'FileMonitor'. Each 'FileMonitor' instance +-- saves state in a disk file, so the file for that has to be specified, +-- making sure it is unique. The pattern is to use 'checkFileMonitorChanged' +-- to see if there's been any change. If there is, re-run the action, keeping +-- track of the files, then use 'updateFileMonitor' to record the current +-- set of files to monitor, the current input value for the action, and the +-- result of the action. +-- +-- The typical occurrence of this pattern is captured by 'rerunIfChanged' +-- and the 'Rebuild' monad. More complicated cases may need to use +-- 'checkFileMonitorChanged' and 'updateFileMonitor' directly. +-- +data FileMonitor a b + = FileMonitor { + + -- | The file where this 'FileMonitor' should store its state. + -- + fileMonitorCacheFile :: FilePath, + + -- | Compares a new cache key with old one to determine if a + -- corresponding cached value is still valid. + -- + -- Typically this is just an equality test, but in some + -- circumstances it can make sense to do things like subset + -- comparisons. + -- + -- The first arg is the new value, the second is the old cached value. + -- + fileMonitorKeyValid :: a -> a -> Bool, + + -- | When this mode is enabled, if 'checkFileMonitorChanged' returns + -- 'MonitoredValueChanged' then we have the guarantee that no files + -- changed, that the value change was the only change. In the default + -- mode no such guarantee is provided which is slightly faster. + -- + fileMonitorCheckIfOnlyValueChanged :: Bool + } + +-- | Define a new file monitor. +-- +-- It's best practice to define file monitor values once, and then use the +-- same value for 'checkFileMonitorChanged' and 'updateFileMonitor' as this +-- ensures you get the same types @a@ and @b@ for reading and writing. +-- +-- The path of the file monitor itself must be unique because it keeps state +-- on disk and these would clash. +-- +newFileMonitor :: Eq a => FilePath -- ^ The file to cache the state of the + -- file monitor. Must be unique. + -> FileMonitor a b +newFileMonitor path = FileMonitor path (==) False + +-- | The result of 'checkFileMonitorChanged': either the monitored files or +-- value changed (and it tells us which it was) or nothing changed and we get +-- the cached result. +-- +data MonitorChanged a b = + -- | The monitored files and value did not change. The cached result is + -- @b@. + -- + -- The set of monitored files is also returned. This is useful + -- for composing or nesting 'FileMonitor's. + MonitorUnchanged b [MonitorFilePath] + + -- | The monitor found that something changed. The reason is given. + -- + | MonitorChanged (MonitorChangedReason a) + deriving Show + +-- | What kind of change 'checkFileMonitorChanged' detected. +-- +data MonitorChangedReason a = + + -- | One of the files changed (existence, file type, mtime or file + -- content, depending on the 'MonitorFilePath' in question) + MonitoredFileChanged FilePath + + -- | The pure input value changed. + -- + -- The previous cached key value is also returned. This is sometimes + -- useful when using a 'fileMonitorKeyValid' function that is not simply + -- '(==)', when invalidation can be partial. In such cases it can make + -- sense to 'updateFileMonitor' with a key value that's a combination of + -- the new and old (e.g. set union). + | MonitoredValueChanged a + + -- | There was no saved monitor state, cached value etc. Ie the file + -- for the 'FileMonitor' does not exist. + | MonitorFirstRun + + -- | There was existing state, but we could not read it. This typically + -- happens when the code has changed compared to an existing 'FileMonitor' + -- cache file and type of the input value or cached value has changed such + -- that we cannot decode the values. This is completely benign as we can + -- treat is just as if there were no cache file and re-run. + | MonitorCorruptCache + deriving (Eq, Show, Functor) + +-- | Test if the input value or files monitored by the 'FileMonitor' have +-- changed. If not, return the cached value. +-- +-- See 'FileMonitor' for a full explanation. +-- +checkFileMonitorChanged + :: (Binary a, Binary b) + => FileMonitor a b -- ^ cache file path + -> FilePath -- ^ root directory + -> a -- ^ guard or key value + -> IO (MonitorChanged a b) -- ^ did the key or any paths change? +checkFileMonitorChanged + monitor@FileMonitor { fileMonitorKeyValid, + fileMonitorCheckIfOnlyValueChanged } + root currentKey = + + -- Consider it a change if the cache file does not exist, + -- or we cannot decode it. Sadly ErrorCall can still happen, despite + -- using decodeFileOrFail, e.g. Data.Char.chr errors + + handleDoesNotExist (MonitorChanged MonitorFirstRun) $ + handleErrorCall (MonitorChanged MonitorCorruptCache) $ + readCacheFile monitor + >>= either (\_ -> return (MonitorChanged MonitorCorruptCache)) + checkStatusCache + + where + checkStatusCache (cachedFileStatus, cachedKey, cachedResult) = do + change <- checkForChanges + case change of + Just reason -> return (MonitorChanged reason) + Nothing -> return (MonitorUnchanged cachedResult monitorFiles) + where monitorFiles = reconstructMonitorFilePaths cachedFileStatus + where + -- In fileMonitorCheckIfOnlyValueChanged mode we want to guarantee that + -- if we return MonitoredValueChanged that only the value changed. + -- We do that by checkin for file changes first. Otherwise it makes + -- more sense to do the cheaper test first. + checkForChanges + | fileMonitorCheckIfOnlyValueChanged + = checkFileChange cachedFileStatus cachedKey cachedResult + `mplusMaybeT` + checkValueChange cachedKey + + | otherwise + = checkValueChange cachedKey + `mplusMaybeT` + checkFileChange cachedFileStatus cachedKey cachedResult + + mplusMaybeT :: Monad m => m (Maybe a) -> m (Maybe a) -> m (Maybe a) + mplusMaybeT ma mb = do + mx <- ma + case mx of + Nothing -> mb + Just x -> return (Just x) + + -- Check if the guard value has changed + checkValueChange cachedKey + | not (fileMonitorKeyValid currentKey cachedKey) + = return (Just (MonitoredValueChanged cachedKey)) + | otherwise + = return Nothing + + -- Check if any file has changed + checkFileChange cachedFileStatus cachedKey cachedResult = do + res <- probeFileSystem root cachedFileStatus + case res of + -- Some monitored file has changed + Left changedPath -> + return (Just (MonitoredFileChanged (normalise changedPath))) + + -- No monitored file has changed + Right (cachedFileStatus', cacheStatus) -> do + + -- But we might still want to update the cache + whenCacheChanged cacheStatus $ + rewriteCacheFile monitor cachedFileStatus' cachedKey cachedResult + + return Nothing + +-- | Helper for reading the cache file. +-- +-- This determines the type and format of the binary cache file. +-- +readCacheFile :: (Binary a, Binary b) + => FileMonitor a b + -> IO (Either String (MonitorStateFileSet, a, b)) +readCacheFile FileMonitor {fileMonitorCacheFile} = + withBinaryFile fileMonitorCacheFile ReadMode $ \hnd -> + Binary.decodeOrFailIO =<< BS.hGetContents hnd + +-- | Helper for writing the cache file. +-- +-- This determines the type and format of the binary cache file. +-- +rewriteCacheFile :: (Binary a, Binary b) + => FileMonitor a b + -> MonitorStateFileSet -> a -> b -> IO () +rewriteCacheFile FileMonitor {fileMonitorCacheFile} fileset key result = + writeFileAtomic fileMonitorCacheFile $ + Binary.encode (fileset, key, result) + +-- | Probe the file system to see if any of the monitored files have changed. +-- +-- It returns Nothing if any file changed, or returns a possibly updated +-- file 'MonitorStateFileSet' plus an indicator of whether it actually changed. +-- +-- We may need to update the cache since there may be changes in the filesystem +-- state which don't change any of our affected files. +-- +-- Consider the glob @{proj1,proj2}\/\*.cabal@. Say we first run and find a +-- @proj1@ directory containing @proj1.cabal@ yet no @proj2@. If we later run +-- and find @proj2@ was created, yet contains no files matching @*.cabal@ then +-- we want to update the cache despite no changes in our relevant file set. +-- Specifically, we should add an mtime for this directory so we can avoid +-- re-traversing the directory in future runs. +-- +probeFileSystem :: FilePath -> MonitorStateFileSet + -> IO (Either FilePath (MonitorStateFileSet, CacheChanged)) +probeFileSystem root (MonitorStateFileSet singlePaths globPaths) = + runChangedM $ + MonitorStateFileSet + <$> traverseWithKey (probeFileStatus root) singlePaths + <*> traverse (probeGlobStatus root ".") globPaths + +traverseWithKey :: (Applicative t, Eq k) + => (k -> a -> t b) -> Map k a -> t (Map k b) +#if MIN_VERSION_containers(0,5,0) +traverseWithKey = Map.traverseWithKey +#else +traverseWithKey f = fmap Map.fromAscList + . traverse (\(k, v) -> (,) k <$> f k v) + . Map.toAscList +#endif + + +----------------------------------------------- +-- Monad for checking for file system changes +-- +-- We need to be able to bail out if we detect a change (using ExceptT), +-- but if there's no change we need to be able to rebuild the monitor +-- state. And we want to optimise that rebuilding by keeping track if +-- anything actually changed (using StateT), so that in the typical case +-- we can avoid rewriting the state file. + +newtype ChangedM a = ChangedM (StateT CacheChanged (ExceptT FilePath IO) a) + deriving (Functor, Applicative, Monad, MonadIO) + +runChangedM :: ChangedM a -> IO (Either FilePath (a, CacheChanged)) +runChangedM (ChangedM action) = + runExceptT $ State.runStateT action CacheUnchanged + +somethingChanged :: FilePath -> ChangedM a +somethingChanged path = ChangedM $ throwError path + +cacheChanged :: ChangedM () +cacheChanged = ChangedM $ State.put CacheChanged + +data CacheChanged = CacheChanged | CacheUnchanged + +whenCacheChanged :: Monad m => CacheChanged -> m () -> m () +whenCacheChanged CacheChanged action = action +whenCacheChanged CacheUnchanged _ = return () + +---------------------- + +-- | Probe the file system to see if a single monitored file has changed. +-- +probeFileStatus :: FilePath -> FilePath -> MonitorStateFile + -> ChangedM MonitorStateFile +probeFileStatus root file cached = do + case cached of + MonitorStateFile mtime -> probeFileModificationTime + root file mtime + MonitorStateFileHashed mtime hash -> probeFileModificationTimeAndHash + root file mtime hash + MonitorStateFileNonExistent -> probeFileNonExistence root file + MonitorStateFileGone -> somethingChanged file + MonitorStateFileHashGone -> somethingChanged file + + return cached + + +-- | Probe the file system to see if a monitored file glob has changed. +-- +probeGlobStatus :: FilePath -- ^ root path + -> FilePath -- ^ path of the directory we are looking in + -- relative to @root@ + -> MonitorStateGlob + -> ChangedM MonitorStateGlob +probeGlobStatus root dirName + (MonitorStateGlobDirs glob globPath mtime children) = do + change <- liftIO $ checkDirectoryModificationTime (root dirName) mtime + case change of + Nothing -> do + children' <- sequence + [ do fstate' <- probeGlobStatus root (dirName fname) fstate + return (fname, fstate') + | (fname, fstate) <- children ] + return $! MonitorStateGlobDirs glob globPath mtime children' + + Just mtime' -> do + -- directory modification time changed: + -- a matching subdir may have been added or deleted + matches <- filterM (\entry -> let subdir = root dirName entry + in liftIO $ doesDirectoryExist subdir) + . filter (globMatches glob) + =<< liftIO (getDirectoryContents (root dirName)) + + children' <- mapM probeMergeResult $ + mergeBy (\(path1,_) path2 -> compare path1 path2) + children + (sort matches) + return $! MonitorStateGlobDirs glob globPath mtime' children' + -- Note that just because the directory has changed, we don't force + -- a cache rewrite with 'cacheChanged' since that has some cost, and + -- all we're saving is scanning the directory. But we do rebuild the + -- cache with the new mtime', so that if the cache is rewritten for + -- some other reason, we'll take advantage of that. + + where + probeMergeResult :: MergeResult (FilePath, MonitorStateGlob) FilePath + -> ChangedM (FilePath, MonitorStateGlob) + + -- Only in cached (directory deleted) + probeMergeResult (OnlyInLeft (path, fstate)) = + case allMatchingFiles (dirName path) fstate of + [] -> return (path, fstate) + -- Strictly speaking we should be returning 'CacheChanged' above + -- as we should prune the now-missing 'MonitorStateGlob'. However + -- we currently just leave these now-redundant entries in the + -- cache as they cost no IO and keeping them allows us to avoid + -- rewriting the cache. + (file:_) -> somethingChanged file + + -- Only in current filesystem state (directory added) + probeMergeResult (OnlyInRight path) = do + fstate <- liftIO $ buildMonitorStateGlob + root (dirName path) globPath + case allMatchingFiles (dirName path) fstate of + (file:_) -> somethingChanged file + -- This is the only case where we use 'cacheChanged' because we can + -- have a whole new dir subtree (of unbounded size and cost), so we + -- need to save the state of that new subtree in the cache. + [] -> cacheChanged >> return (path, fstate) + + -- Found in path + probeMergeResult (InBoth (path, fstate) _) = do + fstate' <- probeGlobStatus root (dirName path) fstate + return (path, fstate') + + -- | Does a 'MonitorStateGlob' have any relevant files within it? + allMatchingFiles :: FilePath -> MonitorStateGlob -> [FilePath] + allMatchingFiles dir (MonitorStateGlobFiles _ _ entries) = + [ dir fname | (fname, _, _) <- entries ] + allMatchingFiles dir (MonitorStateGlobDirs _ _ _ entries) = + [ res + | (subdir, fstate) <- entries + , res <- allMatchingFiles (dir subdir) fstate ] + + +probeGlobStatus root dirName (MonitorStateGlobFiles glob mtime children) = do + change <- liftIO $ checkDirectoryModificationTime (root dirName) mtime + mtime' <- case change of + Nothing -> return mtime + Just mtime' -> do + -- directory modification time changed: + -- a matching file may have been added or deleted + matches <- filterM (\entry -> let file = root dirName entry + in liftIO $ doesFileExist file) + . filter (globMatches glob) + =<< liftIO (getDirectoryContents (root dirName)) + + mapM_ probeMergeResult $ + mergeBy (\(path1,_,_) path2 -> compare path1 path2) + children + (sort matches) + return mtime' + + -- Check that none of the children have changed + forM_ children $ \(file, fmtime, fhash) -> + probeFileModificationTimeAndHash root (dirName file) fmtime fhash + + return (MonitorStateGlobFiles glob mtime' children) + -- Again, we don't force a cache rewite with 'cacheChanged', but we do use + -- the new mtime' if any. + where + probeMergeResult :: MergeResult (FilePath, ModTime, Hash) FilePath + -> ChangedM () + probeMergeResult mr = case mr of + InBoth _ _ -> return () + -- this is just to be able to accurately report which file changed: + OnlyInLeft (path, _, _) -> somethingChanged (dirName path) + OnlyInRight path -> somethingChanged (dirName path) + +------------------------------------------------------------------------------ + +-- | Update the input value and the set of files monitored by the +-- 'FileMonitor', plus the cached value that may be returned in future. +-- +-- This takes a snapshot of the state of the monitored files right now, so +-- 'checkFileMonitorChanged' will look for file system changes relative to +-- this snapshot. So consider carefully when is the appropriate point to take +-- the snapshot. +-- +-- This is typically done once the action has been completed successfully and +-- we have the action's result and we know what files it looked at. See +-- 'FileMonitor' for a full explanation. +-- +updateFileMonitor + :: (Binary a, Binary b) + => FileMonitor a b -- ^ cache file path + -> FilePath -- ^ root directory + -> [MonitorFilePath] -- ^ files of interest relative to root + -> a -- ^ the current key value + -> b -- ^ the current result value + -> IO () +updateFileMonitor monitor root monitorFiles cachedKey cachedResult = do + fsc <- buildMonitorStateFileSet root monitorFiles + rewriteCacheFile monitor fsc cachedKey cachedResult + +-- | Take the snapshot of the monitored files. That is, given the +-- specification of the set of files we need to monitor, inspect the state +-- of the file system now and collect the information we'll need later to +-- determine if anything has changed. +-- +buildMonitorStateFileSet :: FilePath -- ^ root directory + -> [MonitorFilePath] -- ^ patterns of interest + -- relative to root + -> IO MonitorStateFileSet +buildMonitorStateFileSet root = + go Map.empty [] + where + go :: Map FilePath MonitorStateFile -> [MonitorStateGlob] + -> [MonitorFilePath] -> IO MonitorStateFileSet + go !singlePaths !globPaths [] = + return (MonitorStateFileSet singlePaths globPaths) + + go !singlePaths !globPaths (MonitorFile path : monitors) = do + let file = root path + monitorState <- handleDoesNotExist MonitorStateFileGone $ + MonitorStateFile <$> getModificationTime file + let singlePaths' = Map.insert path monitorState singlePaths + go singlePaths' globPaths monitors + + go !singlePaths !globPaths (MonitorFileHashed path : monitors) = do + let file = root path + monitorState <- handleDoesNotExist MonitorStateFileHashGone $ + MonitorStateFileHashed + <$> getModificationTime file + <*> readFileHash file + let singlePaths' = Map.insert path monitorState singlePaths + go singlePaths' globPaths monitors + + go !singlePaths !globPaths (MonitorNonExistentFile path : monitors) = do + let singlePaths' = Map.insert path MonitorStateFileNonExistent singlePaths + go singlePaths' globPaths monitors + + go !singlePaths !globPaths (MonitorFileGlob globPath : monitors) = do + monitorState <- buildMonitorStateGlob root "." globPath + go singlePaths (monitorState : globPaths) monitors + + +-- | Much like 'buildMonitorStateFileSet' but for the somewhat complicated case +-- of a file glob. +-- +-- This gets used both by 'buildMonitorStateFileSet' when we're taking the +-- file system snapshot, but also by 'probeGlobStatus' as part of checking +-- the monitored (globed) files for changes when we find a whole new subtree. +-- +buildMonitorStateGlob :: FilePath -- ^ the root directory + -> FilePath -- ^ directory we are examining + -- relative to the root + -> FilePathGlob -- ^ the matching glob + -> IO MonitorStateGlob +buildMonitorStateGlob root dir globPath = do + dirEntries <- getDirectoryContents (root dir) + dirMTime <- getModificationTime (root dir) + case globPath of + GlobDir glob globPath' -> do + subdirs <- filterM (\subdir -> doesDirectoryExist + (root dir subdir)) + $ filter (globMatches glob) dirEntries + subdirStates <- + forM (sort subdirs) $ \subdir -> do + fstate <- buildMonitorStateGlob root (dir subdir) globPath' + return (subdir, fstate) + return $! MonitorStateGlobDirs glob globPath' dirMTime subdirStates + + GlobFile glob -> do + files <- filterM (\fname -> doesFileExist (root dir fname)) + $ filter (globMatches glob) dirEntries + filesStates <- + forM (sort files) $ \file -> do + let path = root dir file + mtime <- getModificationTime path + hash <- readFileHash path + return (file, mtime, hash) + return $! MonitorStateGlobFiles glob dirMTime filesStates + +-- | Utility to match a file glob against the file system, starting from a +-- given root directory. The results are all relative to the given root. +-- +matchFileGlob :: FilePath -> FilePathGlob -> IO [FilePath] +matchFileGlob root glob0 = go glob0 "" + where + go (GlobFile glob) dir = do + entries <- getDirectoryContents (root dir) + let files = filter (globMatches glob) entries + return (map (dir ) files) + + go (GlobDir glob globPath) dir = do + entries <- getDirectoryContents (root dir) + subdirs <- filterM (\subdir -> doesDirectoryExist + (root dir subdir)) + $ filter (globMatches glob) entries + concat <$> mapM (\subdir -> go globPath (dir subdir)) subdirs +--TODO: [code cleanup] plausibly FilePathGlob and matchFileGlob should be +-- moved into D.C.Glob and/or merged with similar functionality in Cabal. + +------------------------------------------------------------------------------ +-- Utils +-- + +-- | Within the @root@ directory, check if @file@ has its 'ModTime' is +-- the same as @mtime@, short-circuiting if it is different. +probeFileModificationTime :: FilePath -> FilePath -> ModTime -> ChangedM () +probeFileModificationTime root file mtime = do + unchanged <- liftIO $ checkModificationTimeUnchanged root file mtime + unless unchanged (somethingChanged file) + +-- | Within the @root@ directory, check if @file@ has its 'ModTime' and +-- 'Hash' is the same as @mtime@ and @hash@, short-circuiting if it is +-- different. +probeFileModificationTimeAndHash :: FilePath -> FilePath -> ModTime -> Hash + -> ChangedM () +probeFileModificationTimeAndHash root file mtime hash = do + unchanged <- liftIO $ + checkFileModificationTimeAndHashUnchanged root file mtime hash + unless unchanged (somethingChanged file) + +-- | Within the @root@ directory, check if @file@ still does not exist. +-- If it *does* exist, short-circuit. +probeFileNonExistence :: FilePath -> FilePath -> ChangedM () +probeFileNonExistence root file = do + exists <- liftIO $ doesFileExist (root file) + when exists (somethingChanged file) + +-- | Returns @True@ if, inside the @root@ directory, @file@ has the same +-- 'ModTime' as @mtime@. +checkModificationTimeUnchanged :: FilePath -> FilePath + -> ModTime -> IO Bool +checkModificationTimeUnchanged root file mtime = + handleDoesNotExist False $ do + mtime' <- getModificationTime (root file) + return (mtime == mtime') + +-- | Returns @True@ if, inside the @root@ directory, @file@ has the +-- same 'ModTime' and 'Hash' as @mtime and @chash@. +checkFileModificationTimeAndHashUnchanged :: FilePath -> FilePath + -> ModTime -> Hash -> IO Bool +checkFileModificationTimeAndHashUnchanged root file mtime chash = + handleDoesNotExist False $ do + mtime' <- getModificationTime (root file) + if mtime == mtime' + then return True + else do + chash' <- readFileHash (root file) + return (chash == chash') + +-- | Read a non-cryptographic hash of a @file@. +readFileHash :: FilePath -> IO Hash +readFileHash file = + withBinaryFile file ReadMode $ \hnd -> + evaluate . Hashable.hash =<< BS.hGetContents hnd + +-- | Given a directory @dir@, return @Nothing@ if its 'ModTime' +-- is the same as @mtime@, and the new 'ModTime' if it is not. +checkDirectoryModificationTime :: FilePath -> ModTime -> IO (Maybe ModTime) +checkDirectoryModificationTime dir mtime = + handleDoesNotExist Nothing $ do + mtime' <- getModificationTime dir + if mtime == mtime' + then return Nothing + else return (Just mtime') + +-- | Run an IO computation, returning @e@ if it raises a "file +-- does not exist" error. +handleDoesNotExist :: a -> IO a -> IO a +handleDoesNotExist e = + handleJust + (\ioe -> if isDoesNotExistError ioe then Just ioe else Nothing) + (\_ -> return e) + +-- | Run an IO computation, returning @e@ if there is an 'error' +-- call. ('ErrorCall') +handleErrorCall :: a -> IO a -> IO a +handleErrorCall e = + handle (\(ErrorCall _) -> return e) + +------------------------------------------------------------------------------ +-- Instances +-- + +instance Text FilePathGlob where + disp (GlobDir glob pathglob) = disp glob Disp.<> Disp.char '/' + Disp.<> disp pathglob + disp (GlobFile glob) = disp glob + + parse = parse >>= \glob -> (asDir glob <++ asFile glob) + where + asDir glob = do _ <- ReadP.char '/' + globs <- parse + return (GlobDir glob globs) + asFile glob = return (GlobFile glob) + +#if MIN_VERSION_directory(1,2,0) +instance Binary UTCTime where + put (UTCTime (ModifiedJulianDay day) tod) = do + put day + put (toRational tod) + get = do + day <- get + tod <- get + return $! UTCTime (ModifiedJulianDay day) + (fromRational tod) +#else +instance Binary ClockTime where + put (TOD sec subsec) = do + put sec + put subsec + get = do + !sec <- get + !subsec <- get + return (TOD sec subsec) +#endif + +instance Binary MonitorStateFileSet where + put (MonitorStateFileSet singlePaths globPaths) = do + put (1 :: Int) -- version + put singlePaths + put globPaths + get = do + ver <- get + if ver == (1 :: Int) + then do singlePaths <- get + globPaths <- get + return $! MonitorStateFileSet singlePaths globPaths + else fail "MonitorStateFileSet: wrong version" + diff --git a/cabal-install/Distribution/Client/Glob.hs b/cabal-install/Distribution/Client/Glob.hs new file mode 100644 index 00000000000..60419d8c18c --- /dev/null +++ b/cabal-install/Distribution/Client/Glob.hs @@ -0,0 +1,106 @@ +{-# LANGUAGE DeriveGeneric #-} + +module Distribution.Client.Glob + ( GlobAtom(..) + , Glob (..) + , globMatches + ) where + +import Data.List (stripPrefix) +import Control.Monad (liftM2) +import Distribution.Compat.Binary +import GHC.Generics (Generic) + +import Distribution.Text +import Distribution.Compat.ReadP +import qualified Text.PrettyPrint as Disp + + +-- | A piece of a globbing pattern +data GlobAtom = WildCard + | Literal String + | Union [Glob] + deriving (Eq, Show, Generic) + +instance Binary GlobAtom + +-- | A single directory or file component of a globbed path +newtype Glob = Glob [GlobAtom] + deriving (Eq, Show, Generic) + +instance Binary Glob + + +-- | Test whether a file path component matches a globbing pattern +-- +globMatches :: Glob -> String -> Bool +globMatches (Glob atoms) = goStart atoms + where + -- From the man page, glob(7): + -- "If a filename starts with a '.', this character must be + -- matched explicitly." + + go, goStart :: [GlobAtom] -> String -> Bool + + goStart (WildCard:_) ('.':_) = False + goStart (Union globs:rest) cs = any (\(Glob glob) -> + goStart (glob ++ rest) cs) globs + goStart rest cs = go rest cs + + go [] "" = True + go (Literal lit:rest) cs + | Just cs' <- stripPrefix lit cs + = go rest cs' + | otherwise = False + go [WildCard] "" = True + go (WildCard:rest) (c:cs) = go rest (c:cs) || go (WildCard:rest) cs + go (Union globs:rest) cs = any (\(Glob glob) -> + go (glob ++ rest) cs) globs + go [] (_:_) = False + go (_:_) "" = False + +instance Text Glob where + disp (Glob atoms) = Disp.hcat (map dispAtom atoms) + where + dispAtom WildCard = Disp.char '*' + dispAtom (Literal str) = Disp.text (escape str) + dispAtom (Union globs) = Disp.braces + (Disp.hcat (Disp.punctuate (Disp.char ',') + (map disp globs))) + + escape [] = [] + escape (c:cs) + | isGlobEscapedChar c = '\\' : c : escape cs + | otherwise = c : escape cs + + parse = Glob `fmap` many1 globAtom + where + globAtom :: ReadP r GlobAtom + globAtom = literal +++ wildcard +++ union + + wildcard = char '*' >> return WildCard + + union = between (char '{') (char '}') + (fmap (Union . map Glob) $ sepBy1 (many1 globAtom) (char ',')) + + literal = Literal `fmap` many1' + where + litchar = normal +++ escape + + normal = satisfy (not . isGlobEscapedChar) + escape = char '\\' >> satisfy isGlobEscapedChar + + many1' :: ReadP r [Char] + many1' = liftM2 (:) litchar many' + + many' :: ReadP r [Char] + many' = many1' <++ return [] + +isGlobEscapedChar :: Char -> Bool +isGlobEscapedChar '*' = True +isGlobEscapedChar '{' = True +isGlobEscapedChar '}' = True +isGlobEscapedChar ',' = True +isGlobEscapedChar '\\' = True +isGlobEscapedChar '/' = True +isGlobEscapedChar _ = False \ No newline at end of file diff --git a/cabal-install/Distribution/Client/IndexUtils.hs b/cabal-install/Distribution/Client/IndexUtils.hs index 042ee60976d..95545fc8b6f 100644 --- a/cabal-install/Distribution/Client/IndexUtils.hs +++ b/cabal-install/Distribution/Client/IndexUtils.hs @@ -17,7 +17,9 @@ module Distribution.Client.IndexUtils ( getIndexFileAge, getInstalledPackages, + Configure.getInstalledPackagesMonitorFiles, getSourcePackages, + getSourcePackagesMonitorFiles, Index(..), PackageEntry(..), @@ -52,7 +54,7 @@ import Distribution.Simple.Compiler import Distribution.Simple.Program ( ProgramConfiguration ) import qualified Distribution.Simple.Configure as Configure - ( getInstalledPackages ) + ( getInstalledPackages, getInstalledPackagesMonitorFiles ) import Distribution.ParseUtils ( ParseResult(..) ) import Distribution.Version @@ -204,6 +206,13 @@ readRepoIndex verbosity repoCtxt repo = getIndexFileAge :: Repo -> IO Double getIndexFileAge repo = getFileAge $ repoLocalDir repo "00-index.tar" +-- | A set of files (or directories) that can be monitored to detect when +-- there might have been a change in the source packages. +-- +getSourcePackagesMonitorFiles :: [Repo] -> [FilePath] +getSourcePackagesMonitorFiles repos = + [ repoLocalDir repo "00-index.cache" + | repo <- repos ] -- | It is not necessary to call this, as the cache will be updated when the -- index is read normally. However you can do the work earlier if you like. diff --git a/cabal-install/Distribution/Client/RebuildMonad.hs b/cabal-install/Distribution/Client/RebuildMonad.hs new file mode 100644 index 00000000000..edea8984c86 --- /dev/null +++ b/cabal-install/Distribution/Client/RebuildMonad.hs @@ -0,0 +1,112 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +-- | An abstraction for re-running actions if values or files have changed. +-- +-- This is not a full-blown make-style incremental build system, it's a bit +-- more ad-hoc than that, but it's easier to integrate with existing code. +-- +-- It's a convenient interface to the "Distribution.Client.FileMonitor" +-- functions. +-- +module Distribution.Client.RebuildMonad ( + -- * Rebuild monad + Rebuild, + runRebuild, + + -- * Setting up file monitoring + monitorFiles, + MonitorFilePath(..), + monitorFileSearchPath, + FilePathGlob(..), + + -- * Using a file monitor + FileMonitor(..), + newFileMonitor, + rerunIfChanged, + + -- * Utils + matchFileGlob, + ) where + +import Distribution.Client.FileMonitor + ( MonitorFilePath(..), monitorFileSearchPath + , FilePathGlob(..), matchFileGlob + , FileMonitor(..), newFileMonitor + , MonitorChanged(..), MonitorChangedReason(..) + , checkFileMonitorChanged, updateFileMonitor ) + +import Distribution.Simple.Utils (debug) +import Distribution.Verbosity (Verbosity) + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif +import Control.Monad.State as State +import Distribution.Compat.Binary (Binary) +import System.FilePath (takeFileName) + + +-- | A monad layered on top of 'IO' to help with re-running actions when the +-- input files and values they depend on change. The crucial operations are +-- 'rerunIfChanged' and 'monitorFiles'. +-- +newtype Rebuild a = Rebuild (StateT [MonitorFilePath] IO a) + deriving (Functor, Applicative, Monad, MonadIO) + +-- | Use this wihin the body action of 'rerunIfChanged' to declare that the +-- action depends on the given files. This can be based on what the action +-- actually did. It is these files that will be checked for changes next +-- time 'rerunIfChanged' is called for that 'FileMonitor'. +-- +monitorFiles :: [MonitorFilePath] -> Rebuild () +monitorFiles filespecs = Rebuild (State.modify (filespecs++)) + +-- | Run a 'Rebuild' IO action. +unRebuild :: Rebuild a -> IO (a, [MonitorFilePath]) +unRebuild (Rebuild action) = runStateT action [] + +-- | Run a 'Rebuild' IO action. +runRebuild :: Rebuild a -> IO a +runRebuild (Rebuild action) = evalStateT action [] + +-- | This captures the standard use pattern for a 'FileMonitor': given a +-- monitor, an action and the input value the action depends on, either +-- re-run the action to get its output, or if the value and files the action +-- depends on have not changed then return a previously cached action result. +-- +-- The result is still in the 'Rebuild' monad, so these can be nested. +-- +-- Do not share 'FileMonitor's between different uses of 'rerunIfChanged'. +-- +rerunIfChanged :: (Eq a, Binary a, Binary b) + => Verbosity + -> FilePath + -> FileMonitor a b + -> a + -> Rebuild b + -> Rebuild b +rerunIfChanged verbosity rootDir monitor key action = do + changed <- liftIO $ checkFileMonitorChanged monitor rootDir key + case changed of + MonitorUnchanged result files -> do + liftIO $ debug verbosity $ "File monitor '" ++ monitorName + ++ "' unchanged." + monitorFiles files + return result + + MonitorChanged reason -> do + liftIO $ debug verbosity $ "File monitor '" ++ monitorName + ++ "' changed: " ++ showReason reason + (result, files) <- liftIO $ unRebuild action + liftIO $ updateFileMonitor monitor rootDir files key result + monitorFiles files + return result + where + monitorName = takeFileName (fileMonitorCacheFile monitor) + + showReason (MonitoredFileChanged file) = "file " ++ file + showReason (MonitoredValueChanged _) = "monitor value changed" + showReason MonitorFirstRun = "first run" + showReason MonitorCorruptCache = "invalid cache file" + diff --git a/cabal-install/Main.hs b/cabal-install/Main.hs index 872ffd0fc65..fe70293877c 100644 --- a/cabal-install/Main.hs +++ b/cabal-install/Main.hs @@ -66,6 +66,10 @@ import Distribution.Client.Targets import qualified Distribution.Client.List as List ( list, info ) +--TODO: temporary import, just to force these modules to be built. +-- It will be replaced by import of new build command once merged. +import Distribution.Client.RebuildMonad () + import Distribution.Client.Install (install) import Distribution.Client.Configure (configure) import Distribution.Client.Update (update) diff --git a/cabal-install/cabal-install.cabal b/cabal-install/cabal-install.cabal index a45cd0d6265..24f266e9c04 100644 --- a/cabal-install/cabal-install.cabal +++ b/cabal-install/cabal-install.cabal @@ -143,8 +143,10 @@ executable cabal Distribution.Client.Exec Distribution.Client.Fetch Distribution.Client.FetchUtils + Distribution.Client.FileMonitor Distribution.Client.Freeze Distribution.Client.Get + Distribution.Client.Glob Distribution.Client.GlobalFlags Distribution.Client.GZipUtils Distribution.Client.Haddock @@ -165,6 +167,7 @@ executable cabal Distribution.Client.ParseUtils Distribution.Client.PlanIndex Distribution.Client.Run + Distribution.Client.RebuildMonad Distribution.Client.Sandbox Distribution.Client.Sandbox.Index Distribution.Client.Sandbox.PackageEnvironment @@ -200,6 +203,7 @@ executable cabal Cabal >= 1.23.1 && < 1.24, containers >= 0.4 && < 0.6, filepath >= 1.3 && < 1.5, + hashable >= 1.0 && < 2, HTTP >= 4000.1.5 && < 4000.4, mtl >= 2.0 && < 3, pretty >= 1.1 && < 1.2, @@ -255,6 +259,7 @@ Test-Suite unit-tests UnitTests.Distribution.Client.Dependency.Modular.PSQ UnitTests.Distribution.Client.Dependency.Modular.Solver UnitTests.Distribution.Client.Dependency.Modular.DSL + UnitTests.Distribution.Client.FileMonitor UnitTests.Distribution.Client.GZipUtils UnitTests.Distribution.Client.Sandbox UnitTests.Distribution.Client.Tar @@ -270,6 +275,7 @@ Test-Suite unit-tests process, directory, filepath, + hashable, stm, tar, time, diff --git a/cabal-install/tests/UnitTests.hs b/cabal-install/tests/UnitTests.hs index 0030760f207..1e3bfef1575 100644 --- a/cabal-install/tests/UnitTests.hs +++ b/cabal-install/tests/UnitTests.hs @@ -11,6 +11,7 @@ import qualified UnitTests.Distribution.Client.Targets import qualified UnitTests.Distribution.Client.GZipUtils import qualified UnitTests.Distribution.Client.Dependency.Modular.PSQ import qualified UnitTests.Distribution.Client.Dependency.Modular.Solver +import qualified UnitTests.Distribution.Client.FileMonitor tests :: TestTree tests = testGroup "Unit Tests" [ @@ -28,6 +29,8 @@ tests = testGroup "Unit Tests" [ UnitTests.Distribution.Client.Dependency.Modular.PSQ.tests ,testGroup "UnitTests.Distribution.Client.Dependency.Modular.Solver" UnitTests.Distribution.Client.Dependency.Modular.Solver.tests + ,testGroup "UnitTests.Distribution.Client.FileMonitor" + UnitTests.Distribution.Client.FileMonitor.tests ] -- Extra options for running the test suite diff --git a/cabal-install/tests/UnitTests/Distribution/Client/FileMonitor.hs b/cabal-install/tests/UnitTests/Distribution/Client/FileMonitor.hs new file mode 100644 index 00000000000..09971e2bf2c --- /dev/null +++ b/cabal-install/tests/UnitTests/Distribution/Client/FileMonitor.hs @@ -0,0 +1,558 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +module UnitTests.Distribution.Client.FileMonitor (tests) where + +import Control.Monad +import Control.Exception +import Control.Concurrent (threadDelay) +import qualified Data.Set as Set +import System.FilePath +import System.Directory + +import Distribution.Text (simpleParse) +import Distribution.Compat.Binary +import Distribution.Simple.Utils (withTempDirectory) +import Distribution.Verbosity (silent) + +import Distribution.Client.FileMonitor + +import Test.Tasty +import Test.Tasty.HUnit + + +tests :: [TestTree] +tests = + [ testCase "sanity check mtimes" testFileMTimeSanity + , testCase "no monitor cache" testNoMonitorCache + , testCase "corrupt monitor cache" testCorruptMonitorCache + , testCase "empty monitor" testEmptyMonitor + , testCase "missing file" testMissingFile + , testCase "change file" testChangedFile + , testCase "file mtime vs content" testChangedFileMtimeVsContent + , testCase "remove file" testRemoveFile + , testCase "non-existent file" testNonExistentFile + + , testGroup "glob matches" + [ testCase "no change" testGlobNoChange + , testCase "add match" testGlobAddMatch + , testCase "remove match" testGlobRemoveMatch + , testCase "change match" testGlobChangeMatch + + , testCase "add match subdir" testGlobAddMatchSubdir + , testCase "remove match subdir" testGlobRemoveMatchSubdir + , testCase "change match subdir" testGlobChangeMatchSubdir + + , testCase "add non-match" testGlobAddNonMatch + , testCase "remove non-match" testGlobRemoveNonMatch + + , testCase "add non-match" testGlobAddNonMatchSubdir + , testCase "remove non-match" testGlobRemoveNonMatchSubdir + + , testCase "invariant sorted 1" testInvariantMonitorStateGlobFiles + , testCase "invariant sorted 2" testInvariantMonitorStateGlobDirs + ] + + , testCase "value unchanged" testValueUnchanged + , testCase "value changed" testValueChanged + , testCase "value & file changed" testValueAndFileChanged + , testCase "value updated" testValueUpdated + ] + +-- we rely on file mtimes having a reasonable resolution +testFileMTimeSanity :: Assertion +testFileMTimeSanity = do + withTempDirectory silent "." "file-status-" $ \dir -> do + replicateM_ 10 $ do + writeFile (dir "a") "content" + t1 <- getModificationTime (dir "a") + threadDelayMTimeChange + writeFile (dir "a") "content" + t2 <- getModificationTime (dir "a") + assertBool "expected different file mtimes" (t2 > t1) + +-- first run, where we don't even call updateMonitor +testNoMonitorCache :: Assertion +testNoMonitorCache = + withFileMonitor $ \root monitor -> do + reason <- expectMonitorChanged root (monitor :: FileMonitor () ()) () + reason @?= MonitorFirstRun + +-- write garbage into the binary cache file +testCorruptMonitorCache :: Assertion +testCorruptMonitorCache = + withFileMonitor $ \root monitor -> do + writeFile (fileMonitorCacheFile monitor) "broken" + reason <- expectMonitorChanged root monitor () + reason @?= MonitorCorruptCache + + updateMonitor root monitor [] () () + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [] + + writeFile (fileMonitorCacheFile monitor) "broken" + reason2 <- expectMonitorChanged root monitor () + reason2 @?= MonitorCorruptCache + +-- no files to monitor +testEmptyMonitor :: Assertion +testEmptyMonitor = + withFileMonitor $ \root monitor -> do + touch root "a" + updateMonitor root monitor [] () () + touch root "b" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [] + +-- monitor a file that is expected to exist +testMissingFile :: Assertion +testMissingFile = do + test MonitorFile "a" + test MonitorFileHashed "a" + test MonitorFile "dir/a" + test MonitorFileHashed "dir/a" + where + test monitorKind file = + withFileMonitor $ \root monitor -> do + -- a file that doesn't exist at snapshot time is considered to have + -- changed + updateMonitor root monitor [monitorKind file] () () + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged file + + -- a file doesn't exist at snapshot time, but gets added afterwards is + -- also considered to have changed + updateMonitor root monitor [monitorKind file] () () + touch root file + reason2 <- expectMonitorChanged root monitor () + reason2 @?= MonitoredFileChanged file + + +testChangedFile :: Assertion +testChangedFile = do + test MonitorFile "a" + test MonitorFileHashed "a" + test MonitorFile "dir/a" + test MonitorFileHashed "dir/a" + where + test monitorKind file = + withFileMonitor $ \root monitor -> do + touch root file + updateMonitor root monitor [monitorKind file] () () + threadDelayMTimeChange + write root file "different" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged file + + +testChangedFileMtimeVsContent :: Assertion +testChangedFileMtimeVsContent = + withFileMonitor $ \root monitor -> do + -- if we don't touch the file, it's unchanged + touch root "a" + updateMonitor root monitor [MonitorFile "a"] () () + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [MonitorFile "a"] + + -- if we do touch the file, it's changed if we only consider mtime + updateMonitor root monitor [MonitorFile "a"] () () + threadDelayMTimeChange + touch root "a" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "a" + + -- but if we touch the file, it's unchanged if we consider content hash + updateMonitor root monitor [MonitorFileHashed "a"] () () + threadDelayMTimeChange + touch root "a" + (res2, files2) <- expectMonitorUnchanged root monitor () + res2 @?= () + files2 @?= [MonitorFileHashed "a"] + + -- finally if we change the content it's changed + updateMonitor root monitor [MonitorFileHashed "a"] () () + threadDelayMTimeChange + write root "a" "different" + reason2 <- expectMonitorChanged root monitor () + reason2 @?= MonitoredFileChanged "a" + + +testRemoveFile :: Assertion +testRemoveFile = do + test MonitorFile "a" + test MonitorFileHashed "a" + test MonitorFile "dir/a" + test MonitorFileHashed "dir/a" + where + test monitorKind file = + withFileMonitor $ \root monitor -> do + touch root file + updateMonitor root monitor [monitorKind file] () () + remove root file + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged file + + +-- monitor a file that we expect not to exist +testNonExistentFile :: Assertion +testNonExistentFile = + withFileMonitor $ \root monitor -> do + -- a file that doesn't exist at snapshot time or check time is unchanged + updateMonitor root monitor [MonitorNonExistentFile "a"] () () + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [MonitorNonExistentFile "a"] + + -- if the file then exists it has changed + touch root "a" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "a" + + -- if the file then exists at snapshot and check time it has changed + updateMonitor root monitor [MonitorNonExistentFile "a"] () () + reason2 <- expectMonitorChanged root monitor () + reason2 @?= MonitoredFileChanged "a" + + -- but if the file existed at snapshot time and doesn't exist at check time + -- it is consider unchanged. This is unlike files we expect to exist, but + -- that's because files that exist can have different content and actions + -- can depend on that content, whereas if the action expected a file not to + -- exist and it now does not, it'll give the same result, irrespective of + -- the fact that the file might have existed in the meantime. + updateMonitor root monitor [MonitorNonExistentFile "a"] () () + remove root "a" + (res2, files2) <- expectMonitorUnchanged root monitor () + res2 @?= () + files2 @?= [MonitorNonExistentFile "a"] + + +------------------ +-- globs +-- + +testGlobNoChange :: Assertion +testGlobNoChange = + withFileMonitor $ \root monitor -> do + touch root "dir/good-a" + touch root "dir/good-b" + updateMonitor root monitor [monitorFileGlob "dir/good-*"] () () + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/good-*"] + +testGlobAddMatch :: Assertion +testGlobAddMatch = + withFileMonitor $ \root monitor -> do + touch root "dir/good-a" + updateMonitor root monitor [monitorFileGlob "dir/good-*"] () () + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/good-*"] + + threadDelayMTimeChange + touch root "dir/good-b" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "dir/good-b" + +testGlobRemoveMatch :: Assertion +testGlobRemoveMatch = + withFileMonitor $ \root monitor -> do + touch root "dir/good-a" + touch root "dir/good-b" + updateMonitor root monitor [monitorFileGlob "dir/good-*"] () () + remove root "dir/good-a" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "dir/good-a" + +testGlobChangeMatch :: Assertion +testGlobChangeMatch = + withFileMonitor $ \root monitor -> do + touch root "dir/good-a" + touch root "dir/good-b" + updateMonitor root monitor [monitorFileGlob "dir/good-*"] () () + threadDelayMTimeChange + touch root "dir/good-b" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/good-*"] + + write root "dir/good-b" "different" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "dir/good-b" + +testGlobAddMatchSubdir :: Assertion +testGlobAddMatchSubdir = + withFileMonitor $ \root monitor -> do + touch root "dir/a/good-a" + updateMonitor root monitor [monitorFileGlob "dir/*/good-*"] () () + threadDelayMTimeChange + touch root "dir/b/good-b" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "dir/b/good-b" + +testGlobRemoveMatchSubdir :: Assertion +testGlobRemoveMatchSubdir = + withFileMonitor $ \root monitor -> do + touch root "dir/a/good-a" + touch root "dir/b/good-b" + updateMonitor root monitor [monitorFileGlob "dir/*/good-*"] () () + removeDir root "dir/a" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "dir/a/good-a" + +testGlobChangeMatchSubdir :: Assertion +testGlobChangeMatchSubdir = + withFileMonitor $ \root monitor -> do + touch root "dir/a/good-a" + touch root "dir/b/good-b" + updateMonitor root monitor [monitorFileGlob "dir/*/good-*"] () () + threadDelayMTimeChange + touch root "dir/b/good-b" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/*/good-*"] + + write root "dir/b/good-b" "different" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "dir/b/good-b" + +testGlobAddNonMatch :: Assertion +testGlobAddNonMatch = + withFileMonitor $ \root monitor -> do + touch root "dir/good-a" + updateMonitor root monitor [monitorFileGlob "dir/good-*"] () () + threadDelayMTimeChange + touch root "dir/bad" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/good-*"] + +testGlobRemoveNonMatch :: Assertion +testGlobRemoveNonMatch = + withFileMonitor $ \root monitor -> do + touch root "dir/good-a" + touch root "dir/bad" + updateMonitor root monitor [monitorFileGlob "dir/good-*"] () () + remove root "dir/bad" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/good-*"] + +testGlobAddNonMatchSubdir :: Assertion +testGlobAddNonMatchSubdir = + withFileMonitor $ \root monitor -> do + touch root "dir/a/good-a" + updateMonitor root monitor [monitorFileGlob "dir/*/good-*"] () () + threadDelayMTimeChange + touch root "dir/b/bad" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/*/good-*"] + +testGlobRemoveNonMatchSubdir :: Assertion +testGlobRemoveNonMatchSubdir = + withFileMonitor $ \root monitor -> do + touch root "dir/a/good-a" + touch root "dir/b/bad" + updateMonitor root monitor [monitorFileGlob "dir/*/good-*"] () () + removeDir root "dir/b" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/*/good-*"] + + +-- try and tickle a bug that happens if we don't maintain the invariant that +-- MonitorStateGlobFiles entries are sorted +testInvariantMonitorStateGlobFiles :: Assertion +testInvariantMonitorStateGlobFiles = + withFileMonitor $ \root monitor -> do + touch root "dir/a" + touch root "dir/b" + touch root "dir/c" + touch root "dir/d" + updateMonitor root monitor [monitorFileGlob "dir/*"] () () + threadDelayMTimeChange + -- so there should be no change (since we're doing content checks) + -- but if we can get the dir entries to appear in the wrong order + -- then if the sorted invariant is not maintained then we can fool + -- the 'probeGlobStatus' into thinking there's changes + remove root "dir/a" + remove root "dir/b" + remove root "dir/c" + remove root "dir/d" + touch root "dir/d" + touch root "dir/c" + touch root "dir/b" + touch root "dir/a" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/*"] + +-- same thing for the subdirs case +testInvariantMonitorStateGlobDirs :: Assertion +testInvariantMonitorStateGlobDirs = + withFileMonitor $ \root monitor -> do + touch root "dir/a/file" + touch root "dir/b/file" + touch root "dir/c/file" + touch root "dir/d/file" + updateMonitor root monitor [monitorFileGlob "dir/*/file"] () () + threadDelayMTimeChange + removeDir root "dir/a" + removeDir root "dir/b" + removeDir root "dir/c" + removeDir root "dir/d" + touch root "dir/d/file" + touch root "dir/c/file" + touch root "dir/b/file" + touch root "dir/a/file" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/*/file"] + + +------------------ +-- value changes +-- + +testValueUnchanged :: Assertion +testValueUnchanged = + withFileMonitor $ \root monitor -> do + touch root "a" + updateMonitor root monitor [MonitorFile "a"] (42 :: Int) "ok" + (res, files) <- expectMonitorUnchanged root monitor 42 + res @?= "ok" + files @?= [MonitorFile "a"] + +testValueChanged :: Assertion +testValueChanged = + withFileMonitor $ \root monitor -> do + touch root "a" + updateMonitor root monitor [MonitorFile "a"] (42 :: Int) "ok" + reason <- expectMonitorChanged root monitor 43 + reason @?= MonitoredValueChanged 42 + +testValueAndFileChanged :: Assertion +testValueAndFileChanged = + withFileMonitor $ \root monitor -> do + touch root "a" + + -- we change the value and the file, and the value change is reported + updateMonitor root monitor [MonitorFile "a"] (42 :: Int) "ok" + threadDelayMTimeChange + touch root "a" + reason <- expectMonitorChanged root monitor 43 + reason @?= MonitoredValueChanged 42 + + -- if fileMonitorCheckIfOnlyValueChanged then if only the value changed + -- then it's reported as MonitoredValueChanged + let monitor' :: FileMonitor Int String + monitor' = monitor { fileMonitorCheckIfOnlyValueChanged = True } + updateMonitor root monitor' [MonitorFile "a"] 42 "ok" + reason2 <- expectMonitorChanged root monitor' 43 + reason2 @?= MonitoredValueChanged 42 + + -- but if a file changed too then we don't report MonitoredValueChanged + updateMonitor root monitor' [MonitorFile "a"] 42 "ok" + threadDelayMTimeChange + touch root "a" + reason3 <- expectMonitorChanged root monitor' 43 + reason3 @?= MonitoredFileChanged "a" + +testValueUpdated :: Assertion +testValueUpdated = + withFileMonitor $ \root monitor -> do + touch root "a" + + let monitor' :: FileMonitor (Set.Set Int) String + monitor' = (monitor :: FileMonitor (Set.Set Int) String) { + fileMonitorCheckIfOnlyValueChanged = True, + fileMonitorKeyValid = Set.isSubsetOf + } + + updateMonitor root monitor' [MonitorFile "a"] (Set.fromList [42,43]) "ok" + (res,_files) <- expectMonitorUnchanged root monitor' (Set.fromList [42]) + res @?= "ok" + + reason <- expectMonitorChanged root monitor' (Set.fromList [42,44]) + reason @?= MonitoredValueChanged (Set.fromList [42,43]) + + +------------- +-- Utils + +newtype RootPath = RootPath FilePath + +write :: RootPath -> FilePath -> String -> IO () +write (RootPath root) fname contents = do + let path = root fname + createDirectoryIfMissing True (takeDirectory path) + writeFile path contents + +touch :: RootPath -> FilePath -> IO () +touch root fname = write root fname "hello" + +-- Wait a moment to ensure a file mtime change +threadDelayMTimeChange :: IO () +#if WIN32 || (MIN_VERSION_directory(1,2,1) && MIN_VERSION_unix(2,6,0)) +-- hi-res file times +threadDelayMTimeChange = threadDelay 10000 -- 10ms +#else +-- second-res file times +threadDelayMTimeChange = threadDelay 1000000 -- 1s +#endif + +remove :: RootPath -> FilePath -> IO () +remove (RootPath root) fname = removeFile (root fname) + +removeDir :: RootPath -> FilePath -> IO () +removeDir (RootPath root) dname = removeDirectoryRecursive (root dname) + +monitorFileGlob :: String -> MonitorFilePath +monitorFileGlob globstr + | Just glob <- simpleParse globstr = MonitorFileGlob glob + | otherwise = error $ "Failed to parse " ++ globstr + + +expectMonitorChanged :: (Binary a, Binary b) + => RootPath -> FileMonitor a b -> a + -> IO (MonitorChangedReason a) +expectMonitorChanged root monitor key = do + res <- checkChanged root monitor key + case res of + MonitorChanged reason -> return reason + MonitorUnchanged _ _ -> throwIO $ HUnitFailure "expected change" + +expectMonitorUnchanged :: (Binary a, Binary b) + => RootPath -> FileMonitor a b -> a + -> IO (b, [MonitorFilePath]) +expectMonitorUnchanged root monitor key = do + res <- checkChanged root monitor key + case res of + MonitorChanged _reason -> throwIO $ HUnitFailure "expected no change" + MonitorUnchanged b files -> return (b, files) + +checkChanged :: (Binary a, Binary b) + => RootPath -> FileMonitor a b + -> a -> IO (MonitorChanged a b) +checkChanged (RootPath root) monitor key = + checkFileMonitorChanged monitor root key + +updateMonitor :: (Binary a, Binary b) + => RootPath -> FileMonitor a b + -> [MonitorFilePath] -> a -> b -> IO () +updateMonitor (RootPath root) monitor files key result = + updateFileMonitor monitor root files key result + +withFileMonitor :: Eq a => (RootPath -> FileMonitor a b -> IO c) -> IO c +withFileMonitor action = do + withTempDirectory silent "." "file-status-" $ \root -> do + let monitorFile = root <.> "monitor" + monitor = newFileMonitor monitorFile + finally (action (RootPath root) monitor) $ do + exists <- doesFileExist monitorFile + when exists $ removeFile monitorFile +