Skip to content

feat(bloomfilter): add salt #742

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
17 changes: 13 additions & 4 deletions bench/macro/lsm-tree-bench-bloomfilter.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import Text.Printf (printf)
import Database.LSMTree.Extras.Orphans ()
import Database.LSMTree.Internal.Assertions (fromIntegralChecked)
import qualified Database.LSMTree.Internal.BloomFilter as Bloom
import Database.LSMTree.Internal.Paths (SessionSalt (..))
import Database.LSMTree.Internal.Serialise (SerialisedKey,
serialiseKey)

Expand Down Expand Up @@ -108,7 +109,7 @@ benchmarks = do
benchmark "bloomQueries"
"(this is the batch lookup, less the cost of computing and hashing the keys)"
(benchInBatches benchmarkBatchSize rng0
(\ks -> Bloom.bloomQueries vbs ks `seq` ()))
(\ks -> Bloom.bloomQueries sessionSalt vbs ks `seq` ()))
(fromIntegralChecked benchmarkNumLookups)
hashcost
0
Expand Down Expand Up @@ -200,6 +201,11 @@ totalNumEntriesSanityCheck l1 filterSizes =
==
sum [ 2^l1 * sizeFactor | (_, sizeFactor, _) <- filterSizes ]

-- | Session salt used by this benchmark: the fixed 'bloomSalt' wrapped in the
-- 'SessionSalt' constructor, so that queries use the same salt value as the
-- filters built with 'bloomSalt'.
sessionSalt :: SessionSalt
sessionSalt = SessionSalt bloomSalt

-- | Fixed Bloom filter salt (the constant @4@) used in this benchmark both
-- when creating filters and when hashing keys.
bloomSalt :: Bloom.Salt
bloomSalt = 4

-- | Input environment for benchmarking 'Bloom.elemMany'.
--
Expand All @@ -223,7 +229,10 @@ elemManyEnv :: [BloomFilterSizeInfo]
elemManyEnv filterSizes rng0 =
stToIO $ do
-- create the filters
mbs <- sequence [ Bloom.new bsize | (_, _, bsize) <- filterSizes ]
mbs <- sequence
[ Bloom.new bsize bloomSalt
| (_, _, bsize) <- filterSizes
]
-- add elements
foldM_
(\rng (i, mb) -> do
Expand Down Expand Up @@ -264,7 +273,7 @@ benchInBatches !b !rng0 !action =
benchMakeHashes :: Vector (Bloom SerialisedKey) -> BatchBench
benchMakeHashes !_bs !ks =
let khs :: VP.Vector (Bloom.Hashes SerialisedKey)
!khs = V.convert (V.map Bloom.hashes ks)
!khs = V.convert (V.map (Bloom.hashesWithSalt bloomSalt) ks)
in khs `seq` ()

-- | This gives us a combined cost of calculating the series of keys, their
Expand All @@ -273,7 +282,7 @@ benchMakeHashes !_bs !ks =
benchElemHashes :: Vector (Bloom SerialisedKey) -> BatchBench
benchElemHashes !bs !ks =
let khs :: VP.Vector (Bloom.Hashes SerialisedKey)
!khs = V.convert (V.map Bloom.hashes ks)
!khs = V.convert (V.map (Bloom.hashesWithSalt bloomSalt) ks)
in V.foldl'
(\_ b -> VP.foldl'
(\_ kh -> Bloom.elemHashes b kh `seq` ())
Expand Down
18 changes: 11 additions & 7 deletions bench/macro/lsm-tree-bench-lookups.hs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ import Database.LSMTree.Internal.Entry (Entry (Insert),
import Database.LSMTree.Internal.Index (Index)
import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact))
import Database.LSMTree.Internal.Lookup
import Database.LSMTree.Internal.Paths (RunFsPaths (RunFsPaths))
import Database.LSMTree.Internal.Paths (RunFsPaths (RunFsPaths),
SessionSalt (SessionSalt))
import Database.LSMTree.Internal.Run (Run)
import qualified Database.LSMTree.Internal.Run as Run
import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..))
Expand Down Expand Up @@ -129,6 +130,9 @@ entryBitsWithOverhead = entryBits -- key and value size
numEntriesFitInPage :: Fractional a => a
numEntriesFitInPage = fromIntegral unusedPageBits / fromIntegral entryBitsWithOverhead

-- | Fixed session salt (the constant @4@) shared by every step of this
-- benchmark: it is passed both when building the runs and when performing
-- the Bloom filter queries and lookups against them.
sessionSalt :: SessionSalt
sessionSalt = SessionSalt 4

benchmarks :: Run.RunDataCaching -> IO ()
benchmarks !caching = withFS $ \hfs hbio -> do
#ifdef NO_IGNORE_ASSERTS
Expand Down Expand Up @@ -351,7 +355,7 @@ lookupsEnv runSizes keyRng0 hfs hbio caching = do

-- create the runs
rbs <- sequence
[ RunBuilder.new hfs hbio
[ RunBuilder.new hfs hbio sessionSalt
RunParams {
runParamCaching = caching,
runParamAlloc = RunAllocFixed benchmarkNumBitsPerEntry,
Expand Down Expand Up @@ -428,7 +432,7 @@ benchBloomQueries !bs !keyRng !n
| n <= 0 = ()
| otherwise =
let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize
in bloomQueries bs ks `seq`
in bloomQueries sessionSalt bs ks `seq`
benchBloomQueries bs keyRng' (n-benchmarkGenBatchSize)

-- | This gives us the combined cost of calculating batches of keys, performing
Expand All @@ -445,7 +449,7 @@ benchIndexSearches !arenaManager !bs !ics !hs !keyRng !n
| n <= 0 = pure ()
| otherwise = do
let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize
!rkixs = bloomQueries bs ks
!rkixs = bloomQueries sessionSalt bs ks
!_ioops <- withArena arenaManager $ \arena -> stToIO $ indexSearches arena ics hs ks rkixs
benchIndexSearches arenaManager bs ics hs keyRng' (n-benchmarkGenBatchSize)

Expand All @@ -463,7 +467,7 @@ benchPrepLookups !arenaManager !bs !ics !hs !keyRng !n
| n <= 0 = pure ()
| otherwise = do
let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize
(!_rkixs, !_ioops) <- withArena arenaManager $ \arena -> stToIO $ prepLookups arena bs ics hs ks
(!_rkixs, !_ioops) <- withArena arenaManager $ \arena -> stToIO $ prepLookups arena sessionSalt bs ics hs ks
benchPrepLookups arenaManager bs ics hs keyRng' (n-benchmarkGenBatchSize)

-- | This gives us the combined cost of calculating batches of keys, and
Expand All @@ -489,7 +493,7 @@ benchLookupsIO !hbio !arenaManager !resolve !wb !wbblobs !rs !bs !ics !hs =
| otherwise = do
let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize
!_ <- lookupsIOWithWriteBuffer
hbio arenaManager resolve wb wbblobs rs bs ics hs ks
hbio arenaManager resolve sessionSalt wb wbblobs rs bs ics hs ks
go keyRng' (n-benchmarkGenBatchSize)

{-------------------------------------------------------------------------------
Expand Down Expand Up @@ -524,7 +528,7 @@ classifyLookups !bs !keyRng0 !n0 =
| otherwise =
unsafePerformIO (putStr ".") `seq`
let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize
!rkixs = bloomQueries bs ks
!rkixs = bloomQueries sessionSalt bs ks
in loop (positives + VP.length rkixs) keyRng' (n-benchmarkGenBatchSize)

-- | Fill a mutable vector with uniformly random values.
Expand Down
7 changes: 5 additions & 2 deletions bench/macro/lsm-tree-bench-wp8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ benchTableConfig :: LSM.TableConfig
benchTableConfig =
LSM.defaultTableConfig {LSM.confFencePointerIndex = LSM.CompactIndex}

-- | Fixed salt (the constant @4@) passed to 'LSM.withSession' by this
-- benchmark, in both the setup and the run phase.
benchSalt :: LSM.Salt
benchSalt = 4

-------------------------------------------------------------------------------
-- Keys and values
-------------------------------------------------------------------------------
Expand Down Expand Up @@ -413,7 +416,7 @@ doSetup' gopts opts = do

let name = LSM.toSnapshotName "bench"

LSM.withSession (mkTracer gopts) hasFS hasBlockIO (FS.mkFsPath []) $ \session -> do
LSM.withSession (mkTracer gopts) hasFS hasBlockIO benchSalt (FS.mkFsPath []) $ \session -> do
tbl <- LSM.newTableWith @IO @K @V @B (mkTableConfigSetup gopts opts benchTableConfig) session

forM_ (groupsOfN 256 [ 0 .. initialSize gopts ]) $ \batch -> do
Expand Down Expand Up @@ -575,7 +578,7 @@ doRun gopts opts = do

let name = LSM.toSnapshotName "bench"

LSM.withSession (mkTracer gopts) hasFS hasBlockIO (FS.mkFsPath []) $ \session ->
LSM.withSession (mkTracer gopts) hasFS hasBlockIO benchSalt (FS.mkFsPath []) $ \session ->
withLatencyHandle $ \h -> do
-- open snapshot
-- In checking mode we start with an empty table, since our pure
Expand Down
12 changes: 8 additions & 4 deletions bench/micro/Bench/Database/LSMTree.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import Control.DeepSeq
import Control.Exception
import Control.Tracer
import Criterion.Main
import qualified Data.BloomFilter.Hash as Bloom
import Data.ByteString.Short (ShortByteString)
import qualified Data.ByteString.Short as SBS
import Data.Foldable
Expand Down Expand Up @@ -82,6 +83,9 @@ benchConfig = defaultTableConfig
, confFencePointerIndex = CompactIndex
}

-- | Fixed salt (the constant @4@) passed to 'openSession' by the benchmarks
-- in this module.
bloomSalt :: Bloom.Salt
bloomSalt = 4

{-------------------------------------------------------------------------------
Large Value vs. Small Value Blob
-------------------------------------------------------------------------------}
Expand Down Expand Up @@ -135,7 +139,7 @@ benchLargeValueVsSmallValueBlob =

initialise inss = do
(tmpDir, hfs, hbio) <- mkFiles
s <- openSession nullTracer hfs hbio (FS.mkFsPath [])
s <- openSession nullTracer hfs hbio bloomSalt (FS.mkFsPath [])
t <- newTableWith benchConfig s
V.mapM_ (inserts t) inss
pure (tmpDir, hfs, hbio, s, t)
Expand Down Expand Up @@ -220,7 +224,7 @@ benchCursorScanVsRangeLookupScan =

initialise inss = do
(tmpDir, hfs, hbio) <- mkFiles
s <- openSession nullTracer hfs hbio (FS.mkFsPath [])
s <- openSession nullTracer hfs hbio bloomSalt (FS.mkFsPath [])
t <- newTableWith benchConfig s
V.mapM_ (inserts t) inss
pure (tmpDir, hfs, hbio, s, t)
Expand Down Expand Up @@ -265,7 +269,7 @@ benchInsertBatches =

initialise = do
(tmpDir, hfs, hbio) <- mkFiles
s <- openSession nullTracer hfs hbio (FS.mkFsPath [])
s <- openSession nullTracer hfs hbio bloomSalt (FS.mkFsPath [])
t <- newTableWith _benchConfig s
pure (tmpDir, hfs, hbio, s, t)

Expand Down Expand Up @@ -451,7 +455,7 @@ mkTable ::
, Table IO K V3 B3
)
mkTable hfs hbio conf = do
sesh <- openSession nullTracer hfs hbio (FS.mkFsPath [])
sesh <- openSession nullTracer hfs hbio bloomSalt (FS.mkFsPath [])
t <- newTableWith conf sesh
pure (sesh, t)

Expand Down
7 changes: 5 additions & 2 deletions bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.BloomFilter" [
]
]

-- | Fixed Bloom filter salt (the constant @4@) used when constructing the
-- filters benchmarked in this module.
bloomSalt :: Bloom.Salt
bloomSalt = 4

-- | Input environment for benchmarking 'Bloom.elem'.
elemEnv ::
Double -- ^ False positive rate
Expand All @@ -61,7 +64,7 @@ elemEnv fpr nbloom nelemsPositive nelemsNegative = do
$ uniformWithoutReplacement @UTxOKey g1 (nbloom + nelemsNegative)
ys2 = sampleUniformWithReplacement @UTxOKey g2 nelemsPositive xs
zs = shuffle (ys1 ++ ys2) g3
pure ( Bloom.fromList (Bloom.policyForFPR fpr) (fmap serialiseKey xs)
pure ( Bloom.fromList (Bloom.policyForFPR fpr) bloomSalt (fmap serialiseKey xs)
, fmap serialiseKey zs
)

Expand All @@ -86,5 +89,5 @@ constructBloom ::
constructBloom fpr m =
-- For faster construction, avoid going via lists and use Bloom.create,
-- traversing the map inserting the keys
Bloom.create (Bloom.sizeForFPR fpr (Map.size m)) $ \b ->
Bloom.create (Bloom.sizeForFPR fpr (Map.size m)) bloomSalt $ \b ->
BiFold.bifoldMap (\k -> Bloom.insert b k) (\_v -> pure ()) m
20 changes: 12 additions & 8 deletions bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ import Database.LSMTree.Internal.Lookup (bloomQueries, indexSearches,
intraPageLookupsWithWriteBuffer, lookupsIOWithWriteBuffer,
prepLookups)
import Database.LSMTree.Internal.Page (getNumPages)
import Database.LSMTree.Internal.Paths (RunFsPaths (..))
import Database.LSMTree.Internal.Paths (RunFsPaths (..),
SessionSalt (..))
import Database.LSMTree.Internal.Run (Run)
import qualified Database.LSMTree.Internal.Run as Run
import qualified Database.LSMTree.Internal.RunAcc as RunAcc
Expand Down Expand Up @@ -84,6 +85,9 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.Lookup" [
}
]

-- | Fixed session salt (the constant @4@) used by the lookup benchmarks: the
-- same value is passed when creating the run and when querying it.
sessionSalt :: SessionSalt
sessionSalt = SessionSalt 4

benchLookups :: Config -> Benchmark
benchLookups conf@Config{name} =
withEnv $ \ ~(_dir, arenaManager, _hasFS, hasBlockIO, wbblobs, rs, ks) ->
Expand All @@ -96,23 +100,23 @@ benchLookups conf@Config{name} =
-- The bloomfilter is queried for all lookup keys. The result is an
-- unboxed vector, so only use @whnf@.
bench "Bloomfilter query" $
whnf (\ks' -> bloomQueries blooms ks') ks
whnf (\ks' -> bloomQueries sessionSalt blooms ks') ks
-- The compact index is only searched for (true and false) positive
-- lookup keys. We use whnf here because the result is
, env (pure $ bloomQueries blooms ks) $ \rkixs ->
, env (pure $ bloomQueries sessionSalt blooms ks) $ \rkixs ->
bench "Compact index search" $
whnfAppIO (\ks' -> withArena arenaManager $ \arena -> stToIO $ indexSearches arena indexes kopsFiles ks' rkixs) ks
-- prepLookups combines bloom filter querying and index searching.
-- The implementation forces the results to WHNF, so we use
-- whnfAppIO here instead of nfAppIO.
, bench "Lookup preparation in memory" $
whnfAppIO (\ks' -> withArena arenaManager $ \arena -> stToIO $ prepLookups arena blooms indexes kopsFiles ks') ks
whnfAppIO (\ks' -> withArena arenaManager $ \arena -> stToIO $ prepLookups arena sessionSalt blooms indexes kopsFiles ks') ks
-- Submit the IOOps we get from prepLookups to HasBlockIO. We use
-- perRunEnv because IOOps contain mutable buffers, so we want fresh
-- ones for each run of the benchmark. We manually evaluate the
-- result to WHNF since it is unboxed vector.
, bench "Submit IOOps" $
perRunEnv (withArena arenaManager $ \arena -> stToIO $ prepLookups arena blooms indexes kopsFiles ks) $ \ ~(_rkixs, ioops) -> do
perRunEnv (withArena arenaManager $ \arena -> stToIO $ prepLookups arena sessionSalt blooms indexes kopsFiles ks) $ \ ~(_rkixs, ioops) -> do
!_ioress <- FS.submitIO hasBlockIO ioops
pure ()
-- When IO results have been collected, intra-page lookups searches
Expand All @@ -125,7 +129,7 @@ benchLookups conf@Config{name} =
, bench "Perform intra-page lookups" $
perRunEnvWithCleanup
( do arena <- newArena arenaManager
(rkixs, ioops) <- stToIO (prepLookups arena blooms indexes kopsFiles ks)
(rkixs, ioops) <- stToIO (prepLookups arena sessionSalt blooms indexes kopsFiles ks)
ioress <- FS.submitIO hasBlockIO ioops
pure (rkixs, ioops, ioress, arena)
)
Expand All @@ -141,7 +145,7 @@ benchLookups conf@Config{name} =
, bench "Lookups in IO" $
whnfAppIO (\ks' -> lookupsIOWithWriteBuffer
hasBlockIO arenaManager resolveV
WB.empty wbblobs
sessionSalt WB.empty wbblobs
rs blooms indexes kopsFiles ks') ks
]
-- TODO: consider adding benchmarks that also use the write buffer
Expand Down Expand Up @@ -192,7 +196,7 @@ lookupsInBatchesEnv Config {..} = do
wbblobs <- WBB.new hasFS (FS.mkFsPath ["0.wbblobs"])
wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob hasFS wbblobs)) storedKeys
let fsps = RunFsPaths (FS.mkFsPath []) (RunNumber 0)
r <- Run.fromWriteBuffer hasFS hasBlockIO runParams fsps wb wbblobs
r <- Run.fromWriteBuffer hasFS hasBlockIO sessionSalt runParams fsps wb wbblobs
let NumEntries nentriesReal = Run.size r
assertEqual nentriesReal nentries $ pure ()
-- 42 to 43 entries per page
Expand Down
12 changes: 8 additions & 4 deletions bench/micro/Bench/Database/LSMTree/Internal/Merge.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ import Database.LSMTree.Internal.Entry
import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact))
import Database.LSMTree.Internal.Merge (MergeType (..))
import qualified Database.LSMTree.Internal.Merge as Merge
import Database.LSMTree.Internal.Paths (RunFsPaths (..))
import Database.LSMTree.Internal.Paths (RunFsPaths (..),
SessionSalt (..))
import Database.LSMTree.Internal.Run (Run)
import qualified Database.LSMTree.Internal.Run as Run
import qualified Database.LSMTree.Internal.RunAcc as RunAcc
Expand Down Expand Up @@ -220,6 +221,9 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.Merge" [
| w <- weights
]

-- | Fixed session salt (the constant @4@) used by the merge benchmarks: the
-- same value is passed when creating the input runs and when merging them.
sessionSalt :: SessionSalt
sessionSalt = SessionSalt 4

runParams :: RunBuilder.RunParams
runParams =
RunBuilder.RunParams {
Expand Down Expand Up @@ -273,7 +277,7 @@ merge ::
merge fs hbio Config {..} targetPaths runs = do
let f = fromMaybe const mergeResolve
m <- fromMaybe (error "empty inputs, no merge created") <$>
Merge.new fs hbio runParams mergeType f targetPaths runs
Merge.new fs hbio sessionSalt runParams mergeType f targetPaths runs
Merge.stepsToCompletion m stepSize

fsPath :: FS.FsPath
Expand Down Expand Up @@ -397,7 +401,7 @@ randomRuns ::
randomRuns hasFS hasBlockIO config@Config {..} rng0 = do
counter <- inputRunPathsCounter
fmap V.fromList $
mapM (unsafeCreateRun hasFS hasBlockIO runParams fsPath counter) $
mapM (unsafeCreateRun hasFS hasBlockIO sessionSalt runParams fsPath counter) $
zipWith
(randomRunData config)
nentries
Expand Down Expand Up @@ -446,5 +450,5 @@ randomRunData Config {..} runentries g0 =
-- Each run entry needs a distinct key.
randomWord64OutOf :: Int -> Rnd SerialisedKey
randomWord64OutOf possibleKeys =
first (serialiseKey . Hash.hash64)
first (serialiseKey . Hash.hashSalt64 0)
. uniformR (0, fromIntegral possibleKeys :: Word64)
14 changes: 7 additions & 7 deletions bloomfilter/bench/bloomfilter-bench.hs
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
module Main where

import Criterion.Main (bench, bgroup, defaultMain, env, whnf)
import qualified Data.BloomFilter.Blocked as B.Blocked
import qualified Data.BloomFilter.Classic as B.Classic
import Data.BloomFilter.Hash (Hashable (..), hash64)

import Data.BloomFilter.Hash (Hashable (..))
import Data.Word (Word64)
import System.Random

import Criterion.Main
import System.Random (StdGen, newStdGen, uniform)

main :: IO ()
main =
Expand Down Expand Up @@ -42,11 +40,13 @@ main =

constructBloom_classic :: Int -> Double -> StdGen -> B.Classic.Bloom Word64
constructBloom_classic n fpr g0 =
B.Classic.unfold (B.Classic.sizeForFPR fpr n) (nextElement n) (g0, 0)
let (!salt, !g1) = uniform g0 in
B.Classic.unfold (B.Classic.sizeForFPR fpr n) salt (nextElement n) (g1, 0)

constructBloom_blocked :: Int -> Double -> StdGen -> B.Blocked.Bloom Word64
constructBloom_blocked n fpr g0 =
B.Blocked.unfold (B.Blocked.sizeForFPR fpr n) (nextElement n) (g0, 0)
let (!salt, !g1) = uniform g0 in
B.Blocked.unfold (B.Blocked.sizeForFPR fpr n) salt (nextElement n) (g1, 0)

{-# INLINE nextElement #-}
nextElement :: Int -> (StdGen, Int) -> Maybe (Word64, (StdGen, Int))
Expand Down
Loading