Skip to content

Commit 559e85f

Browse files
committed
feat(bloomfilter): add salt
1 parent 9982e14 commit 559e85f

File tree

5 files changed

+88
-56
lines changed

5 files changed

+88
-56
lines changed

bloomfilter/src/Data/BloomFilter/Blocked.hs

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -92,34 +92,35 @@ import Prelude hiding (elem, notElem)
9292
-- Example:
9393
--
9494
-- @
95-
--filter = create (sizeForBits 16 2) $ \mf -> do
95+
--filter = create salt (sizeForBits 16 2) $ \mf -> do
9696
-- insert mf \"foo\"
9797
-- insert mf \"bar\"
9898
-- @
9999
--
100100
-- Note that the result of the setup function is not used.
101-
create :: BloomSize
101+
create :: Salt
102+
-> BloomSize
102103
-> (forall s. (MBloom s a -> ST s ())) -- ^ setup function
103104
-> Bloom a
104105
{-# INLINE create #-}
105-
create bloomsize body =
106+
create bloomsalt bloomsize body =
106107
runST $ do
107-
mb <- new bloomsize
108+
mb <- new bloomsalt bloomsize
108109
body mb
109110
unsafeFreeze mb
110111

111112
{-# INLINEABLE insert #-}
112113
-- | Insert a value into a mutable Bloom filter. Afterwards, a
113114
-- membership query for the same value is guaranteed to return @True@.
114115
insert :: Hashable a => MBloom s a -> a -> ST s ()
115-
insert = \ !mb !x -> insertHashes mb (hashes x)
116+
insert = \ !mb !x -> insertHashes mb (hashes (mbHashSalt mb) x)
116117

117118
{-# INLINE elem #-}
118119
-- | Query an immutable Bloom filter for membership. If the value is
119120
-- present, return @True@. If the value is not present, there is
120121
-- /still/ some possibility that @True@ will be returned.
121122
elem :: Hashable a => a -> Bloom a -> Bool
122-
elem = \ !x !b -> elemHashes b (hashes x)
123+
elem = \ !x !b -> elemHashes b (hashes (hashSalt b) x)
123124

124125
-- | Same as 'elem' but with the opposite argument order:
125126
--
@@ -149,13 +150,14 @@ notElem = \x b -> not (x `elem` b)
149150
-- @b@ is used as a new seed.
150151
unfold :: forall a b.
151152
Hashable a
152-
=> BloomSize
153+
=> Salt
154+
-> BloomSize
153155
-> (b -> Maybe (a, b)) -- ^ seeding function
154156
-> b -- ^ initial seed
155157
-> Bloom a
156158
{-# INLINE unfold #-}
157-
unfold bloomsize f k =
158-
create bloomsize body
159+
unfold bloomsalt bloomsize f k =
160+
create bloomsalt bloomsize body
159161
where
160162
body :: forall s. MBloom s a -> ST s ()
161163
body mb = loop k
@@ -173,23 +175,26 @@ unfold bloomsize f k =
173175
-- filt = fromList (policyForBits 10) [\"foo\", \"bar\", \"quux\"]
174176
-- @
175177
fromList :: (Foldable t, Hashable a)
176-
=> BloomPolicy
178+
=> Salt
179+
-> BloomPolicy
177180
-> t a -- ^ values to populate with
178181
-> Bloom a
179-
fromList policy xs =
180-
create bsize (\b -> mapM_ (insert b) xs)
182+
fromList bloomsalt policy xs =
183+
create bloomsalt bsize (\b -> mapM_ (insert b) xs)
181184
where
182185
bsize = sizeForPolicy policy (length xs)
183186

184-
{-# SPECIALISE deserialise :: BloomSize
187+
{-# SPECIALISE deserialise :: Salt
188+
-> BloomSize
185189
-> (MutableByteArray RealWorld -> Int -> Int -> IO ())
186190
-> IO (Bloom a) #-}
187191
deserialise :: PrimMonad m
188-
=> BloomSize
192+
=> Salt
193+
-> BloomSize
189194
-> (MutableByteArray (PrimState m) -> Int -> Int -> m ())
190195
-> m (Bloom a)
191-
deserialise bloomsize fill = do
192-
mbloom <- stToPrim $ new bloomsize
196+
deserialise bloomsalt bloomsize fill = do
197+
mbloom <- stToPrim $ new bloomsalt bloomsize
193198
Internal.deserialise mbloom fill
194199
stToPrim $ unsafeFreeze mbloom
195200

@@ -235,7 +240,7 @@ insertMany bloom key n =
235240
prepareProbes !i !i_w
236241
| i_w < 0x0f && i < n = do
237242
k <- key i
238-
let !kh = hashes k
243+
let !kh = hashes (mbHashSalt bloom) k
239244
prefetchInsert bloom kh
240245
P.writePrimArray buf i_w kh
241246
prepareProbes (i+1) (i_w+1)
@@ -258,7 +263,7 @@ insertMany bloom key n =
258263
-- (from the read end of the buffer).
259264
| i < n = do
260265
k <- key i
261-
let !kh = hashes k
266+
let !kh = hashes (mbHashSalt bloom) k
262267
prefetchInsert bloom kh
263268
P.writePrimArray buf i_w kh
264269
insertProbe

bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,19 @@
99
module Data.BloomFilter.Blocked.Internal (
1010
-- * Mutable Bloom filters
1111
MBloom,
12+
mbHashSalt,
1213
new,
1314
maxSizeBits,
1415

1516
-- * Immutable Bloom filters
1617
Bloom,
18+
hashSalt,
1719
bloomInvariant,
1820
size,
1921

2022
-- * Hash-based operations
2123
Hashes,
24+
Salt (Salt),
2225
hashes,
2326
insertHashes,
2427
prefetchInsert,
@@ -52,6 +55,7 @@ import Data.BloomFilter.Blocked.BitArray (BitArray, BitIx (..),
5255
import qualified Data.BloomFilter.Blocked.BitArray as BitArray
5356
import Data.BloomFilter.Classic.Calc
5457
import Data.BloomFilter.Hash
58+
import Data.Word (Word64)
5559

5660
-- | The version of the format used by 'serialise' and 'deserialise'. The
5761
-- format number will change when there is an incompatible change in the
@@ -84,6 +88,7 @@ type MBloom :: Type -> Type -> Type
8488
data MBloom s a = MBloom {
8589
mbNumBlocks :: {-# UNPACK #-} !NumBlocks -- ^ non-zero
8690
, mbNumHashes :: {-# UNPACK #-} !Int
91+
, mbHashSalt :: {-# UNPACK #-} !Salt
8792
, mbBitArray :: {-# UNPACK #-} !(MBitArray s)
8893
}
8994
type role MBloom nominal nominal
@@ -100,13 +105,14 @@ instance NFData (MBloom s a) where
100105
--
101106
-- The filter size is capped at 'maxSizeBits'.
102107
--
103-
new :: BloomSize -> ST s (MBloom s a)
104-
new BloomSize { sizeBits, sizeHashes } = do
108+
new :: Salt -> BloomSize -> ST s (MBloom s a)
109+
new hashSalt BloomSize { sizeBits, sizeHashes } = do
105110
let numBlocks = bitsToBlocks (max 1 (min maxSizeBits sizeBits))
106111
mbBitArray <- BitArray.new numBlocks
107112
pure MBloom {
108113
mbNumBlocks = numBlocks,
109114
mbNumHashes = max 1 sizeHashes,
115+
mbHashSalt = hashSalt,
110116
mbBitArray
111117
}
112118

@@ -174,6 +180,7 @@ type Bloom :: Type -> Type
174180
data Bloom a = Bloom {
175181
numBlocks :: {-# UNPACK #-} !NumBlocks -- ^ non-zero
176182
, numHashes :: {-# UNPACK #-} !Int
183+
, hashSalt :: {-# UNPACK #-} !Salt
177184
, bitArray :: {-# UNPACK #-} !BitArray
178185
}
179186
deriving stock Eq
@@ -253,11 +260,12 @@ serialise b@Bloom{bitArray} =
253260
-- | Create an immutable Bloom filter from a mutable one. The mutable
254261
-- filter may be modified afterwards.
255262
freeze :: MBloom s a -> ST s (Bloom a)
256-
freeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do
263+
freeze MBloom { mbNumBlocks, mbNumHashes, mbHashSalt, mbBitArray } = do
257264
bitArray <- BitArray.freeze mbBitArray
258265
let !bf = Bloom {
259266
numBlocks = mbNumBlocks,
260267
numHashes = mbNumHashes,
268+
hashSalt = mbHashSalt,
261269
bitArray
262270
}
263271
assert (bloomInvariant bf) $ pure bf
@@ -266,23 +274,25 @@ freeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do
266274
-- mutable filter /must not/ be modified afterwards. For a safer creation
267275
-- interface, use 'freeze' or 'create'.
268276
unsafeFreeze :: MBloom s a -> ST s (Bloom a)
269-
unsafeFreeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do
277+
unsafeFreeze MBloom { mbNumBlocks, mbNumHashes, mbHashSalt, mbBitArray } = do
270278
bitArray <- BitArray.unsafeFreeze mbBitArray
271279
let !bf = Bloom {
272280
numBlocks = mbNumBlocks,
273281
numHashes = mbNumHashes,
282+
hashSalt = mbHashSalt,
274283
bitArray
275284
}
276285
assert (bloomInvariant bf) $ pure bf
277286

278287
-- | Copy an immutable Bloom filter to create a mutable one. There is
279288
-- no non-copying equivalent.
280289
thaw :: Bloom a -> ST s (MBloom s a)
281-
thaw Bloom { numBlocks, numHashes, bitArray } = do
290+
thaw Bloom { numBlocks, numHashes, hashSalt, bitArray } = do
282291
mbBitArray <- BitArray.thaw bitArray
283292
pure MBloom {
284293
mbNumBlocks = numBlocks,
285294
mbNumHashes = numHashes,
295+
mbHashSalt = hashSalt,
286296
mbBitArray
287297
}
288298

@@ -317,9 +327,13 @@ newtype Hashes a = Hashes Hash
317327
deriving newtype Prim
318328
type role Hashes nominal
319329

330+
-- | The salt value to be used for hashes.
331+
newtype Salt = Salt Word64
332+
deriving stock (Eq, Show)
333+
320334
{-# INLINE hashes #-}
321-
hashes :: Hashable a => a -> Hashes a
322-
hashes = Hashes . hash64
335+
hashes :: (Hashable a) => Salt -> a -> Hashes a
336+
hashes = \ (Salt !salt) !x -> Hashes (hashSalt64 salt x)
323337

324338
{-# INLINE blockIxAndBitGen #-}
325339
-- | The scheme for turning 'Hashes' into block and bit indexes is as follows:

bloomfilter/src/Data/BloomFilter/Classic.hs

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -100,26 +100,27 @@ import Prelude hiding (elem, notElem, read)
100100
-- @
101101
--
102102
-- Note that the result of the setup function is not used.
103-
create :: BloomSize
103+
create :: Salt
104+
-> BloomSize
104105
-> (forall s. (MBloom s a -> ST s ())) -- ^ setup function
105106
-> Bloom a
106107
{-# INLINE create #-}
107-
create bloomsize body =
108+
create bloomsalt bloomsize body =
108109
runST $ do
109-
mb <- new bloomsize
110+
mb <- new bloomsalt bloomsize
110111
body mb
111112
unsafeFreeze mb
112113

113114
-- | Insert a value into a mutable Bloom filter. Afterwards, a
114115
-- membership query for the same value is guaranteed to return @True@.
115116
insert :: Hashable a => MBloom s a -> a -> ST s ()
116-
insert !mb !x = insertHashes mb (hashes x)
117+
insert !mb !x = insertHashes mb (hashes (mbHashSalt mb) x)
117118

118119
-- | Query an immutable Bloom filter for membership. If the value is
119120
-- present, return @True@. If the value is not present, there is
120121
-- /still/ some possibility that @True@ will be returned.
121122
elem :: Hashable a => a -> Bloom a -> Bool
122-
elem = \ !x !b -> elemHashes b (hashes x)
123+
elem = \ !x !b -> elemHashes b (hashes (hashSalt b) x)
123124

124125
-- | Same as 'elem' but with the opposite argument order:
125126
--
@@ -142,7 +143,7 @@ notElem = \ x b -> not (x `elem` b)
142143
-- present, return @True@. If the value is not present, there is
143144
-- /still/ some possibility that @True@ will be returned.
144145
read :: Hashable a => MBloom s a -> a -> ST s Bool
145-
read !mb !x = readHashes mb (hashes x)
146+
read !mb !x = readHashes mb (hashes (mbHashSalt mb) x)
146147

147148
-- | Build an immutable Bloom filter from a seed value. The seeding
148149
-- function populates the filter as follows.
@@ -154,13 +155,14 @@ read !mb !x = readHashes mb (hashes x)
154155
-- @b@ is used as a new seed.
155156
unfold :: forall a b.
156157
Hashable a
157-
=> BloomSize
158+
=> Salt
159+
-> BloomSize
158160
-> (b -> Maybe (a, b)) -- ^ seeding function
159161
-> b -- ^ initial seed
160162
-> Bloom a
161163
{-# INLINE unfold #-}
162-
unfold bloomsize f k =
163-
create bloomsize body
164+
unfold bloomsalt bloomsize f k =
165+
create bloomsalt bloomsize body
164166
where
165167
body :: forall s. MBloom s a -> ST s ()
166168
body mb = loop k
@@ -180,23 +182,26 @@ unfold bloomsize f k =
180182
-- filt = fromList (policyForBits 10) [\"foo\", \"bar\", \"quux\"]
181183
-- @
182184
fromList :: (Foldable t, Hashable a)
183-
=> BloomPolicy
185+
=> Salt
186+
-> BloomPolicy
184187
-> t a -- ^ values to populate with
185188
-> Bloom a
186-
fromList policy xs =
187-
create bsize (\b -> mapM_ (insert b) xs)
189+
fromList bsalt policy xs =
190+
create bsalt bsize (\b -> mapM_ (insert b) xs)
188191
where
189192
bsize = sizeForPolicy policy (length xs)
190193

191-
{-# SPECIALISE deserialise :: BloomSize
194+
{-# SPECIALISE deserialise :: Salt
195+
-> BloomSize
192196
-> (MutableByteArray RealWorld -> Int -> Int -> IO ())
193197
-> IO (Bloom a) #-}
194198
deserialise :: PrimMonad m
195-
=> BloomSize
199+
=> Salt
200+
-> BloomSize
196201
-> (MutableByteArray (PrimState m) -> Int -> Int -> m ())
197202
-> m (Bloom a)
198-
deserialise bloomsize fill = do
199-
mbloom <- stToPrim $ new bloomsize
203+
deserialise bloomsalt bloomsize fill = do
204+
mbloom <- stToPrim $ new bloomsalt bloomsize
200205
Internal.deserialise mbloom fill
201206
stToPrim $ unsafeFreeze mbloom
202207

0 commit comments

Comments
 (0)