Skip to content

Commit 7e72396

Browse files
committed
add support to return hash in integer format
1 parent ebbaa00 commit 7e72396

File tree

4 files changed

+67
-35
lines changed

4 files changed

+67
-35
lines changed

.chloggen/ottl_murmurhash3_func.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ change_type: enhancement
77
component: pkg/ottl
88

99
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10-
note: "Add `MurmurHash3` function to convert the `target` to a hexadecimal string of the murmurHash3 hash/digest"
10+
note: Add `MurmurHash3` function to convert the `target` to a MurmurHash3 hash, returned either as signed integers or as a hexadecimal string
1111

1212
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
1313
issues: [34077]

pkg/ottl/ottlfuncs/README.md

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -952,18 +952,21 @@ Examples:
952952

953953
`MurmurHash3(target, Optional[version])`
954954

955-
The `MurmurHash3` Converter converts the `target` to a hexadecimal string of murmurHash3 hash/digest
955+
The `MurmurHash3` Converter converts the `target` to a MurmurHash3 hash/digest.
956956

957-
`target` is a Getter that returns a string.
957+
`target` is a Getter that returns a string. The default `version` is `v128_hash`.
958958

959-
`version` is an optional string. MurmurHash3 has 32-bit and 128-bit versions. The default value is `128`. Valid values are `32` and `128`.
959+
`version` is an optional string that selects the hash width and the output format. Valid values are:
960960

961-
The returned type is `string`.
961+
- `v32_hash`: Uses 32-bit version and returns a signed integer hash.
962+
- `v128_hash`: Uses 128-bit version and returns the hash as an array of two signed integers.
963+
- `v32_hex`: Uses 32-bit version and returns a hash in hexadecimal string format.
964+
- `v128_hex`: Uses 128-bit version and returns a hash in hexadecimal string format.
962965

963966
Examples:
964967

965-
- `MurmurHash3(attributes["device.name"])`
966-
- `MurmurHash3("sometext", version="32")`
968+
- `MurmurHash3(attributes["order.productId"])`
969+
- `MurmurHash3("sometext", version="v32_hex")`
967970

968971
### Nanoseconds
969972

pkg/ottl/ottlfuncs/func_murmurhash3.go

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@ import (
1515
)
1616

1717
const (
18-
v32 = "32"
19-
v128 = "128" // default
18+
v32Hash = "v32_hash"
19+
v32Hex = "v32_hex"
20+
v128Hash = "v128_hash" // default
21+
v128Hex = "v128_hex"
2022
)
2123

2224
type MurmurHash3Arguments[K any] struct {
2325
Target ottl.StringGetter[K]
24-
Version ottl.Optional[string] // 32-bit or 128-bit
26+
Version ottl.Optional[string]
2527
}
2628

2729
func NewMurmurHash3Factory[K any]() ottl.Factory[K] {
@@ -35,45 +37,60 @@ func createMurmurHash3Function[K any](_ ottl.FunctionContext, oArgs ottl.Argumen
3537
return nil, fmt.Errorf("MurmurHash3Factory args must be of type *MurmurHash3Arguments[K]")
3638
}
3739

38-
version := v128
40+
version := v128Hash
3941
if !args.Version.IsEmpty() {
4042
v := args.Version.Get()
4143

4244
switch v {
43-
case v32, v128:
45+
case v32Hash, v32Hex, v128Hash, v128Hex:
4446
version = v
4547
default:
46-
return nil, fmt.Errorf("invalid arguments: %s. Version should be either \"32\" or \"128\"", v)
48+
return nil, fmt.Errorf("invalid arguments: %s", v)
4749
}
4850
}
4951

50-
return HexStringLittleEndianVariant(args.Target, version)
52+
return murmurHash3(args.Target, version)
5153
}
5254

53-
// HexStringLittleEndianVariant returns the hexadecimal representation of the hash in little-endian format.
54-
// MurmurHash3, developed by Austin Appleby, is sensitive to endianness. Other languages like Python, Ruby,
55-
// and Java (using Guava) return a hexadecimal string in the little-endian variant. This function does the same.
56-
func HexStringLittleEndianVariant[K any](target ottl.StringGetter[K], version string) (ottl.ExprFunc[K], error) {
55+
func murmurHash3[K any](target ottl.StringGetter[K], version string) (ottl.ExprFunc[K], error) {
5756
return func(ctx context.Context, tCtx K) (any, error) {
5857
val, err := target.Get(ctx, tCtx)
5958
if err != nil {
6059
return nil, err
6160
}
6261

6362
switch version {
64-
case v32:
63+
case v32Hash:
6564
h := murmur3.Sum32([]byte(val))
66-
b := make([]byte, 4)
67-
binary.LittleEndian.PutUint32(b, h)
68-
return hex.EncodeToString(b), nil
69-
case v128:
65+
return int64(h), nil
66+
case v128Hash:
7067
h1, h2 := murmur3.Sum128([]byte(val))
71-
b := make([]byte, 16)
72-
binary.LittleEndian.PutUint64(b[:8], h1)
73-
binary.LittleEndian.PutUint64(b[8:], h2)
74-
return hex.EncodeToString(b), nil
68+
return []int64{int64(h1), int64(h2)}, nil
69+
case v32Hex, v128Hex:
70+
return hexStringLittleEndianVariant(val, version)
7571
default:
7672
return nil, fmt.Errorf("invalid argument: %s", version)
7773
}
7874
}, nil
7975
}
76+
77+
// hexStringLittleEndianVariant returns the hexadecimal representation of the hash in little-endian format.
78+
// MurmurHash3, developed by Austin Appleby, is sensitive to endianness. Other languages like Python, Ruby,
79+
// and Java (using Guava) return a hexadecimal string in the little-endian variant. This function does the same.
80+
func hexStringLittleEndianVariant(target string, version string) (string, error) {
81+
switch version {
82+
case v32Hex:
83+
h := murmur3.Sum32([]byte(target))
84+
b := make([]byte, 4)
85+
binary.LittleEndian.PutUint32(b, h)
86+
return hex.EncodeToString(b), nil
87+
case v128Hex:
88+
h1, h2 := murmur3.Sum128([]byte(target))
89+
b := make([]byte, 16)
90+
binary.LittleEndian.PutUint64(b[:8], h1)
91+
binary.LittleEndian.PutUint64(b[8:], h2)
92+
return hex.EncodeToString(b), nil
93+
default:
94+
return "", fmt.Errorf("invalid argument: %s", version)
95+
}
96+
}

pkg/ottl/ottlfuncs/func_murmurhash3_test.go

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,32 @@ func Test_MurmurHash3(t *testing.T) {
1717
tests := []struct {
1818
name string
1919
oArgs ottl.Arguments
20-
expected string
20+
expected any
2121
createError string
2222
funcError string
2323
}{
2424
{
25-
name: "string",
25+
name: "string in v32_hash",
2626
oArgs: &MurmurHash3Arguments[any]{
2727
Target: ottl.StandardStringGetter[any]{
2828
Getter: func(_ context.Context, _ any) (any, error) {
2929
return "Hello World", nil
3030
},
3131
},
32+
Version: ottl.NewTestingOptional[string]("v32_hash"),
3233
},
33-
expected: "dbc2a0c1ab26631a27b4c09fcf1fe683",
34+
expected: int64(427197390),
35+
},
36+
{
37+
name: "string in v128_hash",
38+
oArgs: &MurmurHash3Arguments[any]{
39+
Target: ottl.StandardStringGetter[any]{
40+
Getter: func(_ context.Context, _ any) (any, error) {
41+
return "Hello World", nil
42+
},
43+
},
44+
},
45+
expected: []int64{int64(1901405986810282715), int64(-8942425033498643417)},
3446
},
3547
{
3648
name: "empty string",
@@ -40,31 +52,31 @@ func Test_MurmurHash3(t *testing.T) {
4052
return "", nil
4153
},
4254
},
43-
Version: ottl.NewTestingOptional[string]("128"),
55+
Version: ottl.NewTestingOptional[string]("v128_hex"),
4456
},
4557
expected: "00000000000000000000000000000000",
4658
},
4759
{
48-
name: "string in v128",
60+
name: "string in v128_hex",
4961
oArgs: &MurmurHash3Arguments[any]{
5062
Target: ottl.StandardStringGetter[any]{
5163
Getter: func(_ context.Context, _ any) (any, error) {
5264
return "Hello World", nil
5365
},
5466
},
55-
Version: ottl.NewTestingOptional[string]("128"),
67+
Version: ottl.NewTestingOptional[string]("v128_hex"),
5668
},
5769
expected: "dbc2a0c1ab26631a27b4c09fcf1fe683",
5870
},
5971
{
60-
name: "string in v32",
72+
name: "string in v32_hex",
6173
oArgs: &MurmurHash3Arguments[any]{
6274
Target: ottl.StandardStringGetter[any]{
6375
Getter: func(_ context.Context, _ any) (any, error) {
6476
return "Hello World", nil
6577
},
6678
},
67-
Version: ottl.NewTestingOptional[string]("32"),
79+
Version: ottl.NewTestingOptional[string]("v32_hex"),
6880
},
6981
expected: "ce837619",
7082
},

0 commit comments

Comments
 (0)