Skip to content

Commit 4bba787

Browse files
authored
fix: Fix Feast Java inconsistency with int64 serialization vs python (#3031)
Signed-off-by: Danny Chiao <[email protected]>
1 parent dea65e4 commit 4bba787

File tree

7 files changed

+37
-20
lines changed

7 files changed

+37
-20
lines changed

examples/java-demo/feature_repo/application-override.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ feature-server:
1010
host: my-redis-master
1111
port: 6379
1212
password: [YOUR PASSWORD]
13+
entityKeySerializationVersion: 2
1314
global:
1415
registry:
1516
path: gs://[YOUR BUCKET]/demo-repo/registry.db

examples/java-demo/feature_repo/driver_repo.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,22 @@
33
import pandas as pd
44

55
from feast.data_source import RequestSource
6-
from feast.field import Field
76
from feast.on_demand_feature_view import on_demand_feature_view
8-
from feast.request_feature_view import RequestFeatureView
97
from feast.types import Float32, Float64, Int64, String
108
from feast.field import Field
119

12-
from feast import Entity, Feature, BatchFeatureView, FileSource
10+
from feast import Entity, FileSource, FeatureView
1311

1412
driver_hourly_stats = FileSource(
1513
path="data/driver_stats_with_string.parquet",
1614
timestamp_field="event_timestamp",
1715
created_timestamp_column="created",
1816
)
1917
driver = Entity(name="driver_id", description="driver id",)
20-
driver_hourly_stats_view = BatchFeatureView(
18+
driver_hourly_stats_view = FeatureView(
2119
name="driver_hourly_stats",
2220
entities=[driver],
23-
ttl=timedelta(seconds=86400000),
21+
ttl=timedelta(days=365),
2422
schema=[
2523
Field(name="conv_rate", dtype=Float32),
2624
Field(name="acc_rate", dtype=Float32),
@@ -42,6 +40,7 @@
4240
],
4341
)
4442

43+
4544
# Define an on demand feature view which can generate new features based on
4645
# existing feature views and RequestSource features
4746
@on_demand_feature_view(
@@ -60,14 +59,3 @@ def transformed_conv_rate(inputs: pd.DataFrame) -> pd.DataFrame:
6059
df["conv_rate_plus_val2"] = inputs["conv_rate"] + inputs["val_to_add_2"]
6160
return df
6261

63-
64-
# Define request feature view
65-
driver_age_request_fv = RequestFeatureView(
66-
name="driver_age",
67-
request_data_source=RequestSource(
68-
name="driver_age",
69-
schema=[
70-
Field(name="driver_age", dtype=Int64),
71-
],
72-
),
73-
)

examples/java-demo/feature_repo/feature_store.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ online_store:
66
connection_string: localhost:6379,password=[YOUR PASSWORD]
77
offline_store:
88
type: file
9+
entity_key_serialization_version: 2
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from feast import FeatureStore
2+
3+
4+
def run_demo():
5+
store = FeatureStore(repo_path=".")
6+
7+
print("\n--- Online features ---")
8+
features = store.get_online_features(
9+
features=[
10+
"driver_hourly_stats:conv_rate",
11+
],
12+
entity_rows=[
13+
{
14+
"driver_id": 1001,
15+
},
16+
{
17+
"driver_id": 1002,
18+
}
19+
],
20+
).to_dict()
21+
for key, value in sorted(features.items()):
22+
print(key, " : ", value)
23+
24+
25+
if __name__ == "__main__":
26+
run_demo()

java/serving/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ From the Feast GitHub root, run:
4141
java \
4242
-Xms1g \
4343
-Xmx4g \
44-
-jar java/serving/target/feast-serving-0.17.1-SNAPSHOT-jar-with-dependencies.jar \
44+
-jar java/serving/target/feast-serving-[YOUR VERSION]-jar-with-dependencies.jar \
4545
classpath:/application.yml,file:./application-override.yaml
4646
```
4747
5. Now you have a Feast Serving gRPC service running on port 6566 locally!
@@ -124,7 +124,7 @@ You can debug this like any other Java executable. Swap the java command above w
124124
-Xrunjdwp:transport=dt_socket,address=5005,server=y,suspend=y \
125125
-Xms1g \
126126
-Xmx4g \
127-
-jar java/serving/target/feast-serving-0.17.1-SNAPSHOT-jar-with-dependencies.jar \
127+
-jar java/serving/target/feast-serving-[YOUR VERSION]-jar-with-dependencies.jar \
128128
classpath:/application.yml,file:./application-override.yaml
129129
```
130130
Now you can attach e.g. a Remote debugger in IntelliJ to port 5005 to debug / make breakpoints.

java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,14 +87,15 @@ public byte[] serialize(RedisProto.RedisKeyV2 entityKey) {
8787
break;
8888
case INT64_VAL:
8989
buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.INT64.getNumber()));
90-
buffer.addAll(encodeInteger(Integer.BYTES));
9190
/* This is super dumb - but in https://github.com/feast-dev/feast/blob/dcae1606f53028ce5413567fb8b66f92cfef0f8e/sdk/python/feast/infra/key_encoding_utils.py#L9
9291
we use `struct.pack("<l", v.int64_val)` to get the bytes of an int64 val. This actually extracts only 4 bytes,
9392
instead of 8 bytes as you'd expect from to serialize an int64 value.
9493
*/
9594
if (this.entityKeySerializationVersion <= 1) {
95+
buffer.addAll(encodeInteger(Integer.BYTES));
9696
buffer.addAll(encodeInteger(((Long) val.getInt64Val()).intValue()));
9797
} else {
98+
buffer.addAll(encodeInteger(Long.BYTES));
9899
buffer.addAll(encodeLong(((Long) val.getInt64Val())));
99100
}
100101

sdk/python/feast/feature_store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -840,7 +840,7 @@ def apply(
840840
)
841841

842842
for rfv in request_views_to_update:
843-
data_sources_set_to_update.add(rfv.request_data_source)
843+
data_sources_set_to_update.add(rfv.request_source)
844844

845845
for odfv in odfvs_to_update:
846846
for v in odfv.source_request_sources.values():

0 commit comments

Comments
 (0)