Skip to content

Commit 36849fb

Browse files
fix: Reject undefined features when using get_historical_features or get_online_features (#2665)
Reject undefined features when using `get_historical_features` or `get_online_features`.

Signed-off-by: Abhin Chhabra <[email protected]>
1 parent 4060c3d commit 36849fb

File tree

4 files changed

+112
-3
lines changed

4 files changed

+112
-3
lines changed

sdk/python/feast/feature_store.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -875,7 +875,7 @@ def get_historical_features(
875875
DeprecationWarning,
876876
)
877877

878-
# TODO(achal): _group_feature_refs returns the on demand feature views, but it's no passed into the provider.
878+
# TODO(achal): _group_feature_refs returns the on demand feature views, but it's not passed into the provider.
879879
# This is a weird interface quirk - we should revisit the `get_historical_features` to
880880
# pass in the on demand feature views as well.
881881
fvs, odfvs, request_fvs, request_fv_refs = _group_feature_refs(
@@ -2125,8 +2125,12 @@ def _group_feature_refs(
21252125
for ref in features:
21262126
view_name, feat_name = ref.split(":")
21272127
if view_name in view_index:
2128+
view_index[view_name].projection.get_feature(feat_name) # For validation
21282129
views_features[view_name].add(feat_name)
21292130
elif view_name in on_demand_view_index:
2131+
on_demand_view_index[view_name].projection.get_feature(
2132+
feat_name
2133+
) # For validation
21302134
on_demand_view_features[view_name].add(feat_name)
21312135
# Let's also add in any FV Feature dependencies here.
21322136
for input_fv_projection in on_demand_view_index[
@@ -2135,6 +2139,9 @@ def _group_feature_refs(
21352139
for input_feat in input_fv_projection.features:
21362140
views_features[input_fv_projection.name].add(input_feat.name)
21372141
elif view_name in request_view_index:
2142+
request_view_index[view_name].projection.get_feature(
2143+
feat_name
2144+
) # For validation
21382145
request_views_features[view_name].add(feat_name)
21392146
request_view_refs.add(ref)
21402147
else:

sdk/python/feast/feature_view_projection.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,11 @@ def from_definition(base_feature_view: "BaseFeatureView"):
6464
name_alias=None,
6565
features=base_feature_view.features,
6666
)
67+
68+
def get_feature(self, feature_name: str) -> Field:
69+
try:
70+
return next(field for field in self.features if field.name == feature_name)
71+
except StopIteration:
72+
raise KeyError(
73+
f"Feature {feature_name} not found in projection {self.name_to_use()}"
74+
)

sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from feast.infra.offline_stores.offline_utils import (
2222
DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL,
2323
)
24-
from feast.types import Int32
24+
from feast.types import Float32, Int32
2525
from feast.value_type import ValueType
2626
from tests.integration.feature_repos.repo_configuration import (
2727
construct_universal_feature_views,
@@ -410,6 +410,46 @@ def test_historical_features(environment, universal_data_sources, full_feature_n
410410
)
411411

412412

413+
@pytest.mark.integration
414+
@pytest.mark.universal
415+
@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))
416+
def test_historical_features_with_shared_batch_source(
417+
environment, universal_data_sources, full_feature_names
418+
):
419+
# Addresses https://github.com/feast-dev/feast/issues/2576
420+
421+
store = environment.feature_store
422+
423+
entities, datasets, data_sources = universal_data_sources
424+
driver_stats_v1 = FeatureView(
425+
name="driver_stats_v1",
426+
entities=["driver"],
427+
schema=[Field(name="avg_daily_trips", dtype=Int32)],
428+
source=data_sources.driver,
429+
)
430+
driver_stats_v2 = FeatureView(
431+
name="driver_stats_v2",
432+
entities=["driver"],
433+
schema=[
434+
Field(name="avg_daily_trips", dtype=Int32),
435+
Field(name="conv_rate", dtype=Float32),
436+
],
437+
source=data_sources.driver,
438+
)
439+
440+
store.apply([driver(), driver_stats_v1, driver_stats_v2])
441+
442+
with pytest.raises(KeyError):
443+
store.get_historical_features(
444+
entity_df=datasets.entity_df,
445+
features=[
446+
# `driver_stats_v1` does not have `conv_rate`
447+
"driver_stats_v1:conv_rate",
448+
],
449+
full_feature_names=full_feature_names,
450+
).to_df()
451+
452+
413453
@pytest.mark.integration
414454
@pytest.mark.universal_offline_stores
415455
def test_historical_features_with_missing_request_data(

sdk/python/tests/integration/online_store/test_universal_online.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
RequestDataNotFoundInEntityRowsException,
2020
)
2121
from feast.online_response import TIMESTAMP_POSTFIX
22-
from feast.types import String
22+
from feast.types import Float32, Int32, String
2323
from feast.wait import wait_retry_backoff
2424
from tests.integration.feature_repos.repo_configuration import (
2525
Environment,
@@ -324,6 +324,60 @@ def get_online_features_dict(
324324
return dict1
325325

326326

327+
@pytest.mark.integration
328+
@pytest.mark.universal
329+
def test_online_retrieval_with_shared_batch_source(environment, universal_data_sources):
330+
# Addresses https://github.com/feast-dev/feast/issues/2576
331+
332+
fs = environment.feature_store
333+
334+
entities, datasets, data_sources = universal_data_sources
335+
driver_stats_v1 = FeatureView(
336+
name="driver_stats_v1",
337+
entities=["driver"],
338+
schema=[Field(name="avg_daily_trips", dtype=Int32)],
339+
source=data_sources.driver,
340+
)
341+
driver_stats_v2 = FeatureView(
342+
name="driver_stats_v2",
343+
entities=["driver"],
344+
schema=[
345+
Field(name="avg_daily_trips", dtype=Int32),
346+
Field(name="conv_rate", dtype=Float32),
347+
],
348+
source=data_sources.driver,
349+
)
350+
351+
fs.apply([driver(), driver_stats_v1, driver_stats_v2])
352+
353+
data = pd.DataFrame(
354+
{
355+
"driver_id": [1, 2],
356+
"avg_daily_trips": [4, 5],
357+
"conv_rate": [0.5, 0.3],
358+
"event_timestamp": [
359+
pd.to_datetime(1646263500, utc=True, unit="s"),
360+
pd.to_datetime(1646263600, utc=True, unit="s"),
361+
],
362+
"created": [
363+
pd.to_datetime(1646263500, unit="s"),
364+
pd.to_datetime(1646263600, unit="s"),
365+
],
366+
}
367+
)
368+
fs.write_to_online_store("driver_stats_v1", data.drop("conv_rate", axis=1))
369+
fs.write_to_online_store("driver_stats_v2", data)
370+
371+
with pytest.raises(KeyError):
372+
fs.get_online_features(
373+
features=[
374+
# `driver_stats_v1` does not have `conv_rate`
375+
"driver_stats_v1:conv_rate",
376+
],
377+
entity_rows=[{"driver_id": 1}, {"driver_id": 2}],
378+
)
379+
380+
327381
@pytest.mark.integration
328382
@pytest.mark.universal_online_stores
329383
@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))

0 commit comments

Comments
 (0)