Skip to content

Commit c7ba370

Browse files
fix: Parse inline data sources (#3036)
* Remove deprecated CLI warnings (Signed-off-by: Felix Wang <[email protected]>)
* Fix repo parsing logic (Signed-off-by: Felix Wang <[email protected]>)
* Add tests (Signed-off-by: Felix Wang <[email protected]>)

Signed-off-by: Felix Wang <[email protected]>
1 parent 66d2c76 commit c7ba370

9 files changed

+437
-194
lines changed

sdk/python/feast/cli.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
# limitations under the License.
1414
import json
1515
import logging
16-
import warnings
1716
from datetime import datetime
1817
from pathlib import Path
1918
from typing import List, Optional
@@ -45,7 +44,6 @@
4544
from feast.utils import maybe_local_tz
4645

4746
_logger = logging.getLogger(__name__)
48-
warnings.filterwarnings("ignore", category=DeprecationWarning, module="(?!feast)")
4947

5048

5149
class NoOptionDefaultFormat(click.Command):
@@ -197,11 +195,6 @@ def data_source_describe(ctx: click.Context, name: str):
197195
print(e)
198196
exit(1)
199197

200-
warnings.warn(
201-
"Describing data sources will only work properly if all data sources have names or table names specified. "
202-
"Starting Feast 0.24, data source unique names will be required to encourage data source discovery.",
203-
RuntimeWarning,
204-
)
205198
print(
206199
yaml.dump(
207200
yaml.safe_load(str(data_source)), default_flow_style=False, sort_keys=False
@@ -224,11 +217,6 @@ def data_source_list(ctx: click.Context):
224217

225218
from tabulate import tabulate
226219

227-
warnings.warn(
228-
"Listing data sources will only work properly if all data sources have names or table names specified. "
229-
"Starting Feast 0.24, data source unique names will be required to encourage data source discovery",
230-
RuntimeWarning,
231-
)
232220
print(tabulate(table, headers=["NAME", "CLASS"], tablefmt="plain"))
233221

234222

sdk/python/feast/feature_store.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,13 @@
4343
from feast import feature_server, flags_helper, ui_server, utils
4444
from feast.base_feature_view import BaseFeatureView
4545
from feast.batch_feature_view import BatchFeatureView
46-
from feast.data_source import DataSource, PushMode
46+
from feast.data_source import (
47+
DataSource,
48+
KafkaSource,
49+
KinesisSource,
50+
PushMode,
51+
PushSource,
52+
)
4753
from feast.diff.infra_diff import InfraDiff, diff_infra_protos
4854
from feast.diff.registry_diff import RegistryDiff, apply_diff_to_registry, diff_between
4955
from feast.dqm.errors import ValidationFailed
@@ -827,6 +833,18 @@ def apply(
827833
ob for ob in objects if isinstance(ob, ValidationReference)
828834
]
829835

836+
batch_sources_to_add: List[DataSource] = []
837+
for data_source in data_sources_set_to_update:
838+
if (
839+
isinstance(data_source, PushSource)
840+
or isinstance(data_source, KafkaSource)
841+
or isinstance(data_source, KinesisSource)
842+
):
843+
assert data_source.batch_source
844+
batch_sources_to_add.append(data_source.batch_source)
845+
for batch_source in batch_sources_to_add:
846+
data_sources_set_to_update.add(batch_source)
847+
830848
for fv in itertools.chain(views_to_update, sfvs_to_update):
831849
data_sources_set_to_update.add(fv.batch_source)
832850
if fv.stream_source:

sdk/python/feast/repo_operations.py

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from feast import PushSource
1616
from feast.batch_feature_view import BatchFeatureView
17-
from feast.data_source import DataSource, KafkaSource
17+
from feast.data_source import DataSource, KafkaSource, KinesisSource
1818
from feast.diff.registry_diff import extract_objects_for_keep_delete_update_add
1919
from feast.entity import Entity
2020
from feast.feature_service import FeatureService
@@ -114,44 +114,64 @@ def parse_repo(repo_root: Path) -> RepoContents:
114114
request_feature_views=[],
115115
)
116116

117-
data_sources_set = set()
118117
for repo_file in get_repo_files(repo_root):
119118
module_path = py_path_to_module(repo_file)
120119
module = importlib.import_module(module_path)
120+
121121
for attr_name in dir(module):
122122
obj = getattr(module, attr_name)
123+
123124
if isinstance(obj, DataSource) and not any(
124125
(obj is ds) for ds in res.data_sources
125126
):
126127
res.data_sources.append(obj)
127-
data_sources_set.add(obj)
128+
129+
# Handle batch sources defined within stream sources.
130+
if (
131+
isinstance(obj, PushSource)
132+
or isinstance(obj, KafkaSource)
133+
or isinstance(obj, KinesisSource)
134+
):
135+
batch_source = obj.batch_source
136+
137+
if batch_source and not any(
138+
(batch_source is ds) for ds in res.data_sources
139+
):
140+
res.data_sources.append(batch_source)
128141
if (
129142
isinstance(obj, FeatureView)
130143
and not any((obj is fv) for fv in res.feature_views)
131144
and not isinstance(obj, StreamFeatureView)
132145
and not isinstance(obj, BatchFeatureView)
133146
):
134147
res.feature_views.append(obj)
135-
if isinstance(obj.stream_source, PushSource) and not any(
136-
(obj is ds) for ds in res.data_sources
137-
):
138-
push_source_dep = obj.stream_source.batch_source
139-
# Don't add if the push source's batch source is a duplicate of an existing batch source
140-
if push_source_dep not in data_sources_set:
141-
res.data_sources.append(push_source_dep)
148+
149+
# Handle batch sources defined with feature views.
150+
batch_source = obj.batch_source
151+
assert batch_source
152+
if not any((batch_source is ds) for ds in res.data_sources):
153+
res.data_sources.append(batch_source)
154+
155+
# Handle stream sources defined with feature views.
156+
if obj.stream_source:
157+
stream_source = obj.stream_source
158+
if not any((stream_source is ds) for ds in res.data_sources):
159+
res.data_sources.append(stream_source)
142160
elif isinstance(obj, StreamFeatureView) and not any(
143161
(obj is sfv) for sfv in res.stream_feature_views
144162
):
145163
res.stream_feature_views.append(obj)
146-
if (
147-
isinstance(obj.stream_source, PushSource)
148-
or isinstance(obj.stream_source, KafkaSource)
149-
and not any((obj is ds) for ds in res.data_sources)
150-
):
151-
batch_source_dep = obj.stream_source.batch_source
152-
# Don't add if the push source's batch source is a duplicate of an existing batch source
153-
if batch_source_dep and batch_source_dep not in data_sources_set:
154-
res.data_sources.append(batch_source_dep)
164+
165+
# Handle batch sources defined with feature views.
166+
batch_source = obj.batch_source
167+
if not any((batch_source is ds) for ds in res.data_sources):
168+
res.data_sources.append(batch_source)
169+
170+
# Handle stream sources defined with feature views.
171+
stream_source = obj.stream_source
172+
assert stream_source
173+
if not any((stream_source is ds) for ds in res.data_sources):
174+
res.data_sources.append(stream_source)
155175
elif isinstance(obj, Entity) and not any(
156176
(obj is entity) for entity in res.entities
157177
):
@@ -168,6 +188,7 @@ def parse_repo(repo_root: Path) -> RepoContents:
168188
(obj is rfv) for rfv in res.request_feature_views
169189
):
170190
res.request_feature_views.append(obj)
191+
171192
res.entities.append(DUMMY_ENTITY)
172193
return res
173194

@@ -300,7 +321,6 @@ def log_infra_changes(
300321

301322
@log_exceptions_and_usage
302323
def apply_total(repo_config: RepoConfig, repo_path: Path, skip_source_validation: bool):
303-
304324
os.chdir(repo_path)
305325
project, registry, repo, store = _prepare_registry_and_repo(repo_config, repo_path)
306326
apply_total_with_repo_instance(
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from datetime import timedelta
2+
3+
from feast import Entity, FeatureView, Field, FileSource
4+
from feast.types import Float32, Int32, Int64
5+
6+
driver = Entity(
7+
name="driver_id",
8+
description="driver id",
9+
)
10+
11+
driver_hourly_stats_view = FeatureView(
12+
name="driver_hourly_stats",
13+
entities=[driver],
14+
ttl=timedelta(days=1),
15+
schema=[
16+
Field(name="conv_rate", dtype=Float32),
17+
Field(name="acc_rate", dtype=Float32),
18+
Field(name="avg_daily_trips", dtype=Int64),
19+
Field(name="driver_id", dtype=Int32),
20+
],
21+
online=True,
22+
source=FileSource(
23+
path="data/driver_stats.parquet", # Fake path
24+
timestamp_field="event_timestamp",
25+
created_timestamp_column="created",
26+
),
27+
tags={},
28+
)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from datetime import timedelta
2+
3+
from feast import Entity, FeatureView, Field, FileSource, KafkaSource
4+
from feast.data_format import AvroFormat
5+
from feast.types import Float32, Int32, Int64
6+
7+
driver = Entity(
8+
name="driver_id",
9+
description="driver id",
10+
)
11+
12+
driver_hourly_stats_view = FeatureView(
13+
name="driver_hourly_stats",
14+
entities=[driver],
15+
ttl=timedelta(days=1),
16+
schema=[
17+
Field(name="conv_rate", dtype=Float32),
18+
Field(name="acc_rate", dtype=Float32),
19+
Field(name="avg_daily_trips", dtype=Int64),
20+
Field(name="driver_id", dtype=Int32),
21+
],
22+
online=True,
23+
source=KafkaSource(
24+
name="kafka",
25+
timestamp_field="event_timestamp",
26+
kafka_bootstrap_servers="",
27+
message_format=AvroFormat(""),
28+
topic="topic",
29+
batch_source=FileSource(
30+
path="data/driver_stats.parquet", # Fake path
31+
timestamp_field="event_timestamp",
32+
created_timestamp_column="created",
33+
),
34+
watermark_delay_threshold=timedelta(days=1),
35+
),
36+
tags={},
37+
)
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from datetime import timedelta
2+
3+
from feast import FileSource, KafkaSource
4+
from feast.data_format import AvroFormat
5+
6+
stream_source = KafkaSource(
7+
name="kafka",
8+
timestamp_field="event_timestamp",
9+
kafka_bootstrap_servers="",
10+
message_format=AvroFormat(""),
11+
topic="topic",
12+
batch_source=FileSource(
13+
path="data/driver_stats.parquet", # Fake path
14+
timestamp_field="event_timestamp",
15+
created_timestamp_column="created",
16+
),
17+
watermark_delay_threshold=timedelta(days=1),
18+
)

0 commit comments

Comments (0)