Skip to content

Commit 0cf3c92

Browse files
authored
feat: Stream Feature View FCOS (#2750)
* Fix working version Signed-off-by: Kevin Zhang <[email protected]> * Working commit Signed-off-by: Kevin Zhang <[email protected]> * Fixes Signed-off-by: Kevin Zhang <[email protected]> * Fix stuffs Signed-off-by: Kevin Zhang <[email protected]> * Fix lint Signed-off-by: Kevin Zhang <[email protected]> * Fix things Signed-off-by: Kevin Zhang <[email protected]> * Fix Signed-off-by: Kevin Zhang <[email protected]> * Fix Signed-off-by: Kevin Zhang <[email protected]> * Fix lint Signed-off-by: Kevin Zhang <[email protected]> * Fix lint Signed-off-by: Kevin Zhang <[email protected]> * Fix lihnt Signed-off-by: Kevin Zhang <[email protected]> * Fix stuff Signed-off-by: Kevin Zhang <[email protected]> * Fix Signed-off-by: Kevin Zhang <[email protected]> * Fix lint Signed-off-by: Kevin Zhang <[email protected]> * Fix unit tests Signed-off-by: Kevin Zhang <[email protected]> * Address review comments Signed-off-by: Kevin Zhang <[email protected]> * Fix Signed-off-by: Kevin Zhang <[email protected]> * Fix Signed-off-by: Kevin Zhang <[email protected]> * Fix lint Signed-off-by: Kevin Zhang <[email protected]> * Fix Signed-off-by: Kevin Zhang <[email protected]> * Fixed Signed-off-by: Kevin Zhang <[email protected]> * Unsaved changes Signed-off-by: Kevin Zhang <[email protected]>
1 parent 5a7fcef commit 0cf3c92

18 files changed

+761
-36
lines changed

protos/feast/core/Aggregation.proto

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
syntax = "proto3";
2+
package feast.core;
3+
4+
option go_package = "github.com/feast-dev/feast/go/protos/feast/core";
5+
option java_outer_classname = "AggregationProto";
6+
option java_package = "feast.proto.core";
7+
8+
import "google/protobuf/duration.proto";
9+
10+
// A user-registered aggregation over a single column.
message Aggregation {
  // Column name of the feature being aggregated.
  string column = 1;

  // Name of the aggregation function (e.g. sum, max, min, count, mean).
  string function = 2;

  // The time window for this aggregation.
  google.protobuf.Duration time_window = 3;
}

protos/feast/core/DataFormat.proto

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ option java_package = "feast.proto.core";
2626
message FileFormat {
2727
// Defines options for the Parquet data format
2828
message ParquetFormat {}
29-
29+
3030
oneof format {
3131
ParquetFormat parquet_format = 1;
3232
}
@@ -40,17 +40,22 @@ message StreamFormat {
4040
// Feature data from the obtained stream message
4141
string class_path = 1;
4242
}
43-
43+
4444
// Defines options for the avro data format
4545
message AvroFormat {
4646
// Optional if used in a File DataSource as schema is embedded in avro file.
4747
// Specifies the schema of the Avro message as JSON string.
4848
string schema_json = 1;
4949
}
5050

51+
// Defines options for the json data format
message JsonFormat {
  // Schema definition of the Json message, carried as a string.
  string schema_json = 1;
}
54+
5155
// Specifies the data format and format specific options
5256
oneof format {
5357
AvroFormat avro_format = 1;
5458
ProtoFormat proto_format = 2;
59+
JsonFormat json_format = 3;
5560
}
5661
}

protos/feast/core/FeatureService.proto

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ option go_package = "github.com/feast-dev/feast/go/protos/feast/core";
55
option java_outer_classname = "FeatureServiceProto";
66
option java_package = "feast.proto.core";
77

8-
import "google/protobuf/duration.proto";
98
import "google/protobuf/timestamp.proto";
109
import "feast/core/FeatureViewProjection.proto";
1110

protos/feast/core/Registry.proto

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,21 @@ import "feast/core/FeatureView.proto";
2828
import "feast/core/InfraObject.proto";
2929
import "feast/core/OnDemandFeatureView.proto";
3030
import "feast/core/RequestFeatureView.proto";
31+
import "feast/core/StreamFeatureView.proto";
3132
import "feast/core/DataSource.proto";
3233
import "feast/core/SavedDataset.proto";
3334
import "feast/core/ValidationProfile.proto";
3435
import "google/protobuf/timestamp.proto";
3536

36-
// Next id: 14
37+
// Next id: 15
3738
message Registry {
3839
repeated Entity entities = 1;
3940
repeated FeatureTable feature_tables = 2;
4041
repeated FeatureView feature_views = 6;
4142
repeated DataSource data_sources = 12;
4243
repeated OnDemandFeatureView on_demand_feature_views = 8;
4344
repeated RequestFeatureView request_feature_views = 9;
45+
repeated StreamFeatureView stream_feature_views = 14;
4446
repeated FeatureService feature_services = 7;
4547
repeated SavedDataset saved_datasets = 11;
4648
repeated ValidationReference validation_references = 13;
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
//
2+
// Copyright 2020 The Feast Authors
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// https://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
//
16+
17+
18+
syntax = "proto3";
19+
package feast.core;
20+
21+
option go_package = "github.com/feast-dev/feast/go/protos/feast/core";
22+
option java_outer_classname = "StreamFeatureViewProto";
23+
option java_package = "feast.proto.core";
24+
25+
26+
import "google/protobuf/duration.proto";
27+
import "google/protobuf/timestamp.proto";
28+
import "feast/core/OnDemandFeatureView.proto";
29+
import "feast/core/Feature.proto";
30+
import "feast/core/DataSource.proto";
31+
import "feast/core/Aggregation.proto";
32+
33+
// A stream feature view: its specification plus its metadata.
message StreamFeatureView {
  // User-specified specification of this feature view.
  StreamFeatureViewSpec spec = 1;

  // Created / last-updated timestamps recorded for this feature view.
  StreamFeatureViewMeta meta = 2;
}
38+
39+
// Next available id: 17
40+
message StreamFeatureViewSpec {
  // Unique name of the feature view. Not updated.
  string name = 1;

  // Feast project that this feature view belongs to.
  string project = 2;

  // Names of the entities associated with this feature view.
  repeated string entities = 3;

  // Specification of each feature defined as part of this feature view.
  repeated FeatureSpecV2 features = 4;

  // Specification of each entity defined as part of this feature view.
  repeated FeatureSpecV2 entity_columns = 5;

  // Description of the feature view.
  string description = 6;

  // User defined metadata.
  map<string, string> tags = 7;

  // Owner of the feature view.
  string owner = 8;

  // Online serving only returns features of this view that are younger than
  // ttl, where ttl is the duration between the feature's event timestamp and
  // the time the feature is retrieved. Feature values outside ttl are returned
  // as unset values and indicated to the end user.
  google.protobuf.Duration ttl = 9;

  // Batch/Offline DataSource where this view can retrieve offline feature data.
  DataSource batch_source = 10;

  // Streaming DataSource from where this view can consume "online" feature data.
  DataSource stream_source = 11;

  // Whether these features should be served online or not.
  bool online = 12;

  // Serialized function that is encoded in the stream feature view.
  UserDefinedFunction user_defined_function = 13;

  // Mode of execution.
  string mode = 14;

  // Aggregation definitions.
  repeated Aggregation aggregations = 15;

  // Timestamp field for aggregation.
  string timestamp_field = 16;
}
91+
92+
// Timestamps recorded for a stream feature view.
message StreamFeatureViewMeta {
  // Time when this feature view was created.
  google.protobuf.Timestamp created_timestamp = 1;

  // Time when this feature view was last updated.
  google.protobuf.Timestamp last_updated_timestamp = 2;
}

protos/feast/core/ValidationProfile.proto

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@ option java_package = "feast.proto.core";
2222
option java_outer_classname = "ValidationProfile";
2323
option go_package = "github.com/feast-dev/feast/go/protos/feast/core";
2424

25-
import "feast/core/SavedDataset.proto";
26-
2725
message GEValidationProfiler {
2826
message UserDefinedProfiler {
2927
// The python-syntax function body (serialized by dill)

sdk/python/feast/aggregation.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
from datetime import timedelta
2+
from typing import Optional
3+
4+
from google.protobuf.duration_pb2 import Duration
5+
6+
from feast.protos.feast.core.Aggregation_pb2 import Aggregation as AggregationProto
7+
8+
9+
class Aggregation:
    """
    NOTE: Feast-handled aggregations are not yet supported. This class provides a way to
    register user-defined aggregations.

    Attributes:
        column: Column name of the feature we are aggregating.
        function: Name of the aggregation function, e.g. sum, max, min, count, mean.
        time_window: The time window for this aggregation, or None when unspecified.
    """

    column: str
    function: str
    time_window: Optional[timedelta]

    def __init__(
        self,
        column: Optional[str] = "",
        function: Optional[str] = "",
        time_window: Optional[timedelta] = None,
    ):
        """
        Create an aggregation definition.

        Args:
            column: Column to aggregate; None is stored as an empty string.
            function: Aggregation function name; None is stored as an empty string.
            time_window: Window the aggregation covers, or None if unspecified.
        """
        self.column = column or ""
        self.function = function or ""
        self.time_window = time_window

    def to_proto(self) -> "AggregationProto":
        """
        Convert this aggregation to its protobuf representation.

        The proto's time_window field is left unset when self.time_window is None.
        """
        window_duration = None
        if self.time_window is not None:
            window_duration = Duration()
            window_duration.FromTimedelta(self.time_window)

        return AggregationProto(
            column=self.column, function=self.function, time_window=window_duration
        )

    @classmethod
    def from_proto(cls, agg_proto: "AggregationProto") -> "Aggregation":
        """
        Build an Aggregation from its protobuf representation.

        An unset time_window is restored as None (not a zero timedelta) so that an
        aggregation created without a window compares equal to itself after a
        to_proto()/from_proto() round trip. An explicitly set zero duration is
        still returned as timedelta(0).
        """
        time_window = (
            agg_proto.time_window.ToTimedelta()
            if agg_proto.HasField("time_window")
            else None
        )

        return cls(
            column=agg_proto.column,
            function=agg_proto.function,
            time_window=time_window,
        )

    def __eq__(self, other):
        """
        Two aggregations are equal when column, function and time_window all match.

        Returns NotImplemented for non-Aggregation operands so that Python falls
        back to its default handling (the comparison evaluates to False) instead
        of raising from membership tests or mixed-type comparisons.
        """
        if not isinstance(other, Aggregation):
            return NotImplemented

        return (
            self.column == other.column
            and self.function == other.function
            and self.time_window == other.time_window
        )

sdk/python/feast/data_format.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ def from_proto(cls, proto):
8989
fmt = proto.WhichOneof("format")
9090
if fmt == "avro_format":
9191
return AvroFormat(schema_json=proto.avro_format.schema_json)
92+
if fmt == "json_format":
93+
return JsonFormat(schema_json=proto.json_format.schema_json)
9294
if fmt == "proto_format":
9395
return ProtoFormat(class_path=proto.proto_format.class_path)
9496
raise NotImplementedError(f"StreamFormat is unsupported: {fmt}")
@@ -113,6 +115,28 @@ def to_proto(self):
113115
return StreamFormatProto(avro_format=proto)
114116

115117

118+
class JsonFormat(StreamFormat):
    """
    Stream data format for messages whose data is encoded as Json.
    """

    def __init__(self, schema_json: str):
        """
        Create a Json stream format.

        For spark, the schema uses the pyspark ddl string format. Example shown here:
        https://vincent.doba.fr/posts/20211004_spark_data_description_language_for_defining_spark_schema/

        Args:
            schema_json: Json schema definition
        """
        self.schema_json = schema_json

    def to_proto(self):
        """Return a StreamFormatProto whose json_format carries this schema."""
        return StreamFormatProto(
            json_format=StreamFormatProto.JsonFormat(schema_json=self.schema_json)
        )
138+
139+
116140
class ProtoFormat(StreamFormat):
117141
"""
118142
Defines the Protobuf data format

sdk/python/feast/data_source.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,9 @@ def __init__(
410410
if _message_format is None:
411411
raise ValueError("Message format must be specified for Kafka source")
412412

413+
if not timestamp_field and not _event_timestamp_column:
414+
raise ValueError("Timestamp field must be specified for Kafka source")
415+
413416
super().__init__(
414417
event_timestamp_column=_event_timestamp_column,
415418
created_timestamp_column=created_timestamp_column,

0 commit comments

Comments
 (0)