@@ -15,8 +15,10 @@
 import enum
 import warnings
 from abc import ABC, abstractmethod
+from datetime import timedelta
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
 
+from google.protobuf.duration_pb2 import Duration
 from google.protobuf.json_format import MessageToJson
 
 from feast import type_map
@@ -47,11 +49,16 @@ class KafkaOptions:
     """
 
     def __init__(
-        self, bootstrap_servers: str, message_format: StreamFormat, topic: str,
+        self,
+        bootstrap_servers: str,
+        message_format: StreamFormat,
+        topic: str,
+        watermark: Optional[timedelta] = None,
     ):
         self.bootstrap_servers = bootstrap_servers
         self.message_format = message_format
         self.topic = topic
+        self.watermark = watermark or None
 
     @classmethod
     def from_proto(cls, kafka_options_proto: DataSourceProto.KafkaOptions):
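As a usage sketch (not part of the diff): with the widened signature, a KafkaOptions can carry a watermark directly. The broker address, format, and topic below are placeholder values; note that `self.watermark = watermark or None` means a zero-length timedelta is stored as None.

from datetime import timedelta

from feast.data_format import AvroFormat
from feast.data_source import KafkaOptions

# Placeholder broker/topic/schema; only the watermark keyword is the point here.
options = KafkaOptions(
    bootstrap_servers="localhost:9092",
    message_format=AvroFormat(schema_json="{}"),
    topic="driver_stats",
    watermark=timedelta(hours=1),  # rows more than 1h late may be dropped
)
# Because of `watermark or None`, passing timedelta(0) would be stored as None.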
@@ -64,11 +71,18 @@ def from_proto(cls, kafka_options_proto: DataSourceProto.KafkaOptions):
         Returns:
             Returns a BigQueryOptions object based on the kafka_options protobuf
         """
-
+        watermark = None
+        if kafka_options_proto.HasField("watermark"):
+            watermark = (
+                timedelta(days=0)
+                if kafka_options_proto.watermark.ToNanoseconds() == 0
+                else kafka_options_proto.watermark.ToTimedelta()
+            )
         kafka_options = cls(
             bootstrap_servers=kafka_options_proto.bootstrap_servers,
             message_format=StreamFormat.from_proto(kafka_options_proto.message_format),
             topic=kafka_options_proto.topic,
+            watermark=watermark,
         )
 
         return kafka_options
@@ -80,11 +94,16 @@ def to_proto(self) -> DataSourceProto.KafkaOptions:
         Returns:
             KafkaOptionsProto protobuf
         """
+        watermark_duration = None
+        if self.watermark is not None:
+            watermark_duration = Duration()
+            watermark_duration.FromTimedelta(self.watermark)
 
         kafka_options_proto = DataSourceProto.KafkaOptions(
             bootstrap_servers=self.bootstrap_servers,
             message_format=self.message_format.to_proto(),
             topic=self.topic,
+            watermark=watermark_duration,
        )
 
         return kafka_options_proto
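The two converters above lean on the protobuf Duration well-known type. A minimal, Feast-independent sketch of the round trip they implement (only google.protobuf is assumed):

from datetime import timedelta

from google.protobuf.duration_pb2 import Duration

# timedelta -> Duration, as to_proto() does when a watermark is set.
watermark = timedelta(hours=2)
duration = Duration()
duration.FromTimedelta(watermark)

# Duration -> timedelta, as from_proto() does when the field is present;
# a zero-length Duration is normalized to timedelta(days=0) instead of None.
restored = timedelta(days=0) if duration.ToNanoseconds() == 0 else duration.ToTimedelta()
assert restored == watermark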
@@ -369,7 +388,32 @@ def __init__(
         owner: Optional[str] = "",
         timestamp_field: Optional[str] = "",
         batch_source: Optional[DataSource] = None,
+        watermark: Optional[timedelta] = None,
     ):
+        """
+        Creates a KafkaSource stream source object.
+        Args:
+            name: str. Name of data source, which should be unique within a project
+            event_timestamp_column (optional): str. (Deprecated) Event timestamp column used for point in time
+                joins of feature values.
+            bootstrap_servers: str. The servers of the kafka broker in the form "localhost:9092".
+            message_format: StreamFormat. StreamFormat of serialized messages.
+            topic: str. The name of the topic to read from in the kafka source.
+            created_timestamp_column (optional): str. Timestamp column indicating when the row
+                was created, used for deduplicating rows.
+            field_mapping (optional): dict(str, str). A dictionary mapping of column names in this data
+                source to feature names in a feature table or view. Only used for feature
+                columns, not entity or timestamp columns.
+            date_partition_column (optional): str. Timestamp column used for partitioning.
+            description (optional): str. A human-readable description.
+            tags (optional): dict(str, str). A dictionary of key-value pairs to store arbitrary metadata.
+            owner (optional): str. The owner of the data source, typically the email of the primary
+                maintainer.
+            timestamp_field (optional): str. Event timestamp field used for point
+                in time joins of feature values.
+            batch_source: DataSource. The datasource that acts as a batch source.
+            watermark: timedelta. The watermark for stream data, i.e. how late stream data can arrive without being discarded.
+        """
         positional_attributes = [
             "name",
             "event_timestamp_column",
@@ -425,10 +469,12 @@ def __init__(
             timestamp_field=timestamp_field,
         )
         self.batch_source = batch_source
+
         self.kafka_options = KafkaOptions(
             bootstrap_servers=_bootstrap_servers,
             message_format=_message_format,
             topic=_topic,
+            watermark=watermark,
         )
 
     def __eq__(self, other):
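A hedged end-to-end sketch of what a caller could write once this lands; the broker, topic, schema, and field names are illustrative placeholders, not values taken from the diff:

from datetime import timedelta

from feast.data_format import JsonFormat
from feast.data_source import KafkaSource

# Hypothetical stream source: events arriving more than 30 minutes late
# (relative to the watermark) may be discarded.
driver_stats_stream = KafkaSource(
    name="driver_stats_stream",
    bootstrap_servers="localhost:9092",
    topic="driver_stats",
    message_format=JsonFormat(schema_json="driver_id integer, event_timestamp timestamp"),
    timestamp_field="event_timestamp",
    watermark=timedelta(minutes=30),
)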
@@ -445,6 +491,7 @@ def __eq__(self, other):
             != other.kafka_options.bootstrap_servers
             or self.kafka_options.message_format != other.kafka_options.message_format
             or self.kafka_options.topic != other.kafka_options.topic
+            or self.kafka_options.watermark != other.kafka_options.watermark
         ):
             return False
 
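Building on that hypothetical source, the extra clause in __eq__ means two sources that differ only in their watermark no longer compare equal:

# Hypothetical: identical definition except that no watermark is set.
no_watermark = KafkaSource(
    name="driver_stats_stream",
    bootstrap_servers="localhost:9092",
    topic="driver_stats",
    message_format=JsonFormat(schema_json="driver_id integer, event_timestamp timestamp"),
    timestamp_field="event_timestamp",
)
assert no_watermark != driver_stats_stream  # watermark now participates in equality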
@@ -455,6 +502,13 @@ def __hash__(self):
 
     @staticmethod
     def from_proto(data_source: DataSourceProto):
+        watermark = None
+        if data_source.kafka_options.HasField("watermark"):
+            watermark = (
+                timedelta(days=0)
+                if data_source.kafka_options.watermark.ToNanoseconds() == 0
+                else data_source.kafka_options.watermark.ToTimedelta()
+            )
         return KafkaSource(
             name=data_source.name,
             event_timestamp_column=data_source.timestamp_field,
@@ -463,6 +517,7 @@ def from_proto(data_source: DataSourceProto):
             message_format=StreamFormat.from_proto(
                 data_source.kafka_options.message_format
             ),
+            watermark=watermark,
             topic=data_source.kafka_options.topic,
             created_timestamp_column=data_source.created_timestamp_column,
             timestamp_field=data_source.timestamp_field,
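Finally, a hypothetical sanity check that the new field survives serialization, assuming the KafkaSource built in the sketch above and the module's existing to_proto():

# The watermark should round-trip through DataSourceProto unchanged.
proto = driver_stats_stream.to_proto()
restored = KafkaSource.from_proto(proto)
assert restored.kafka_options.watermark == timedelta(minutes=30)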