|
| 1 | +import abc |
| 2 | +from typing import TYPE_CHECKING, Dict, Optional, Type, cast |
| 3 | + |
| 4 | +import pyarrow as pa |
| 5 | +from pytz import UTC |
| 6 | + |
| 7 | +from feast.data_source import DataSource |
| 8 | +from feast.embedded_go.type_map import FEAST_TYPE_TO_ARROW_TYPE, PA_TIMESTAMP_TYPE |
| 9 | +from feast.errors import ( |
| 10 | + FeastObjectNotFoundException, |
| 11 | + FeatureViewNotFoundException, |
| 12 | + OnDemandFeatureViewNotFoundException, |
| 13 | +) |
| 14 | +from feast.protos.feast.core.FeatureService_pb2 import ( |
| 15 | + LoggingConfig as LoggingConfigProto, |
| 16 | +) |
| 17 | +from feast.types import from_value_type |
| 18 | + |
| 19 | +if TYPE_CHECKING: |
| 20 | + from feast import FeatureService |
| 21 | + from feast.registry import Registry |
| 22 | + |
| 23 | + |
# System column names appended to every log row; the double-underscore
# prefix keeps them from colliding with feature column names.
REQUEST_ID_FIELD = "__request_id"
LOG_TIMESTAMP_FIELD = "__log_timestamp"
# NOTE(review): not referenced in this chunk — presumably a date/partition
# column derived from the log timestamp; confirm against the log writers.
LOG_DATE_FIELD = "__log_date"
| 27 | + |
| 28 | + |
class LoggingSource(abc.ABC):
    """
    Logging source describes object that produces logs (eg, feature service produces logs of served features).
    It should be able to provide schema of produced logs table and additional metadata that describes logs data.
    """

    # Fix: the original class used @abc.abstractmethod without an ABCMeta
    # metaclass, so the abstract contract was never enforced — an incomplete
    # subclass (or this base class itself) could be instantiated silently.
    # Inheriting abc.ABC makes instantiation of non-implementing classes
    # raise TypeError; subclasses that implement both methods are unaffected.

    @abc.abstractmethod
    def get_schema(self, registry: "Registry") -> pa.Schema:
        """Generate schema for logs destination."""
        raise NotImplementedError

    @abc.abstractmethod
    def get_log_timestamp_column(self) -> str:
        """Return timestamp column that must exist in generated schema."""
        raise NotImplementedError
| 44 | + |
| 45 | + |
class FeatureServiceLoggingSource(LoggingSource):
    """
    Produces the log table schema for features served through a feature service.

    Columns are derived from the service's feature view projections: one value,
    one timestamp, and one status column per feature, plus entity join keys
    (or request-source fields for on-demand feature views) and system columns.
    """

    def __init__(self, feature_service: "FeatureService", project: str):
        # Feature service whose served features are being logged.
        self._feature_service = feature_service
        # Feast project used for all registry lookups below.
        self._project = project

    def get_schema(self, registry: "Registry") -> pa.Schema:
        """
        Build the pyarrow schema for the log table.

        Column order follows dict insertion order: per-projection feature
        columns first, then join keys / request fields, then system columns.
        """
        fields: Dict[str, pa.DataType] = {}

        for projection in self._feature_service.feature_view_projections:
            for feature in projection.features:
                # Each served feature contributes three columns:
                # its value, its event timestamp, and its status.
                fields[
                    f"{projection.name_to_use()}__{feature.name}"
                ] = FEAST_TYPE_TO_ARROW_TYPE[feature.dtype]
                fields[
                    f"{projection.name_to_use()}__{feature.name}__timestamp"
                ] = PA_TIMESTAMP_TYPE
                fields[
                    f"{projection.name_to_use()}__{feature.name}__status"
                ] = pa.int32()

            # A projection may refer to a regular feature view or an
            # on-demand feature view; try the regular lookup first.
            try:
                feature_view = registry.get_feature_view(projection.name, self._project)
            except FeatureViewNotFoundException:
                try:
                    on_demand_feature_view = registry.get_on_demand_feature_view(
                        projection.name, self._project
                    )
                except OnDemandFeatureViewNotFoundException:
                    # Neither kind of view exists under this name.
                    raise FeastObjectNotFoundException(
                        f"Can't recognize feature view with a name {projection.name}"
                    )

                # On-demand views: log the fields of each request source.
                for (
                    request_source
                ) in on_demand_feature_view.source_request_sources.values():
                    for field in request_source.schema:
                        fields[field.name] = FEAST_TYPE_TO_ARROW_TYPE[field.dtype]

            else:
                # Regular views: log entity join keys, honoring any join key
                # remapping configured on the projection.
                for entity_name in feature_view.entities:
                    entity = registry.get_entity(entity_name, self._project)
                    join_key = projection.join_key_map.get(
                        entity.join_key, entity.join_key
                    )
                    fields[join_key] = FEAST_TYPE_TO_ARROW_TYPE[
                        from_value_type(entity.value_type)
                    ]

        # system columns
        fields[REQUEST_ID_FIELD] = pa.string()
        fields[LOG_TIMESTAMP_FIELD] = pa.timestamp("us", tz=UTC)

        return pa.schema(
            [pa.field(name, data_type) for name, data_type in fields.items()]
        )

    def get_log_timestamp_column(self) -> str:
        """Name of the timestamp column guaranteed to exist in the schema."""
        return LOG_TIMESTAMP_FIELD
| 104 | + |
| 105 | + |
| 106 | +class _DestinationRegistry(type): |
| 107 | + classes_by_proto_attr_name: Dict[str, Type["LoggingDestination"]] = {} |
| 108 | + |
| 109 | + def __new__(cls, name, bases, dct): |
| 110 | + kls = type.__new__(cls, name, bases, dct) |
| 111 | + if dct.get("_proto_attr_name"): |
| 112 | + cls.classes_by_proto_attr_name[dct["_proto_attr_name"]] = kls |
| 113 | + return kls |
| 114 | + |
| 115 | + |
class LoggingDestination:
    """
    Logging destination contains details about where exactly logs should be written inside an offline store.
    It is implementation specific - each offline store must implement LoggingDestination subclass.

    Kind of logging destination will be determined by matching attribute name in LoggingConfig protobuf message
    and "_proto_kind" property of each subclass.
    """

    # Proto oneof attribute name this destination corresponds to.
    # NOTE(review): _DestinationRegistry keys its lookup table on
    # "_proto_attr_name", not "_proto_kind" — confirm subclasses set the
    # attribute the registry actually reads.
    # NOTE(review): @abc.abstractmethod below is not enforced without an
    # ABCMeta metaclass; left as-is since subclasses may combine this base
    # with the _DestinationRegistry metaclass, and ABCMeta would conflict.
    _proto_kind: str

    @classmethod
    @abc.abstractmethod
    def from_proto(cls, config_proto: LoggingConfigProto) -> "LoggingDestination":
        """Deserialize a destination from the LoggingConfig proto message."""
        raise NotImplementedError

    @abc.abstractmethod
    def to_proto(self) -> LoggingConfigProto:
        """Serialize this destination into a LoggingConfig proto message."""
        raise NotImplementedError

    @abc.abstractmethod
    def to_data_source(self) -> DataSource:
        """
        Convert this object into a data source to read logs from an offline store.
        """
        raise NotImplementedError
| 142 | + |
| 143 | + |
class LoggingConfig:
    """
    Thin wrapper around a concrete LoggingDestination that handles
    (de)serialization to and from the LoggingConfig proto message.
    """

    destination: LoggingDestination

    def __init__(self, destination: LoggingDestination):
        self.destination = destination

    @classmethod
    def from_proto(cls, config_proto: LoggingConfigProto) -> Optional["LoggingConfig"]:
        """Build a LoggingConfig from its proto form; None if no destination is set."""
        which = cast(str, config_proto.WhichOneof("destination"))
        if which is None:
            return None

        # Custom destinations carry their real kind inside the message payload.
        kind = (
            config_proto.custom_destination.kind
            if which == "custom_destination"
            else which
        )

        destination_class = _DestinationRegistry.classes_by_proto_attr_name[kind]
        return LoggingConfig(destination=destination_class.from_proto(config_proto))

    def to_proto(self) -> LoggingConfigProto:
        """Delegate serialization to the wrapped destination."""
        return self.destination.to_proto()
0 commit comments