Skip to content

Commit dd46d45

Browse files
committed
fix: Fix incorrect on demand feature view diffing and improve Java tests (#3074)
* fix: Fix ODFV bug Signed-off-by: Danny Chiao <[email protected]>
1 parent 8e6a6b1 commit dd46d45

File tree

13 files changed

+362
-32
lines changed

13 files changed

+362
-32
lines changed

.github/workflows/java_master_only.yml

Lines changed: 85 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,52 @@ jobs:
6666
java-version: '11'
6767
java-package: jdk
6868
architecture: x64
69+
- name: Setup Python (to call feast apply)
70+
uses: actions/setup-python@v2
71+
id: setup-python
72+
with:
73+
python-version: 3.8
74+
architecture: x64
75+
- name: Setup Go
76+
id: setup-go
77+
uses: actions/setup-go@v2
78+
with:
79+
go-version: 1.18.0
80+
- name: Upgrade pip version
81+
run: |
82+
pip install --upgrade "pip>=21.3.1,<22.1"
83+
- name: Get pip cache dir
84+
id: pip-cache
85+
run: |
86+
echo "::set-output name=dir::$(pip cache dir)"
87+
- name: pip cache
88+
uses: actions/cache@v2
89+
with:
90+
path: |
91+
${{ steps.pip-cache.outputs.dir }}
92+
/opt/hostedtoolcache/Python
93+
/Users/runner/hostedtoolcache/Python
94+
key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
95+
restore-keys: |
96+
${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
97+
- name: Install pip-tools
98+
run: pip install pip-tools
99+
- name: Install apache-arrow on ubuntu
100+
run: |
101+
sudo apt update
102+
sudo apt install -y -V ca-certificates lsb-release wget
103+
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
104+
sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
105+
sudo apt update
106+
sudo apt install -y -V libarrow-dev
107+
- name: Install Python dependencies
108+
run: make install-python-ci-dependencies
109+
- uses: actions/cache@v2
110+
with:
111+
path: ~/.m2/repository
112+
key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }}
113+
restore-keys: |
114+
${{ runner.os }}-it-maven-
69115
- uses: actions/cache@v2
70116
with:
71117
path: ~/.m2/repository
@@ -91,10 +137,46 @@ jobs:
91137
java-version: '11'
92138
java-package: jdk
93139
architecture: x64
94-
- uses: actions/setup-python@v2
140+
- name: Setup Python (to call feast apply)
141+
uses: actions/setup-python@v2
142+
id: setup-python
143+
with:
144+
python-version: 3.8
145+
architecture: x64
146+
- name: Setup Go
147+
id: setup-go
148+
uses: actions/setup-go@v2
95149
with:
96-
python-version: '3.8'
97-
architecture: 'x64'
150+
go-version: 1.18.0
151+
- name: Upgrade pip version
152+
run: |
153+
pip install --upgrade "pip>=21.3.1,<22.1"
154+
- name: Get pip cache dir
155+
id: pip-cache
156+
run: |
157+
echo "::set-output name=dir::$(pip cache dir)"
158+
- name: pip cache
159+
uses: actions/cache@v2
160+
with:
161+
path: |
162+
${{ steps.pip-cache.outputs.dir }}
163+
/opt/hostedtoolcache/Python
164+
/Users/runner/hostedtoolcache/Python
165+
key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
166+
restore-keys: |
167+
${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
168+
- name: Install pip-tools
169+
run: pip install pip-tools
170+
- name: Install apache-arrow on ubuntu
171+
run: |
172+
sudo apt update
173+
sudo apt install -y -V ca-certificates lsb-release wget
174+
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
175+
sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
176+
sudo apt update
177+
sudo apt install -y -V libarrow-dev
178+
- name: Install Python dependencies
179+
run: make install-python-ci-dependencies
98180
- uses: actions/cache@v2
99181
with:
100182
path: ~/.m2/repository

.github/workflows/java_pr.yml

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,52 @@ jobs:
3838
java-version: '11'
3939
java-package: jdk
4040
architecture: x64
41+
- name: Setup Python (to call feast apply)
42+
uses: actions/setup-python@v2
43+
id: setup-python
44+
with:
45+
python-version: 3.8
46+
architecture: x64
47+
- name: Setup Go
48+
id: setup-go
49+
uses: actions/setup-go@v2
50+
with:
51+
go-version: 1.18.0
52+
- name: Upgrade pip version
53+
run: |
54+
pip install --upgrade "pip>=21.3.1,<22.1"
55+
- name: Get pip cache dir
56+
id: pip-cache
57+
run: |
58+
echo "::set-output name=dir::$(pip cache dir)"
59+
- name: pip cache
60+
uses: actions/cache@v2
61+
with:
62+
path: |
63+
${{ steps.pip-cache.outputs.dir }}
64+
/opt/hostedtoolcache/Python
65+
/Users/runner/hostedtoolcache/Python
66+
key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
67+
restore-keys: |
68+
${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
69+
- name: Install pip-tools
70+
run: pip install pip-tools
71+
- name: Install apache-arrow on ubuntu
72+
run: |
73+
sudo apt update
74+
sudo apt install -y -V ca-certificates lsb-release wget
75+
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
76+
sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
77+
sudo apt update
78+
sudo apt install -y -V libarrow-dev
79+
- name: Install Python dependencies
80+
run: make install-python-ci-dependencies
81+
- uses: actions/cache@v2
82+
with:
83+
path: ~/.m2/repository
84+
key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }}
85+
restore-keys: |
86+
${{ runner.os }}-it-maven-
4187
- uses: actions/cache@v2
4288
with:
4389
path: ~/.m2/repository
@@ -98,6 +144,46 @@ jobs:
98144
aws-region: us-west-2
99145
- name: Use AWS CLI
100146
run: aws sts get-caller-identity
147+
- name: Setup Python (to call feast apply)
148+
uses: actions/setup-python@v2
149+
id: setup-python
150+
with:
151+
python-version: 3.8
152+
architecture: x64
153+
- name: Setup Go
154+
id: setup-go
155+
uses: actions/setup-go@v2
156+
with:
157+
go-version: 1.18.0
158+
- name: Upgrade pip version
159+
run: |
160+
pip install --upgrade "pip>=21.3.1,<22.1"
161+
- name: Get pip cache dir
162+
id: pip-cache
163+
run: |
164+
echo "::set-output name=dir::$(pip cache dir)"
165+
- name: pip cache
166+
uses: actions/cache@v2
167+
with:
168+
path: |
169+
${{ steps.pip-cache.outputs.dir }}
170+
/opt/hostedtoolcache/Python
171+
/Users/runner/hostedtoolcache/Python
172+
key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
173+
restore-keys: |
174+
${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
175+
- name: Install pip-tools
176+
run: pip install pip-tools
177+
- name: Install apache-arrow on ubuntu
178+
run: |
179+
sudo apt update
180+
sudo apt install -y -V ca-certificates lsb-release wget
181+
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
182+
sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
183+
sudo apt update
184+
sudo apt install -y -V libarrow-dev
185+
- name: Install Python dependencies
186+
run: make install-python-ci-dependencies
101187
- name: Run integration tests
102188
run: make test-java-integration
103189
- name: Save report

java/CONTRIBUTING.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ mvn spotless:apply
3636
#### Project Makefile
3737
The Project Makefile provides useful shorthands for common development tasks:
3838

39+
> Note: These commands rely on a local version of `feast` (Python) to be installed
3940
4041
Run all Unit tests:
4142
```

java/serving/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,4 +136,6 @@ Unit &amp; Integration Tests can be used to verify functionality:
136136
mvn test -pl serving --also-make
137137
# run integration tests
138138
mvn verify -pl serving --also-make
139+
# run integration tests with debugger
140+
mvn -Dmaven.failsafe.debug verify -pl serving --also-make
139141
```

java/serving/pom.xml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,28 @@
8282
</configuration>
8383
</plugin>
8484

85+
<!-- Call feast apply before running integration tests -->
86+
<plugin>
87+
<groupId>org.codehaus.mojo</groupId>
88+
<artifactId>exec-maven-plugin</artifactId>
89+
<version>1.6.0</version>
90+
<executions>
91+
<execution>
92+
<configuration>
93+
<executable>python</executable>
94+
<workingDirectory>src/test/resources/docker-compose/feast10/</workingDirectory>
95+
<arguments>
96+
<argument>setup_it.py</argument>
97+
</arguments>
98+
</configuration>
99+
<id>feast_test_apply</id>
100+
<phase>process-test-resources</phase>
101+
<goals>
102+
<goal>exec</goal>
103+
</goals>
104+
</execution>
105+
</executions>
106+
</plugin>
85107
</plugins>
86108
</build>
87109

java/serving/src/test/resources/docker-compose/feast10/definitions.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,17 @@ def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:
7373

7474
entity = Entity(name="entity", value_type=ValueType.STRING,)
7575

76-
benchmark_feature_views = [
77-
FeatureView(
76+
benchmark_feature_views = []
77+
for i in range(25):
78+
fv = FeatureView(
7879
name=f"feature_view_{i}",
7980
entities=[entity],
8081
ttl=Duration(seconds=86400),
8182
schema=[Field(name=f"feature_{10 * i + j}", dtype=Int64) for j in range(10)],
8283
online=True,
8384
source=generated_data_source,
8485
)
85-
for i in range(25)
86-
]
86+
benchmark_feature_views.append(fv)
8787

8888
benchmark_feature_service = FeatureService(
8989
name=f"benchmark_feature_service", features=benchmark_feature_views,
Binary file not shown.
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
from pathlib import Path
2+
from feast.repo_config import load_repo_config
3+
from datetime import datetime, timedelta
4+
5+
import numpy as np
6+
import pandas as pd
7+
8+
from definitions import (
9+
benchmark_feature_service,
10+
benchmark_feature_views,
11+
driver,
12+
driver_hourly_stats_view,
13+
entity,
14+
transformed_conv_rate,
15+
)
16+
17+
from feast import FeatureStore
18+
19+
20+
def setup_data():
21+
start = datetime.now() - timedelta(days=10)
22+
23+
df = pd.DataFrame()
24+
df["driver_id"] = np.arange(1000, 1010)
25+
df["created"] = datetime.now()
26+
df["conv_rate"] = np.arange(0, 1, 0.1)
27+
df["acc_rate"] = np.arange(0.5, 1, 0.05)
28+
df["avg_daily_trips"] = np.arange(0, 1000, 100)
29+
30+
# some of rows are beyond 7 days to test OUTSIDE_MAX_AGE status
31+
df["event_timestamp"] = start + pd.Series(np.arange(0, 10)).map(
32+
lambda days: timedelta(days=days)
33+
)
34+
35+
# Store data in parquet files. Parquet is convenient for local development mode. For
36+
# production, you can use your favorite DWH, such as BigQuery. See Feast documentation
37+
# for more info.
38+
df.to_parquet("driver_stats.parquet")
39+
40+
# For Benchmarks
41+
# Please read more in Feast RFC-031
42+
# (link https://docs.google.com/document/d/12UuvTQnTTCJhdRgy6h10zSbInNGSyEJkIxpOcgOen1I/edit)
43+
# about this benchmark setup
44+
def generate_data(
45+
num_rows: int, num_features: int, destination: str
46+
) -> pd.DataFrame:
47+
features = [f"feature_{i}" for i in range(num_features)]
48+
columns = ["entity", "event_timestamp"] + features
49+
df = pd.DataFrame(0, index=np.arange(num_rows), columns=columns)
50+
df["event_timestamp"] = datetime.utcnow()
51+
for column in features:
52+
df[column] = np.random.randint(1, num_rows, num_rows)
53+
54+
df["entity"] = "key-" + pd.Series(np.arange(1, num_rows + 1)).astype(
55+
pd.StringDtype()
56+
)
57+
58+
df.to_parquet(destination)
59+
60+
generate_data(10**3, 250, "benchmark_data.parquet")
61+
62+
63+
def main():
64+
print("Running setup_it.py")
65+
66+
setup_data()
67+
existing_repo_config = load_repo_config(Path("."))
68+
69+
# Update to default online store since otherwise, relies on Dockerized Redis service
70+
fs = FeatureStore(config=existing_repo_config.copy(update={"online_store": {}}))
71+
fs.apply(
72+
[
73+
driver_hourly_stats_view,
74+
transformed_conv_rate,
75+
driver,
76+
entity,
77+
benchmark_feature_service,
78+
*benchmark_feature_views,
79+
]
80+
)
81+
82+
print("setup_it finished")
83+
84+
85+
if __name__ == "__main__":
86+
main()

sdk/python/feast/diff/registry_diff.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,8 @@ def diff_registry_objects(
144144
continue
145145
elif getattr(current_spec, _field.name) != getattr(new_spec, _field.name):
146146
if _field.name == "user_defined_function":
147-
current_spec = cast(OnDemandFeatureViewSpec, current_proto)
148-
new_spec = cast(OnDemandFeatureViewSpec, new_proto)
147+
current_spec = cast(OnDemandFeatureViewSpec, current_spec)
148+
new_spec = cast(OnDemandFeatureViewSpec, new_spec)
149149
current_udf = current_spec.user_defined_function
150150
new_udf = new_spec.user_defined_function
151151
for _udf_field in current_udf.DESCRIPTOR.fields:

sdk/python/feast/feature_logging.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@ class LoggingSource:
3434

3535
@abc.abstractmethod
3636
def get_schema(self, registry: "BaseRegistry") -> pa.Schema:
37-
""" Generate schema for logs destination. """
37+
"""Generate schema for logs destination."""
3838
raise NotImplementedError
3939

4040
@abc.abstractmethod
4141
def get_log_timestamp_column(self) -> str:
42-
""" Return timestamp column that must exist in generated schema. """
42+
"""Return timestamp column that must exist in generated schema."""
4343
raise NotImplementedError
4444

4545

0 commit comments

Comments
 (0)