Skip to content

Commit 0427122

Browse files
authored
fix(ingest): allow hiding some fields from the schema (#6077)
1 parent 128e3a8 commit 0427122

File tree

9 files changed

+29
-15
lines changed

9 files changed

+29
-15
lines changed

metadata-ingestion/docs/sources/bigquery/bigquery-beta_pre.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,11 @@ Use `profiling.bigquery_temp_table_schema` to restrict to one specific dataset t
8383

8484
```yml
8585
credential:
86-
project_id: project-id-1234567
87-
private_key_id: "d0121d0000882411234e11166c6aaa23ed5d74e0"
88-
private_key: "-----BEGIN PRIVATE KEY-----\nMIIyourkey\n-----END PRIVATE KEY-----\n"
89-
client_email: "test@suppproject-id-1234567.iam.gserviceaccount.com"
90-
client_id: "123456678890"
86+
project_id: project-id-1234567
87+
private_key_id: "d0121d0000882411234e11166c6aaa23ed5d74e0"
88+
private_key: "-----BEGIN PRIVATE KEY-----\nMIIyourkey\n-----END PRIVATE KEY-----\n"
89+
client_email: "test@suppproject-id-1234567.iam.gserviceaccount.com"
90+
client_id: "123456678890"
9191
```
9292
9393
### Lineage Computation Details

metadata-ingestion/src/datahub/configuration/common.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import re
22
from abc import ABC, abstractmethod
33
from enum import auto
4-
from typing import IO, Any, ClassVar, Dict, List, Optional, Pattern, cast
4+
from typing import IO, Any, ClassVar, Dict, List, Optional, Pattern, Type, cast
55

66
from cached_property import cached_property
77
from pydantic import BaseModel, Extra
@@ -18,6 +18,18 @@ class Config:
1818
cached_property,
1919
) # needed to allow cached_property to work. See https://github.com/samuelcolvin/pydantic/issues/1241 for more info.
2020

21+
@staticmethod
22+
def schema_extra(schema: Dict[str, Any], model: Type["ConfigModel"]) -> None:
23+
# We use the custom "hidden_from_schema" attribute to hide fields from the
24+
# autogenerated docs.
25+
remove_fields = []
26+
for key, prop in schema.get("properties", {}).items():
27+
if prop.get("hidden_from_schema"):
28+
remove_fields.append(key)
29+
30+
for key in remove_fields:
31+
del schema["properties"][key]
32+
2133

2234
class PermissiveConfigModel(ConfigModel):
2335
# A permissive config model that allows extra fields.

metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class BigQueryV2Config(BigQueryConfig):
6666
default=None,
6767
description="[deprecated] Use project_id_pattern instead.",
6868
)
69-
storage_project_id: None = Field(default=None, exclude=True)
69+
storage_project_id: None = Field(default=None, hidden_from_schema=True)
7070

7171
lineage_use_sql_parser: bool = Field(
7272
default=False,

metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,9 @@ class ClickHouseConfig(
120120
):
121121
# defaults
122122
host_port = Field(default="localhost:8123", description="ClickHouse host URL.")
123-
scheme = Field(default="clickhouse", description="", exclude=True)
123+
scheme = Field(default="clickhouse", description="", hidden_from_schema=True)
124124
password: pydantic.SecretStr = Field(
125-
default=pydantic.SecretStr(""), exclude=True, description="password"
125+
default=pydantic.SecretStr(""), description="password"
126126
)
127127

128128
secure: Optional[bool] = Field(default=None, description="")

metadata-ingestion/src/datahub/ingestion/source/sql/hive.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,14 +93,14 @@ def dbapi_get_columns_patched(self, connection, table_name, schema=None, **kw):
9393

9494
class HiveConfig(BasicSQLAlchemyConfig):
9595
# defaults
96-
scheme = Field(default="hive", exclude=True)
96+
scheme = Field(default="hive", hidden_from_schema=True)
9797

9898
# Hive SQLAlchemy connector returns views as tables.
9999
# See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273.
100100
# Disabling views helps us prevent this duplication.
101101
include_views = Field(
102102
default=False,
103-
exclude=True,
103+
hidden_from_schema=True,
104104
description="Hive SQLAlchemy connector returns views as tables. See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. Disabling views helps us prevent this duplication.",
105105
)
106106

metadata-ingestion/src/datahub/ingestion/source/sql/mssql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
class SQLServerConfig(BasicSQLAlchemyConfig):
3535
# defaults
3636
host_port: str = Field(default="localhost:1433", description="MSSQL host URL.")
37-
scheme: str = Field(default="mssql+pytds", description="", exclude=True)
37+
scheme: str = Field(default="mssql+pytds", description="", hidden_from_schema=True)
3838
use_odbc: bool = Field(
3939
default=False,
4040
description="See https://docs.sqlalchemy.org/en/14/dialects/mssql.html#module-sqlalchemy.dialects.mssql.pyodbc.",

metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,9 @@ class PrestoOnHiveConfig(BasicSQLAlchemyConfig):
101101
default="localhost:3306",
102102
description="Host URL and port to connect to. Example: localhost:3306",
103103
)
104-
scheme: str = Field(default="mysql+pymysql", description="", exclude=True)
104+
scheme: str = Field(
105+
default="mysql+pymysql", description="", hidden_from_schema=True
106+
)
105107
metastore_db_name: Optional[str] = Field(
106108
default=None,
107109
description="Name of the Hive metastore's database (usually: metastore). For backward compatibility, if this field is not provided, the database field will be used. If both the 'database' and 'metastore_db_name' fields are set then the 'database' field will be used to filter the hive/presto/trino database",

metadata-ingestion/src/datahub/ingestion/source/sql/redshift.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ class RedshiftConfig(
141141
scheme = Field(
142142
default="redshift+psycopg2",
143143
description="",
144-
exclude=True,
144+
hidden_from_schema=True,
145145
)
146146

147147
default_schema: str = Field(

metadata-ingestion/src/datahub/ingestion/source/sql/trino.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def _get_columns(self, connection, table_name, schema: str = None, **kw): # typ
131131

132132
class TrinoConfig(BasicSQLAlchemyConfig):
133133
# defaults
134-
scheme = Field(default="trino", description="", exclude=True)
134+
scheme = Field(default="trino", description="", hidden_from_schema=True)
135135

136136
def get_identifier(self: BasicSQLAlchemyConfig, schema: str, table: str) -> str:
137137
regular = f"{schema}.{table}"

0 commit comments

Comments
 (0)