|
54 | 54 | convert_sf_to_sp_type,
|
55 | 55 | convert_sp_to_sf_type,
|
56 | 56 | )
|
| 57 | +from snowflake.snowpark._internal.udf_utils import get_types_from_type_hints |
57 | 58 | from snowflake.snowpark._internal.utils import (
|
| 59 | + STAGE_PREFIX, |
58 | 60 | XML_ROW_TAG_STRING,
|
59 | 61 | XML_ROW_DATA_COLUMN_NAME,
|
60 | 62 | XML_READER_FILE_PATH,
|
61 | 63 | XML_READER_API_SIGNATURE,
|
| 64 | + XML_READER_SQL_COMMENT, |
62 | 65 | INFER_SCHEMA_FORMAT_TYPES,
|
63 | 66 | SNOWFLAKE_PATH_PREFIXES,
|
64 | 67 | TempObjectType,
|
|
70 | 73 | private_preview,
|
71 | 74 | random_name_for_temp_object,
|
72 | 75 | warning,
|
| 76 | + is_in_stored_procedure, |
73 | 77 | )
|
74 | 78 | from snowflake.snowpark.column import METADATA_COLUMN_TYPES, Column, _to_col_if_str
|
75 | 79 | from snowflake.snowpark.dataframe import DataFrame
|
@@ -1106,13 +1110,40 @@ def _read_semi_structured_file(self, path: str, format: str) -> DataFrame:
|
1106 | 1110 | "rowTag",
|
1107 | 1111 | "rowTag for reading XML file is in private preview since 1.31.0. Do not use it in production.",
|
1108 | 1112 | )
|
| 1113 | + |
| 1114 | + if is_in_stored_procedure(): # pragma: no cover |
| 1115 | + # Create a temp stage to hold the UDTF import file. |
| 1116 | + # Inside a stored procedure we must use a "temp" object rather than a "scoped temp" object, |
| 1117 | + # so the file is uploaded to the temp stage first and register_from_file is given the staged path. |
| 1118 | + temp_stage = random_name_for_temp_object(TempObjectType.STAGE) |
| 1119 | + sql_create_temp_stage = f"create temp stage if not exists {temp_stage} {XML_READER_SQL_COMMENT}" |
| 1120 | + self._session.sql(sql_create_temp_stage, _emit_ast=False).collect( |
| 1121 | + _emit_ast=False |
| 1122 | + ) |
| 1123 | + self._session._conn.upload_file( |
| 1124 | + XML_READER_FILE_PATH, |
| 1125 | + temp_stage, |
| 1126 | + compress_data=False, |
| 1127 | + overwrite=True, |
| 1128 | + skip_upload_on_content_match=True, |
| 1129 | + ) |
| 1130 | + python_file_path = f"{STAGE_PREFIX}{temp_stage}/{os.path.basename(XML_READER_FILE_PATH)}" |
| 1131 | + else: |
| 1132 | + python_file_path = XML_READER_FILE_PATH |
| 1133 | + |
| 1134 | + # create udtf |
| 1135 | + handler_name = "XMLReader" |
| 1136 | + _, input_types = get_types_from_type_hints( |
| 1137 | + (XML_READER_FILE_PATH, handler_name), TempObjectType.TABLE_FUNCTION |
| 1138 | + ) |
1109 | 1139 | output_schema = StructType(
|
1110 | 1140 | [StructField(XML_ROW_DATA_COLUMN_NAME, VariantType(), True)]
|
1111 | 1141 | )
|
1112 | 1142 | xml_reader_udtf = self._session.udtf.register_from_file(
|
1113 |
| - XML_READER_FILE_PATH, |
1114 |
| - "XMLReader", |
| 1143 | + python_file_path, |
| 1144 | + handler_name, |
1115 | 1145 | output_schema=output_schema,
|
| 1146 | + input_types=input_types, |
1116 | 1147 | packages=["snowflake-snowpark-python"],
|
1117 | 1148 | replace=True,
|
1118 | 1149 | )
|
|
0 commit comments