Skip to content

Commit a885323

Browse files
committed
bumping json serialization to 2.2.0, adding metadata.is_formatted to reflect when a table should be rendered as (Formatted) html
1 parent a698609 commit a885323

File tree

5 files changed

+120
-12
lines changed

5 files changed

+120
-12
lines changed

dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import org.jetbrains.kotlinx.dataframe.AnyCol
1919
import org.jetbrains.kotlinx.dataframe.AnyFrame
2020
import org.jetbrains.kotlinx.dataframe.ColumnsContainer
2121
import org.jetbrains.kotlinx.dataframe.DataColumn
22+
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
2223
import org.jetbrains.kotlinx.dataframe.api.indices
2324
import org.jetbrains.kotlinx.dataframe.api.isList
2425
import org.jetbrains.kotlinx.dataframe.api.rows
@@ -29,6 +30,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
2930
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
3031
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS
3132
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA
33+
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.IS_FORMATTED
3234
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KIND
3335
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME
3436
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA
@@ -52,7 +54,7 @@ import java.io.IOException
5254

5355
// See docs/serialization_format.md for a description of
5456
// serialization versions and format.
55-
internal const val SERIALIZATION_VERSION = "2.1.1"
57+
internal const val SERIALIZATION_VERSION = "2.2.0"
5658

5759
internal object SerializationKeys {
5860
const val DATA = "data"
@@ -65,6 +67,7 @@ internal object SerializationKeys {
6567
const val KOTLIN_DATAFRAME = "kotlin_dataframe"
6668
const val TYPE = "type"
6769
const val TYPES = "types"
70+
const val IS_FORMATTED = "is_formatted"
6871
}
6972

7073
private val valueTypes =
@@ -196,10 +199,12 @@ internal class DataframeConvertableEncoder(
196199
rowLimit,
197200
encoders,
198201
)
202+
val isFormatted = input is FormattedFrame<*>
199203
buildJsonObject {
200204
put(DATA, data)
201205
putJsonObject(METADATA) {
202206
put(KIND, JsonPrimitive(CellKind.DataFrameConvertable.toString()))
207+
put(IS_FORMATTED, JsonPrimitive(isFormatted))
203208
}
204209
}
205210
} ?: JsonPrimitive(null)
@@ -377,6 +382,7 @@ internal fun encodeDataFrameWithMetadata(
377382
rowLimit: Int,
378383
nestedRowLimit: Int? = null,
379384
customEncoders: List<CustomEncoder> = emptyList(),
385+
isFormatted: Boolean = false,
380386
): JsonObject =
381387
buildJsonObject {
382388
put(VERSION, JsonPrimitive(SERIALIZATION_VERSION))
@@ -391,6 +397,7 @@ internal fun encodeDataFrameWithMetadata(
391397
}
392398
put(NROW, JsonPrimitive(frame.rowsCount()))
393399
put(NCOL, JsonPrimitive(frame.columnsCount()))
400+
put(IS_FORMATTED, JsonPrimitive(isFormatted))
394401
}
395402
put(
396403
KOTLIN_DATAFRAME,
@@ -403,11 +410,12 @@ internal fun encodeDataFrameWithMetadata(
403410
}
404411

405412
@OptIn(ExperimentalSerializationApi::class)
406-
internal fun encodeFrameNoDynamicNestedTables(df: AnyFrame, limit: Int): JsonObject =
413+
internal fun encodeFrameNoDynamicNestedTables(df: AnyFrame, limit: Int, isFormatted: Boolean): JsonObject =
407414
buildJsonObject {
408415
put(NROW, df.rowsCount())
409416
put(NCOL, df.columnsCount())
410417
putJsonArray(COLUMNS) { addAll(df.columnNames()) }
418+
put(IS_FORMATTED, JsonPrimitive(isFormatted))
411419
put(
412420
KOTLIN_DATAFRAME,
413421
encodeFrame(df.take(limit)),

dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ import kotlinx.serialization.json.decodeFromStream
77
import org.intellij.lang.annotations.Language
88
import org.jetbrains.kotlinx.dataframe.AnyFrame
99
import org.jetbrains.kotlinx.dataframe.AnyRow
10+
import org.jetbrains.kotlinx.dataframe.DataColumn
1011
import org.jetbrains.kotlinx.dataframe.DataFrame
1112
import org.jetbrains.kotlinx.dataframe.DataRow
13+
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
1214
import org.jetbrains.kotlinx.dataframe.api.JsonPath
1315
import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty
1416
import org.jetbrains.kotlinx.dataframe.api.single
@@ -320,6 +322,10 @@ public fun AnyFrame.toJson(prettyPrint: Boolean = false): String {
320322
* @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
321323
* @param customEncoders The options for encoding things like images.
322324
* The default is empty list, which indicates that the image is not encoded as Base64.
325+
* @param isFormatted Specifies whether the DataFrame should be formatted,
326+
* i.e. it comes from [FormattedFrame.df] or it contains a
327+
* [DataColumn][DataColumn]`<`[FormattedFrame][FormattedFrame]`<*>>` at any depth.
328+
* This is just a marker; formatting is applied by the renderer. Defaults to `false`.
323329
*
324330
* @return The DataFrame converted to a JSON string with metadata.
325331
*/
@@ -328,6 +334,7 @@ public fun AnyFrame.toJsonWithMetadata(
328334
nestedRowLimit: Int? = null,
329335
prettyPrint: Boolean = false,
330336
customEncoders: List<CustomEncoder> = emptyList(),
337+
isFormatted: Boolean = false,
331338
): String {
332339
val json = Json {
333340
this.prettyPrint = prettyPrint
@@ -336,7 +343,13 @@ public fun AnyFrame.toJsonWithMetadata(
336343
}
337344
return json.encodeToString(
338345
JsonElement.serializer(),
339-
encodeDataFrameWithMetadata(this@toJsonWithMetadata, rowLimit, nestedRowLimit, customEncoders),
346+
encodeDataFrameWithMetadata(
347+
frame = this@toJsonWithMetadata,
348+
rowLimit = rowLimit,
349+
nestedRowLimit = nestedRowLimit,
350+
customEncoders = customEncoders,
351+
isFormatted = isFormatted,
352+
),
340353
)
341354
}
342355

dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/JsonTests.kt

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import io.kotest.matchers.collections.shouldBeIn
66
import io.kotest.matchers.shouldBe
77
import io.kotest.matchers.string.shouldContain
88
import io.kotest.matchers.string.shouldNotContain
9+
import io.kotest.matchers.string.shouldStartWith
910
import io.kotest.matchers.types.instanceOf
1011
import io.kotest.matchers.types.shouldBeInstanceOf
1112
import kotlinx.serialization.json.Json
@@ -20,35 +21,43 @@ import org.intellij.lang.annotations.Language
2021
import org.jetbrains.kotlinx.dataframe.AnyFrame
2122
import org.jetbrains.kotlinx.dataframe.DataFrame
2223
import org.jetbrains.kotlinx.dataframe.DataRow
24+
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
2325
import org.jetbrains.kotlinx.dataframe.api.JsonPath
2426
import org.jetbrains.kotlinx.dataframe.api.allNulls
2527
import org.jetbrains.kotlinx.dataframe.api.colsOf
2628
import org.jetbrains.kotlinx.dataframe.api.columnsCount
2729
import org.jetbrains.kotlinx.dataframe.api.convert
2830
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
2931
import org.jetbrains.kotlinx.dataframe.api.forEach
32+
import org.jetbrains.kotlinx.dataframe.api.format
3033
import org.jetbrains.kotlinx.dataframe.api.getColumnGroup
34+
import org.jetbrains.kotlinx.dataframe.api.getColumns
3135
import org.jetbrains.kotlinx.dataframe.api.getFrameColumn
3236
import org.jetbrains.kotlinx.dataframe.api.print
3337
import org.jetbrains.kotlinx.dataframe.api.schema
3438
import org.jetbrains.kotlinx.dataframe.api.toFloat
3539
import org.jetbrains.kotlinx.dataframe.api.toMap
40+
import org.jetbrains.kotlinx.dataframe.api.with
3641
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
3742
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
3843
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
3944
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
4045
import org.jetbrains.kotlinx.dataframe.impl.io.SERIALIZATION_VERSION
4146
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS
4247
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA
48+
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.IS_FORMATTED
4349
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KIND
4450
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME
4551
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA
4652
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL
4753
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW
54+
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.TYPE
55+
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.TYPES
4856
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION
4957
import org.jetbrains.kotlinx.dataframe.impl.io.readJsonImpl
5058
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS
5159
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS
60+
import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils.convertToDataFrame
5261
import java.net.URL
5362
import kotlin.reflect.KType
5463
import kotlin.reflect.typeOf
@@ -1040,6 +1049,35 @@ class JsonTests {
10401049
val metadata = json[METADATA]!!.jsonObject
10411050
metadata[NROW]!!.jsonPrimitive.int shouldBe 1
10421051
metadata[NCOL]!!.jsonPrimitive.int shouldBe 4
1052+
metadata[IS_FORMATTED]!!.jsonPrimitive.boolean shouldBe false
1053+
val columns = metadata[COLUMNS]!!.jsonArray.map { it.jsonPrimitive.content }
1054+
columns shouldBe listOf("id", "node_id", "name", "full_name")
1055+
1056+
val decodedData = json[KOTLIN_DATAFRAME]!!.jsonArray
1057+
val decodedDf = DataFrame.readJsonStr(decodedData.toString())
1058+
decodedDf shouldBe df
1059+
}
1060+
1061+
@Suppress("USELESS_IS_CHECK")
1062+
@Test
1063+
fun `json with metadata flat formatted table`() {
1064+
@Language("json")
1065+
val data =
1066+
"""
1067+
[{"id":3602279,"node_id":"MDEwOlJlcG9zaXRvcnkzNjAyMjc5","name":"kotlin-web-demo","full_name":"JetBrains/kotlin-web-demo"}]
1068+
""".trimIndent()
1069+
// simulating the functions used to define whether the dataframe is formatted
1070+
val formattedDf = DataFrame.readJsonStr(data).format().with { background(blue) }
1071+
val df = convertToDataFrame(formattedDf)
1072+
val jsonStr = df.toJsonWithMetadata(df.rowsCount(), isFormatted = formattedDf is FormattedFrame<*>).trimIndent()
1073+
val json = parseJsonStr(jsonStr)
1074+
1075+
json[VERSION]!!.jsonPrimitive.content shouldBe SERIALIZATION_VERSION
1076+
1077+
val metadata = json[METADATA]!!.jsonObject
1078+
metadata[NROW]!!.jsonPrimitive.int shouldBe 1
1079+
metadata[NCOL]!!.jsonPrimitive.int shouldBe 4
1080+
metadata[IS_FORMATTED]!!.jsonPrimitive.boolean shouldBe true
10431081
val columns = metadata[COLUMNS]!!.jsonArray.map { it.jsonPrimitive.content }
10441082
columns shouldBe listOf("id", "node_id", "name", "full_name")
10451083

@@ -1079,6 +1117,8 @@ class JsonTests {
10791117
val df = DataFrame.readJson(testJson("repositories"))
10801118
val jsonStr = df.toJsonWithMetadata(df.rowsCount()).trimIndent()
10811119
val json = parseJsonStr(jsonStr)
1120+
json[METADATA]!!.jsonObject[IS_FORMATTED]!!.jsonPrimitive.boolean shouldBe false
1121+
10821122
val row = json[KOTLIN_DATAFRAME]!!.jsonArray[0].jsonObject
10831123

10841124
val contributors = row["contributors"]!!.jsonObject
@@ -1101,6 +1141,8 @@ class JsonTests {
11011141
val nestedFrameRowLimit = 20
11021142
val jsonStr = df.toJsonWithMetadata(df.rowsCount(), nestedFrameRowLimit).trimIndent()
11031143
val json = parseJsonStr(jsonStr)
1144+
json[METADATA]!!.jsonObject[IS_FORMATTED]!!.jsonPrimitive.boolean shouldBe false
1145+
11041146
val row = json[KOTLIN_DATAFRAME]!!.jsonArray[0].jsonObject
11051147

11061148
val contributors = row["contributors"]!!.jsonObject
@@ -1114,6 +1156,33 @@ class JsonTests {
11141156
decodedData.size shouldBe nestedFrameRowLimit
11151157
}
11161158

1159+
/**
 * Checks the metadata JSON produced for a frame whose frame columns hold formatted frames.
 *
 * The `repositories` test JSON is read and every frame column is converted so that each
 * nested frame becomes the result of `format().with { background(blue) }`.
 * The test then asserts that `is_formatted` is `true`, that the single `contributors` column is
 * described as a `ValueColumn` whose type starts with the qualified name of [FormattedFrame],
 * and that the cell itself is emitted as a JSON primitive starting with that same name.
 */
@Test
1160+
fun `json with metadata containing formatted nested frames`() {
1161+
val df = DataFrame.readJson(testJson("repositories"))
1162+
.convert { frameCols() }.with { it.format().with { background(blue) } }
1163+
1164+
// simulating the functions used to define whether the dataframe is formatted
1165+
val hasFormattedColumns = df.getColumns { colsAtAnyDepth().colsOf<FormattedFrame<*>?>() }.isNotEmpty()
1166+
val jsonStr = df.toJsonWithMetadata(df.rowsCount(), isFormatted = hasFormattedColumns).trimIndent()
1167+
1168+
val json = parseJsonStr(jsonStr)
1169+
val metadata = json[METADATA]!!.jsonObject
1170+
// the flag passed to toJsonWithMetadata must be reflected in the metadata object
1171+
metadata[IS_FORMATTED]!!.jsonPrimitive.boolean shouldBe true
1171+
metadata[NCOL]!!.jsonPrimitive.int shouldBe 1
1172+
metadata[NROW]!!.jsonPrimitive.int shouldBe 1
1173+
metadata[COLUMNS]!!.jsonArray.single().jsonPrimitive.content shouldBe "contributors"
1174+
// after the conversion the column is described as a value column, not a frame column
1175+
metadata[TYPES]!!.jsonArray.single().jsonObject.let {
1175+
it[KIND]!!.jsonPrimitive.content shouldBe "ValueColumn"
1176+
it[TYPE]!!.jsonPrimitive.content shouldStartWith FormattedFrame::class.qualifiedName!!
1177+
}
1178+
1179+
val row = json[KOTLIN_DATAFRAME]!!.jsonArray[0].jsonObject
1180+
1181+
// is read as value column
1182+
val contributors = row["contributors"]!!.jsonPrimitive.content
1183+
contributors shouldStartWith FormattedFrame::class.qualifiedName!!
1184+
}
1185+
11171186
@Test
11181187
fun `serialize column with list of objects`() {
11191188
val df = dataFrameOf("col")(Regex(".+").findAll("abc").toList())

dataframe-jupyter/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ package org.jetbrains.kotlinx.dataframe.jupyter
22

33
import kotlinx.serialization.ExperimentalSerializationApi
44
import org.jetbrains.kotlinx.dataframe.AnyFrame
5+
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
6+
import org.jetbrains.kotlinx.dataframe.api.colsOf
7+
import org.jetbrains.kotlinx.dataframe.api.getColumns
58
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
69
import org.jetbrains.kotlinx.dataframe.io.CustomEncoder
710
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
@@ -40,6 +43,9 @@ internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
4043

4144
val df = convertToDataFrame(value)
4245

46+
val isFormatted = reifiedDisplayConfiguration.cellFormatter != null ||
47+
df.hasFormattedColumns()
48+
4349
val limit = if (applyRowsLimit) {
4450
reifiedDisplayConfiguration.rowsLimit ?: df.rowsCount()
4551
} else {
@@ -67,10 +73,10 @@ internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
6773
// TODO Do we need to handle the improved meta data here as well?
6874
val jsonEncodedDf = when {
6975
!ideBuildNumber.supportsDynamicNestedTables() ->
70-
encodeFrameNoDynamicNestedTables(df, limit).toString()
76+
encodeFrameNoDynamicNestedTables(df = df, limit = limit, isFormatted = isFormatted).toString()
7177

7278
else -> {
73-
val encoders = buildList<CustomEncoder> {
79+
val encoders = buildList {
7480
if (ideBuildNumber.supportsDataFrameConvertableValues()) {
7581
add(DataframeConvertableEncoder(this))
7682
}
@@ -83,16 +89,20 @@ internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
8389
rowLimit = limit,
8490
nestedRowLimit = reifiedDisplayConfiguration.rowsLimit,
8591
customEncoders = encoders,
92+
isFormatted = isFormatted,
8693
)
8794
}
8895
}
8996

90-
notebook.renderAsIFrameAsNeeded(html, staticHtml, jsonEncodedDf)
97+
notebook.renderAsIFrameAsNeeded(data = html, staticData = staticHtml, jsonEncodedDf = jsonEncodedDf)
9198
} else {
92-
notebook.renderHtmlAsIFrameIfNeeded(html)
99+
notebook.renderHtmlAsIFrameIfNeeded(data = html)
93100
}
94101
}
95102

103+
/**
 * Returns `true` when this frame has at least one column, at any nesting depth,
 * whose element type is `FormattedFrame<*>?`; returns `false` otherwise.
 */
internal fun AnyFrame.hasFormattedColumns() =
104+
this.getColumns { colsAtAnyDepth().colsOf<FormattedFrame<*>?>() }.isNotEmpty()
105+
96106
/**
 * Returns `true` only when the IDE build number is known (non-null) and its major version
 * is at least `MIN_IDE_VERSION_SUPPORT_JSON_WITH_METADATA`.
 */
private fun KotlinNotebookPluginUtils.IdeBuildNumber?.supportsDynamicNestedTables() =
97107
this != null && majorVersion >= MIN_IDE_VERSION_SUPPORT_JSON_WITH_METADATA
98108

@@ -125,8 +135,8 @@ internal fun DataFrameHtmlData.toJupyterHtmlData() = HtmlData(style, body, scrip
125135
// region friend module error suppression
126136

127137
@Suppress("INVISIBLE_REFERENCE")
128-
private fun encodeFrameNoDynamicNestedTables(df: AnyFrame, limit: Int) =
129-
org.jetbrains.kotlinx.dataframe.impl.io.encodeFrameNoDynamicNestedTables(df, limit)
138+
private fun encodeFrameNoDynamicNestedTables(df: AnyFrame, limit: Int, isFormatted: Boolean) =
139+
org.jetbrains.kotlinx.dataframe.impl.io.encodeFrameNoDynamicNestedTables(df, limit, isFormatted)
130140

131141
@Suppress("INVISIBLE_REFERENCE", "ktlint:standard:function-naming")
132142
private fun DataframeConvertableEncoder(encoders: List<CustomEncoder>, rowLimit: Int? = null) =

docs/serialization_format.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
This document is an informal specification of the serialization format used for
33
rendering Kotlin dataframes in the Kotlin notebooks plugin of IntelliJ IDEA.
44

5-
### Version 2.1.0
5+
### Version 2.2.0
66

77
**1.0.0:**
88

@@ -18,15 +18,23 @@ rendering Kotlin dataframes in the Kotlin notebooks plugin of IntelliJ IDEA.
1818
**2.1.1:**
1919
* Added a new type of `ValueColumn` value that is DataFrameConvertable type
2020

21+
**2.2.0:**
22+
* Added the `is_formatted` property in the `metadata` of the dataframe.
23+
It's a marker that indicates that the dataframe is the result of a `FormattedFrame`
24+
or that it contains a `DataColumn<FormattedFrame<*>>` at any depth.
25+
This currently means the rendering should be handled by the HTML renderer instead of the "native" one.
26+
This property may also be used in the future if the "native" renderer ever gains formatting capabilities.
27+
2128
### Top level json structure
2229
```json
2330
{
24-
"$version": "2.1.0",
31+
"$version": "2.2.0",
2532
"metadata": {
2633
"columns": [ string, ... ], // column names
2734
"types": [ TypeDescriptor, ... ], // type description for each entry in "columns"
2835
"nrow": int,
29-
"ncol": int
36+
"ncol": int,
37+
"is_formatted": boolean
3038
},
3139
"kotlin_dataframe": [ Row, ... ]
3240
}

0 commit comments

Comments
 (0)