-
Notifications
You must be signed in to change notification settings - Fork 75
Fix for #573: Change serialization format for rendering in IntelliJ IDEA #574
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 10 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
15a55e5
Update serialization for rendering of dataframe in Kotlin notebooks p…
ermolenkodev d3f64aa
Update JSON serialization tests according changes in format
ermolenkodev 74e0e56
Fix the imports that were accidentally replaced by wildcards *.
ermolenkodev 16d4d46
Extract large test json to the resource file
ermolenkodev c565e80
Fix formatting in json.kt
ermolenkodev d1e2ee7
Replace '.name' with '.toString()' in ColumnKind serialization implem…
ermolenkodev 240181d
Add documentation for Kotlin notebooks serialization format
ermolenkodev 5a19da6
Add support for different IDE versions in DataFrame rendering
ermolenkodev 2265c8e
Update build number condition check in KotlinNotebookPluginUtils
ermolenkodev fabf3dc
Refactor json serialization code
ermolenkodev e6becba
Remove unnecessary fully qualified references in writeJson.kt
ermolenkodev 7a4ad5c
Refactor DataFrame row limiting code
ermolenkodev File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
606 changes: 606 additions & 0 deletions
606
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt
Large diffs are not rendered by default.
Oops, something went wrong.
212 changes: 212 additions & 0 deletions
212
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
package org.jetbrains.kotlinx.dataframe.impl.io | ||
|
||
import com.beust.klaxon.JsonArray | ||
import com.beust.klaxon.JsonObject | ||
import com.beust.klaxon.KlaxonJson | ||
import org.jetbrains.kotlinx.dataframe.AnyCol | ||
import org.jetbrains.kotlinx.dataframe.AnyFrame | ||
import org.jetbrains.kotlinx.dataframe.ColumnsContainer | ||
import org.jetbrains.kotlinx.dataframe.DataColumn | ||
import org.jetbrains.kotlinx.dataframe.api.indices | ||
import org.jetbrains.kotlinx.dataframe.api.isList | ||
import org.jetbrains.kotlinx.dataframe.api.name | ||
import org.jetbrains.kotlinx.dataframe.api.rows | ||
import org.jetbrains.kotlinx.dataframe.api.toDataFrame | ||
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup | ||
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind | ||
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn | ||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS | ||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA | ||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KIND | ||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME | ||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA | ||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL | ||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW | ||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION | ||
import org.jetbrains.kotlinx.dataframe.io.arrayColumnName | ||
import org.jetbrains.kotlinx.dataframe.io.valueColumnName | ||
import org.jetbrains.kotlinx.dataframe.ncol | ||
import org.jetbrains.kotlinx.dataframe.nrow | ||
import org.jetbrains.kotlinx.dataframe.typeClass | ||
|
||
/**
 * Encodes the row at [index] of [frame] as a JSON object keyed by column name.
 *
 * Column groups are encoded recursively with [encodeRow], frame columns with
 * [encodeFrame], and every other column with [encodeValue].
 *
 * @return the encoded object, or `null` when [frame] has no columns.
 */
internal fun KlaxonJson.encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject? {
    val entries = frame.columns().map { column ->
        val encoded = when (column) {
            is ColumnGroup<*> -> encodeRow(column, index)
            is FrameColumn<*> -> encodeFrame(column[index])
            else -> encodeValue(column, index)
        }
        column.name to encoded
    }
    return if (entries.isEmpty()) null else obj(entries)
}
|
||
/**
 * Key names used in the JSON produced by the `*WithMetadata` encoders in this file.
 */
internal object SerializationKeys {
    // Keys of the top-level object built by encodeDataFrameWithMetadata.
    const val VERSION = "\$version"
    const val METADATA = "metadata"
    const val KOTLIN_DATAFRAME = "kotlin_dataframe"

    // Keys that appear inside a "metadata" object.
    const val COLUMNS = "columns"
    const val NROW = "nrow"
    const val NCOL = "ncol"
    const val KIND = "kind"

    // Payload key paired with "metadata" for column groups and frame columns.
    const val DATA = "data"
}
|
||
/** Format version written under the [SerializationKeys.VERSION] key by [encodeDataFrameWithMetadata]. */
internal const val SERIALIZATION_VERSION: String = "2.0.0"
|
||
/**
 * Encodes the row at [index] of [frame] as a JSON object keyed by column name,
 * wrapping nested structures together with a metadata object.
 *
 * - Column groups become `{ data: <encoded group row>, metadata: { kind } }`.
 * - Frame columns become `{ data: <encoded nested frame>, metadata: { kind, ncol, nrow } }`.
 *   `ncol`/`nrow` are taken from the nested frame before any row limiting.
 * - All other columns are encoded with [encodeValue] and are not wrapped.
 *
 * @param rowLimit when non-null, only the first [rowLimit] rows of each nested frame
 *   are encoded; the limit is passed on to deeper levels as well.
 * @return the encoded object, or `null` when [frame] has no columns.
 */
internal fun KlaxonJson.encodeRowWithMetadata(
    frame: ColumnsContainer<*>,
    index: Int,
    rowLimit: Int? = null
): JsonObject? {
    val entries = frame.columns().map { column ->
        val encoded = when (column) {
            is ColumnGroup<*> -> {
                val groupData = encodeRowWithMetadata(column, index, rowLimit)
                val groupMetadata = obj(KIND to ColumnKind.Group.toString())
                obj(DATA to groupData, METADATA to groupMetadata)
            }

            is FrameColumn<*> -> {
                val nestedFrame = column[index]
                val nestedData = if (rowLimit != null) {
                    encodeFrameWithMetadata(nestedFrame.rows().take(rowLimit).toDataFrame(), rowLimit)
                } else {
                    encodeFrameWithMetadata(nestedFrame)
                }
                val nestedMetadata = obj(
                    KIND to ColumnKind.Frame.toString(),
                    NCOL to nestedFrame.ncol,
                    NROW to nestedFrame.nrow
                )
                obj(DATA to nestedData, METADATA to nestedMetadata)
            }

            else -> encodeValue(column, index)
        }
        column.name to encoded
    }
    return if (entries.isEmpty()) null else obj(entries)
}
|
||
// Column type classes whose cell values encodeValue passes through unchanged
// (apart from NaN) instead of converting them with toString().
private val valueTypes =
    setOf(
        Boolean::class,
        Double::class,
        Int::class,
        Float::class,
        Long::class,
        Short::class,
        Byte::class,
    )
|
||
/**
 * Encodes the cell of [col] at [index] as a JSON-compatible value.
 *
 * - List columns: the cell becomes a JSON array; a `null` cell becomes an empty array.
 * - Columns whose type class is one of [valueTypes]: the cell is returned as is,
 *   except that a `Double`/`Float` NaN is returned as its string form.
 * - Any other column: the cell's `toString()`, or `null` for a `null` cell.
 */
internal fun KlaxonJson.encodeValue(col: AnyCol, index: Int): Any? {
    if (col.isList()) {
        val items = (col[index] ?: return array()) as List<*>
        return array(items)
    }

    if (col.typeClass in valueTypes) {
        val cell = col[index]
        val isNaN = (cell is Double && cell.isNaN()) || (cell is Float && cell.isNaN())
        return if (isNaN) cell.toString() else cell
    }

    return col[index]?.toString()
}
|
||
/**
 * Encodes every row of [frame] into a JSON array, using the metadata-carrying row format.
 *
 * For each row, the first applicable rule wins:
 * 1. If [extractValueColumn] found a column and its cell is non-null, that raw cell is emitted.
 * 2. Otherwise, if [extractArrayColumn] found a column of kind [ColumnKind.Frame] and its cell
 *    is non-null, the nested frame is encoded recursively.
 * 3. Otherwise the row is encoded with [encodeRowWithMetadata].
 *
 * @param rowLimit forwarded unchanged to the recursive and per-row encoders.
 */
internal fun KlaxonJson.encodeFrameWithMetadata(frame: AnyFrame, rowLimit: Int? = null): JsonArray<*> {
    val valueColumn = frame.extractValueColumn()
    val arrayColumn = frame.extractArrayColumn()
    val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame

    val encodedRows = frame.indices().map { rowIndex ->
        val plainValue = valueColumn?.get(rowIndex)
        // The array column is consulted only when the value column gave nothing for this row.
        val arrayCell = if (plainValue == null) arrayColumn?.get(rowIndex) else null
        when {
            plainValue != null -> plainValue
            arrayCell != null && arraysAreFrames -> encodeFrameWithMetadata(arrayCell as AnyFrame, rowLimit)
            else -> encodeRowWithMetadata(frame, rowIndex, rowLimit)
        }
    }

    return array(encodedRows)
}
|
||
/**
 * Looks for the column of this frame that holds "unnamed" plain values.
 *
 * Candidates are the columns whose name starts with [valueColumnName]; when there are
 * several, the one with the greatest name is taken. The candidate is returned only if
 * - [isPossibleToFindUnnamedColumns] is `true` for this frame,
 * - it is of kind [ColumnKind.Value], and
 * - in every row where it is non-null, all other columns are `null`.
 *
 * @return the value column, or `null` when no column satisfies the conditions above.
 */
internal fun AnyFrame.extractValueColumn(): DataColumn<*>? {
    // Cheap precondition first: no need to scan the columns when the answer is already known.
    if (!isPossibleToFindUnnamedColumns) return null

    val allColumns = columns()
    val valueCol = allColumns
        .filter { it.name.startsWith(valueColumnName) }
        .maxByOrNull { it.name }
        ?: return null

    if (valueCol.kind() != ColumnKind.Value) return null

    // check that value in this column is not null only when other values are null
    val isValidValueColumn = rows().all { row ->
        valueCol[row] == null ||
            allColumns.all { col -> col.name == valueCol.name || col[row] == null }
    }
    return if (isValidValueColumn) valueCol else null
}
|
||
/**
 * Whether this frame may contain an "unnamed" value/array column at all.
 *
 * If there is only one column, the validity checks in [extractValueColumn] and
 * [extractArrayColumn] are always true. At the same time, a frame such as
 * `dataFrameOf("value")(1, 2, 3)` must not be treated as having an unnamed column,
 * because that column was created by the user. Hence single-column frames are excluded.
 */
internal val AnyFrame.isPossibleToFindUnnamedColumns: Boolean
    get() {
        val hasSingleColumn = columns().size == 1
        return !hasSingleColumn
    }
|
||
/**
 * Looks for the column of this frame that holds "unnamed" arrays.
 *
 * Candidates are the columns whose name starts with [arrayColumnName]; when there are
 * several, the one with the greatest name is taken. The candidate is returned only if
 * - [isPossibleToFindUnnamedColumns] is `true` for this frame,
 * - it is not of kind [ColumnKind.Group], and
 * - in every row where it is non-null, all other columns are `null`.
 *
 * @return the array column, or `null` when no column satisfies the conditions above.
 */
internal fun AnyFrame.extractArrayColumn(): DataColumn<*>? {
    // Cheap precondition first: no need to scan the columns when the answer is already known.
    if (!isPossibleToFindUnnamedColumns) return null

    val allColumns = columns()
    val arrayCol = allColumns
        .filter { it.name.startsWith(arrayColumnName) }
        .maxByOrNull { it.name }
        ?: return null

    if (arrayCol.kind() == ColumnKind.Group) return null

    // check that value in this column is not null only when other values are null
    val isValidArrayColumn = rows().all { row ->
        arrayCol[row] == null ||
            allColumns.all { col -> col.name == arrayCol.name || col[row] == null }
    }
    return if (isValidArrayColumn) arrayCol else null
}
|
||
/**
 * Encodes every row of [frame] into a JSON array.
 *
 * For each row, the first applicable rule wins:
 * 1. If [extractValueColumn] found a column and its cell is non-null, that raw cell is emitted.
 * 2. Otherwise, if [extractArrayColumn] found a column of kind [ColumnKind.Frame] and its cell
 *    is non-null, the nested frame is encoded recursively.
 * 3. Otherwise the row is encoded with [encodeRow].
 */
internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> {
    val valueColumn = frame.extractValueColumn()
    val arrayColumn = frame.extractArrayColumn()
    val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame

    val encodedRows = frame.indices().map { rowIndex ->
        val plainValue = valueColumn?.get(rowIndex)
        // The array column is consulted only when the value column gave nothing for this row.
        val arrayCell = if (plainValue == null) arrayColumn?.get(rowIndex) else null
        when {
            plainValue != null -> plainValue
            arrayCell != null && arraysAreFrames -> encodeFrame(arrayCell as AnyFrame)
            else -> encodeRow(frame, rowIndex)
        }
    }

    return array(encodedRows)
}
|
||
/**
 * Builds the top-level JSON object for [frame]: the serialization version, a metadata
 * object (column names, row count and column count of the full [frame]), and the encoded
 * contents of the first [rowLimit] rows.
 *
 * @param rowLimit number of leading rows of [frame] to encode.
 * @param nestedRowLimit row limit passed to [encodeFrameWithMetadata] for nested frames;
 *   `null` passes no limit.
 */
internal fun KlaxonJson.encodeDataFrameWithMetadata(
    frame: AnyFrame,
    rowLimit: Int,
    nestedRowLimit: Int? = null,
): JsonObject {
    // Counts describe the whole frame, not the truncated part that is encoded below.
    val frameMetadata = obj(
        COLUMNS to frame.columnNames(),
        NROW to frame.rowsCount(),
        NCOL to frame.columnsCount()
    )
    val visibleRows = frame.rows().take(rowLimit).toDataFrame()
    val encodedRows = encodeFrameWithMetadata(visibleRows, rowLimit = nestedRowLimit)

    return obj(
        VERSION to SERIALIZATION_VERSION,
        METADATA to frameMetadata,
        KOTLIN_DATAFRAME to encodedRows,
    )
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.