From 4ae02d9cadb051841fc7df4c6382e0b6159d8db9 Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Mon, 4 Aug 2025 19:18:14 +0300 Subject: [PATCH] Prototype new iteration of ImportDataSchema annotation --- core/api/core.api | 23 +++++ .../dataframe/annotations/ImportDataSchema.kt | 4 + .../jetbrains/kotlinx/dataframe/io/guess.kt | 30 ++++++ dataframe-arrow/api/dataframe-arrow.api | 7 ++ .../kotlinx/dataframe/io/arrowReading.kt | 7 ++ ...etbrains.kotlinx.dataframe.io.SchemaReader | 1 + dataframe-csv/api/dataframe-csv.api | 14 +++ .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 14 +++ ...etbrains.kotlinx.dataframe.io.SchemaReader | 2 + dataframe-excel/api/dataframe-excel.api | 7 ++ .../jetbrains/kotlinx/dataframe/io/xlsx.kt | 7 ++ ...etbrains.kotlinx.dataframe.io.SchemaReader | 1 + dataframe-jdbc/api/dataframe-jdbc.api | 8 ++ .../jetbrains/kotlinx/dataframe/io/Jdbc.kt | 8 ++ dataframe-json/api/dataframe-json.api | 7 ++ .../jetbrains/kotlinx/dataframe/io/json.kt | 7 ++ ...etbrains.kotlinx.dataframe.io.SchemaReader | 1 + plugins/symbol-processor/build.gradle.kts | 1 + .../dataframe/ksp/DataFrameSymbolProcessor.kt | 99 ++++++++++++++++--- .../ksp/DataFrameSymbolProcessorProvider.kt | 25 ++++- .../jetbrains/dataframe/ksp/toJsonElement.kt | 53 ++++++++++ .../ksp/ImportedSchemaSerializationTests.kt | 55 +++++++++++ 22 files changed, 365 insertions(+), 16 deletions(-) create mode 100644 dataframe-arrow/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader create mode 100644 dataframe-csv/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader create mode 100644 dataframe-excel/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader create mode 100644 dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader create mode 100644 plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/toJsonElement.kt create mode 100644 
plugins/symbol-processor/src/test/kotlin/org/jetbrains/dataframe/ksp/ImportedSchemaSerializationTests.kt diff --git a/core/api/core.api b/core/api/core.api index afb2d7b7ef..880358c38d 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -227,6 +227,11 @@ public abstract interface annotation class org/jetbrains/kotlinx/dataframe/annot public abstract fun isOpen ()Z } +public abstract interface annotation class org/jetbrains/kotlinx/dataframe/annotations/DataSchemaSource : java/lang/annotation/Annotation { + public abstract fun qualifier ()Ljava/lang/String; + public abstract fun source ()Ljava/lang/String; +} + public final class org/jetbrains/kotlinx/dataframe/annotations/DataSchemaVisibility : java/lang/Enum { public static final field EXPLICIT_PUBLIC Lorg/jetbrains/kotlinx/dataframe/annotations/DataSchemaVisibility; public static final field IMPLICIT_PUBLIC Lorg/jetbrains/kotlinx/dataframe/annotations/DataSchemaVisibility; @@ -6190,6 +6195,12 @@ public final class org/jetbrains/kotlinx/dataframe/io/DataFrameHtmlData$Companio public static synthetic fun tableDefinitions$default (Lorg/jetbrains/kotlinx/dataframe/io/DataFrameHtmlData$Companion;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/DataFrameHtmlData; } +public abstract interface class org/jetbrains/kotlinx/dataframe/io/DataFrameProvider { + public abstract fun default ()Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public abstract fun getSchemaKType ()Lkotlin/reflect/KType; + public abstract fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; +} + public final class org/jetbrains/kotlinx/dataframe/io/DisplayConfiguration { public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/DisplayConfiguration$Companion; public synthetic fun (Ljava/lang/Integer;Ljava/lang/Integer;ILkotlin/jvm/functions/Function3;Ljava/lang/String;ZZZZZILkotlin/jvm/internal/DefaultConstructorMarker;)V @@ -6306,6 +6317,18 @@ public final class 
org/jetbrains/kotlinx/dataframe/io/RendererDecimalFormat$Comp public final fun of-VVLz-gw (Ljava/lang/String;)Ljava/lang/String; } +public abstract interface class org/jetbrains/kotlinx/dataframe/io/SchemaReader { + public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/SchemaReader$Companion; + public static final field DEFAULT_QUALIFIER Ljava/lang/String; + public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z + public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public abstract fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; +} + +public final class org/jetbrains/kotlinx/dataframe/io/SchemaReader$Companion { + public static final field DEFAULT_QUALIFIER Ljava/lang/String; +} + public final class org/jetbrains/kotlinx/dataframe/io/StringKt { public static final fun renderToString (Lorg/jetbrains/kotlinx/dataframe/DataFrame;IIZZZZZ)Ljava/lang/String; public static synthetic fun renderToString$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;IIZZZZZILjava/lang/Object;)Ljava/lang/String; diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt index 3f7e0d576c..b8382ee5ed 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt @@ -5,6 +5,7 @@ import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers +import org.jetbrains.kotlinx.dataframe.io.SchemaReader /** * Annotation preprocessing will generate a DataSchema interface from the data at `path`. 
@@ -43,6 +44,9 @@ public annotation class ImportDataSchema( val enableExperimentalOpenApi: Boolean = false, ) +@Target(AnnotationTarget.CLASS) +public annotation class DataSchemaSource(val source: String, val qualifier: String = SchemaReader.DEFAULT_QUALIFIER) + public enum class DataSchemaVisibility { INTERNAL, IMPLICIT_PUBLIC, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt index 527b0a639b..13e9cc0f64 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt @@ -59,6 +59,36 @@ public interface SupportedDataFrameFormat : SupportedFormat { public fun readDataFrame(file: File, header: List = emptyList()): DataFrame<*> } +/** + * User-facing API implemented by a companion object of an imported schema [org.jetbrains.kotlinx.dataframe.annotations.DataSchemaSource] + * Intended use: + * 1. Interact with this API to read dataframe of a desired type + * 2. API is used to implement "generic dataframe reader" based on [schemaKType] and other available methods + */ +public interface DataFrameProvider { + public val schemaKType: KType + + public fun default(): DataFrame + + public fun read(path: String): DataFrame +} + +/** + * Handler of classes annotated with [org.jetbrains.kotlinx.dataframe.annotations.DataSchemaSource]. + * Implementations must have a single zero-argument constructor + */ +public interface SchemaReader { + public companion object { + public const val DEFAULT_QUALIFIER: String = "default" + } + + public fun accepts(path: String, qualifier: String): Boolean = qualifier == DEFAULT_QUALIFIER + + public fun read(path: String): DataFrame<*> + + public fun default(path: String): DataFrame<*> = read(path) +} + /** * Implement this interface to provide additional [DataSchema] interface generation formats for DataFrames (such as OpenAPI). 
* Note, this doesn't add functionality to [DataFrame.Companion.read], just [ImportDataSchema] and Gradle plugin. diff --git a/dataframe-arrow/api/dataframe-arrow.api b/dataframe-arrow/api/dataframe-arrow.api index e0bf301c31..1ca2b256fd 100644 --- a/dataframe-arrow/api/dataframe-arrow.api +++ b/dataframe-arrow/api/dataframe-arrow.api @@ -47,6 +47,13 @@ public final class org/jetbrains/kotlinx/dataframe/io/ArrowReadingKt { public static synthetic fun toDataFrame$default (Lorg/apache/arrow/vector/ipc/ArrowReader;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; } +public final class org/jetbrains/kotlinx/dataframe/io/ArrowSchemaReader : org/jetbrains/kotlinx/dataframe/io/SchemaReader { + public fun ()V + public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z + public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; +} + public final class org/jetbrains/kotlinx/dataframe/io/ArrowTypesMatchingKt { public static final fun toArrowField (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;)Lorg/apache/arrow/vector/types/pojo/Field; public static synthetic fun toArrowField$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/apache/arrow/vector/types/pojo/Field; diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt index 68b4914714..708d01f877 100644 --- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt +++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt @@ -36,6 +36,13 @@ public class ArrowFeather : SupportedDataFrameFormat { DefaultReadArrowMethod(pathRepresentation) } +public class ArrowSchemaReader : SchemaReader { 
+ override fun accepts(path: String, qualifier: String): Boolean = + super.accepts(path, qualifier) && path.endsWith(".feather") + + override fun read(path: String): DataFrame<*> = DataFrame.readArrowFeather(path) +} + private const val READ_ARROW_FEATHER = "readArrowFeather" internal const val ARROW_PARQUET_DEFAULT_BATCH_SIZE = 32768L diff --git a/dataframe-arrow/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader b/dataframe-arrow/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader new file mode 100644 index 0000000000..5a80af969a --- /dev/null +++ b/dataframe-arrow/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader @@ -0,0 +1 @@ +org.jetbrains.kotlinx.dataframe.io.ArrowSchemaReader diff --git a/dataframe-csv/api/dataframe-csv.api b/dataframe-csv/api/dataframe-csv.api index 1ead1f667b..eabee0517d 100644 --- a/dataframe-csv/api/dataframe-csv.api +++ b/dataframe-csv/api/dataframe-csv.api @@ -10,6 +10,13 @@ public final class org/jetbrains/kotlinx/dataframe/io/CsvDeephaven : org/jetbrai public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; } +public final class org/jetbrains/kotlinx/dataframe/io/CsvSchemaReader : org/jetbrains/kotlinx/dataframe/io/SchemaReader { + public fun ()V + public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z + public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; +} + public final class org/jetbrains/kotlinx/dataframe/io/QuoteMode : java/lang/Enum { public static final field ALL Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode; public static final field ALL_NON_NULL Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode; @@ -102,6 +109,13 @@ public final class org/jetbrains/kotlinx/dataframe/io/TsvDeephaven : org/jetbrai public fun readDataFrame 
(Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; } +public final class org/jetbrains/kotlinx/dataframe/io/TsvSchemaReader : org/jetbrains/kotlinx/dataframe/io/SchemaReader { + public fun ()V + public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z + public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; +} + public final class org/jetbrains/kotlinx/dataframe/io/UtilKt { public static final fun getDEFAULT_DELIM_NULL_STRINGS ()Ljava/util/Set; } diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 3b4d408145..01e4395bd0 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -29,6 +29,20 @@ public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITE } } +public class CsvSchemaReader : SchemaReader { + override fun accepts(path: String, qualifier: String): Boolean = + super.accepts(path, qualifier) && path.endsWith(".csv") + + override fun read(path: String): DataFrame<*> = DataFrame.readCsv(path) +} + +public class TsvSchemaReader : SchemaReader { + override fun accepts(path: String, qualifier: String): Boolean = + super.accepts(path, qualifier) && path.endsWith(".tsv") + + override fun read(path: String): DataFrame<*> = DataFrame.readTsv(path) +} + private const val READ_CSV = "readCsv" internal class DefaultReadCsvMethod(path: String?, arguments: MethodArguments) : diff --git a/dataframe-csv/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader b/dataframe-csv/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader new file mode 100644 index 0000000000..14bd25f023 --- /dev/null +++ 
b/dataframe-csv/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader @@ -0,0 +1,2 @@ +org.jetbrains.kotlinx.dataframe.io.CsvSchemaReader +org.jetbrains.kotlinx.dataframe.io.TsvSchemaReader diff --git a/dataframe-excel/api/dataframe-excel.api b/dataframe-excel/api/dataframe-excel.api index 3dfa2b766d..0b7cc30b07 100644 --- a/dataframe-excel/api/dataframe-excel.api +++ b/dataframe-excel/api/dataframe-excel.api @@ -8,6 +8,13 @@ public final class org/jetbrains/kotlinx/dataframe/io/Excel : org/jetbrains/kotl public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; } +public final class org/jetbrains/kotlinx/dataframe/io/ExcelSchemaReader : org/jetbrains/kotlinx/dataframe/io/SchemaReader { + public fun ()V + public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z + public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; +} + public final class org/jetbrains/kotlinx/dataframe/io/FormattingOptions { public fun (Ljava/lang/String;Lorg/apache/poi/ss/usermodel/DataFormatter;)V public synthetic fun (Ljava/lang/String;Lorg/apache/poi/ss/usermodel/DataFormatter;ILkotlin/jvm/internal/DefaultConstructorMarker;)V diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index 719acb7f2d..cf47c82bd1 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -59,6 +59,13 @@ public class Excel : SupportedDataFrameFormat { DefaultReadExcelMethod(pathRepresentation) } +public class ExcelSchemaReader : SchemaReader { + override fun accepts(path: String, qualifier: String): Boolean = + super.accepts(path, qualifier) && (path.endsWith(".xlsx") || path.endsWith(".xls")) + + override
fun read(path: String): DataFrame<*> = DataFrame.readExcel(path) +} + private const val MESSAGE_REMOVE_1_1 = "Will be removed in 1.1." internal const val READ_EXCEL_OLD = "This function is only here for binary compatibility. $MESSAGE_REMOVE_1_1" diff --git a/dataframe-excel/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader b/dataframe-excel/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader new file mode 100644 index 0000000000..22cccb30e0 --- /dev/null +++ b/dataframe-excel/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader @@ -0,0 +1 @@ +org.jetbrains.kotlinx.dataframe.io.ExcelSchemaReader diff --git a/dataframe-jdbc/api/dataframe-jdbc.api b/dataframe-jdbc/api/dataframe-jdbc.api index 0a0fbf50f5..1c6609799e 100644 --- a/dataframe-jdbc/api/dataframe-jdbc.api +++ b/dataframe-jdbc/api/dataframe-jdbc.api @@ -1,3 +1,11 @@ +public abstract class org/jetbrains/kotlinx/dataframe/io/AbstractJdbcSchemaReader : org/jetbrains/kotlinx/dataframe/io/SchemaReader { + public fun ()V + public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z + public abstract fun config ()Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig; + public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; +} + public final class org/jetbrains/kotlinx/dataframe/io/DbConnectionConfig { public fun (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)V public synthetic fun (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt index 386500e005..22e9edfdb7 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt +++
b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt @@ -38,6 +38,14 @@ public class Jdbc : DefaultReadJdbcMethod(pathRepresentation) } +public abstract class AbstractJdbcSchemaReader : SchemaReader { + override fun accepts(path: String, qualifier: String): Boolean = qualifier == "jdbc" + + public abstract fun config(): DbConnectionConfig + + override fun read(path: String): DataFrame<*> = config().readDataFrame(path) +} + private fun DataFrame.Companion.readJDBC(stream: File): DataFrame<*> { TODO("Not yet implemented") } diff --git a/dataframe-json/api/dataframe-json.api b/dataframe-json/api/dataframe-json.api index 0c70a1c831..d2221262e8 100644 --- a/dataframe-json/api/dataframe-json.api +++ b/dataframe-json/api/dataframe-json.api @@ -80,3 +80,10 @@ public final class org/jetbrains/kotlinx/dataframe/io/JsonKt { public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/String;ZILjava/lang/Object;)V } +public final class org/jetbrains/kotlinx/dataframe/io/JsonSchemaReader : org/jetbrains/kotlinx/dataframe/io/SchemaReader { + public fun ()V + public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z + public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; +} + diff --git a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index a6acaeab3a..8a39c856c8 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -127,6 +127,13 @@ public class JSON( } } +public class JsonSchemaReader : SchemaReader { + override fun accepts(path: String, qualifier: String): Boolean = + qualifier == SchemaReader.DEFAULT_QUALIFIER && path.endsWith(".json") + + override fun read(path: String): DataFrame<*> = 
DataFrame.readJson(path) +} + internal const val ARRAY_COLUMN_NAME: String = "array" internal const val VALUE_COLUMN_NAME: String = "value" diff --git a/dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader b/dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader new file mode 100644 index 0000000000..d0b7612885 --- /dev/null +++ b/dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader @@ -0,0 +1 @@ +org.jetbrains.kotlinx.dataframe.io.JsonSchemaReader diff --git a/plugins/symbol-processor/build.gradle.kts b/plugins/symbol-processor/build.gradle.kts index 11bb05cb4d..6355761590 100644 --- a/plugins/symbol-processor/build.gradle.kts +++ b/plugins/symbol-processor/build.gradle.kts @@ -22,6 +22,7 @@ dependencies { implementation(projects.dataframe) // experimental implementation(projects.dataframeOpenapiGenerator) + implementation(libs.serialization.json) implementation(libs.ksp.api) implementation(libs.kotlin.reflect) diff --git a/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataFrameSymbolProcessor.kt b/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataFrameSymbolProcessor.kt index 942f2ae697..729b9e4ba6 100644 --- a/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataFrameSymbolProcessor.kt +++ b/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataFrameSymbolProcessor.kt @@ -1,32 +1,103 @@ package org.jetbrains.dataframe.ksp +import com.google.devtools.ksp.KspExperimental +import com.google.devtools.ksp.getAnnotationsByType +import com.google.devtools.ksp.processing.CodeGenerator import com.google.devtools.ksp.processing.KSPLogger import com.google.devtools.ksp.processing.Resolver import com.google.devtools.ksp.processing.SymbolProcessor import com.google.devtools.ksp.symbol.KSAnnotated +import com.google.devtools.ksp.symbol.KSClassDeclaration 
+import com.google.devtools.ksp.validate +import kotlinx.serialization.json.JsonPrimitive +import org.jetbrains.kotlinx.dataframe.annotations.DataSchemaSource +import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.io.SchemaReader +import java.io.File +import java.util.ServiceLoader class DataFrameSymbolProcessor( - private val codeGenerator: com.google.devtools.ksp.processing.CodeGenerator, + private val codeGenerator: CodeGenerator, private val logger: KSPLogger, - private val resolutionDir: String?, + private val configuration: DataFrameConfiguration, ) : SymbolProcessor { + @OptIn(KspExperimental::class) override fun process(resolver: Resolver): List { - val extensionsGenerator = ExtensionsGenerator(resolver, codeGenerator, logger) - val (validDataSchemas, invalidDataSchemas) = extensionsGenerator.resolveDataSchemaDeclarations() - validDataSchemas.forEach { - val file = it.origin.containingFile ?: return@forEach - extensionsGenerator.generateExtensions(file, it.origin, it.properties) + if (!configuration.experimentalImportSchema) { + val extensionsGenerator = ExtensionsGenerator(resolver, codeGenerator, logger) + val (validDataSchemas, invalidDataSchemas) = extensionsGenerator.resolveDataSchemaDeclarations() + validDataSchemas.forEach { + val file = it.origin.containingFile ?: return@forEach + extensionsGenerator.generateExtensions(file, it.origin, it.properties) + } + + val dataSchemaGenerator = DataSchemaGenerator(resolver, configuration.resolutionDir, logger, codeGenerator) + val importStatements = dataSchemaGenerator.resolveImportStatements() + importStatements.forEach { importStatement -> + dataSchemaGenerator.generateDataSchema(importStatement) + } + // by returning invalidDataSchemas we defer the processing of incomplete DataSchema declarations + // for example when DataSchema declaration references another one generated by @file:ImportDataSchema + return invalidDataSchemas + } + + val serviceLoader =
ServiceLoader.load(SchemaReader::class.java, SchemaReader::class.java.classLoader) + val providers = serviceLoader.toList() + + if (configuration.debug) { + logger.warn("Service path: " + System.getProperty("java.class.path")) + logger.warn("Found providers: " + providers.joinToString()) + logger.warn( + "Service URLs: " + + SchemaReader::class.java.classLoader?.getResources( + "META-INF/services/${SchemaReader::class.java.name}", + ) + ?.toList()?.joinToString(), + ) } - val dataSchemaGenerator = DataSchemaGenerator(resolver, resolutionDir, logger, codeGenerator) - val importStatements = dataSchemaGenerator.resolveImportStatements() - importStatements.forEach { importStatement -> - dataSchemaGenerator.generateDataSchema(importStatement) + if (configuration.importedSchemasOutput == null) { + logger.warn( + """ + Provide KSP argument: + ksp { + arg("$DATAFRAME_IMPORTED_SCHEMAS_OUTPUT", layout.projectDirectory.dir("src/schemas")) + } + """.trimIndent(), + ) + return emptyList() } - // by returning invalidDataSchemas we defer the processing of incomplete DataSchema declarations - // for example when DataSchema declaration references another one generated by @file:ImportDataSchema - return invalidDataSchemas + val (validDeclarations, invalidDeclarations) = resolver + .getSymbolsWithAnnotation(DataSchemaSource::class.qualifiedName!!)
+ .filterIsInstance() + .flatMap { classDeclaration -> + classDeclaration.getAnnotationsByType(DataSchemaSource::class).map { classDeclaration to it } + } + .partition { it.first.validate() } + + validDeclarations + .forEach { (classDeclaration, annotation) -> + val reader = providers.firstOrNull { it.accepts(annotation.source, annotation.qualifier) } + if (reader != null) { + val metadata = mapOf( + "format" to JsonPrimitive(reader::class.qualifiedName!!), + "data" to JsonPrimitive(annotation.source), + ) + val df = reader.default(annotation.source) + File( + File(configuration.importedSchemasOutput).apply { mkdirs() }, + "${classDeclaration.simpleName.asString()}.json", + ).writeText(df.schema().toJsonString(metadata = metadata)) + } else { + val availableReaders = providers.joinToString { it::class.qualifiedName!! } + val message = + "No reader found for ${classDeclaration.simpleName.asString()}. Available readers: $availableReaders" + logger.warn(message) + } + } + + return invalidDeclarations.map { it.first } } } diff --git a/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataFrameSymbolProcessorProvider.kt b/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataFrameSymbolProcessorProvider.kt index 616294e441..1cdf968551 100644 --- a/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataFrameSymbolProcessorProvider.kt +++ b/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataFrameSymbolProcessorProvider.kt @@ -4,9 +4,30 @@ import com.google.devtools.ksp.processing.SymbolProcessor import com.google.devtools.ksp.processing.SymbolProcessorEnvironment import com.google.devtools.ksp.processing.SymbolProcessorProvider +internal const val DATAFRAME_IMPORTED_SCHEMAS_OUTPUT = "dataframe.importedSchemasOutput" + class DataFrameSymbolProcessorProvider : SymbolProcessorProvider { override fun create(environment: SymbolProcessorEnvironment): SymbolProcessor { - val resolutionDir =
environment.options["dataframe.resolutionDir"] - return DataFrameSymbolProcessor(environment.codeGenerator, environment.logger, resolutionDir) + val configuration = DataFrameConfiguration( + resolutionDir = environment.options["dataframe.resolutionDir"], + importedSchemasOutput = environment.options[DATAFRAME_IMPORTED_SCHEMAS_OUTPUT], + experimentalImportSchema = environment.options["dataframe.experimentalImportSchema"].equals( + "true", + ignoreCase = true, + ), + debug = environment.options["dataframe.debug"].equals("true", ignoreCase = true), + ) + return DataFrameSymbolProcessor( + environment.codeGenerator, + environment.logger, + configuration, + ) } } + +data class DataFrameConfiguration( + val resolutionDir: String?, + val importedSchemasOutput: String?, + val experimentalImportSchema: Boolean, + val debug: Boolean, +) diff --git a/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/toJsonElement.kt b/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/toJsonElement.kt new file mode 100644 index 0000000000..736fba2e97 --- /dev/null +++ b/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/toJsonElement.kt @@ -0,0 +1,53 @@ +package org.jetbrains.dataframe.ksp + +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonElement +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.JsonPrimitive +import kotlinx.serialization.json.buildJsonObject +import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema + +/** + * Serializes data schema into a human-readable JSON format. 
+ * Input of compiler plugin for "imported data schema" feature + */ +fun DataFrameSchema.toJsonString( + json: Json = Json { prettyPrint = true }, + metadata: Map = emptyMap(), +): String = json.encodeToString(JsonElement.serializer(), toJsonElement(metadata)) + +fun DataFrameSchema.toJsonElement(metadata: Map = emptyMap()): JsonElement = + buildJsonObject { + metadata.forEach { (key, value) -> + put(key, value) + } + + put("schema", serializeSchema()) + } + +fun DataFrameSchema.serializeSchema(): JsonElement = columns.serializeColumns() + +private fun Map.serializeColumns(): JsonObject = + buildJsonObject { + forEach { (columnName, columnSchema) -> + val (element, suffix) = columnSchema.toJsonElement() + val finalColumnName = suffix?.let { "$columnName$it" } ?: columnName + put(finalColumnName, element) + } + } + +fun ColumnSchema.toJsonElement(): Pair = + when (this) { + is ColumnSchema.Frame -> { + schema.columns.serializeColumns() to ": FrameColumn" + } + + is ColumnSchema.Group -> { + schema.columns.serializeColumns() to ": ColumnGroup" + } + + is ColumnSchema.Value -> { + JsonPrimitive(type.toString()) to null + } + } diff --git a/plugins/symbol-processor/src/test/kotlin/org/jetbrains/dataframe/ksp/ImportedSchemaSerializationTests.kt b/plugins/symbol-processor/src/test/kotlin/org/jetbrains/dataframe/ksp/ImportedSchemaSerializationTests.kt new file mode 100644 index 0000000000..ad338b6451 --- /dev/null +++ b/plugins/symbol-processor/src/test/kotlin/org/jetbrains/dataframe/ksp/ImportedSchemaSerializationTests.kt @@ -0,0 +1,55 @@ +package org.jetbrains.dataframe.ksp + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.schema +import org.junit.Test + +class ImportedSchemaSerializationTests { + @Test + fun test() { + val df = dataFrameOf( + "a" to columnOf(""), + "group" to 
columnOf( + "b" to columnOf(1), + "c" to columnOf(3.0), + ), + "frame" to columnOf( + dataFrameOf( + "abc" to columnOf(111), + ), + ), + ) + + val res = df.schema().toJsonString() + res shouldBe + """ + { + "schema": { + "a": "kotlin.String", + "group: ColumnGroup": { + "b": "kotlin.Int", + "c": "kotlin.Double" + }, + "frame: FrameColumn": { + "abc": "kotlin.Int" + } + } + } + """.trimIndent() + } + + @Test + fun testEmpty() { + val df = DataFrame.empty() + + df.schema().toJsonString() shouldBe + """ + { + "schema": {} + } + """.trimIndent() + } +}