23 changes: 23 additions & 0 deletions core/api/core.api
@@ -227,6 +227,11 @@ public abstract interface annotation class org/jetbrains/kotlinx/dataframe/annot
public abstract fun isOpen ()Z
}

public abstract interface annotation class org/jetbrains/kotlinx/dataframe/annotations/DataSchemaSource : java/lang/annotation/Annotation {
public abstract fun qualifier ()Ljava/lang/String;
public abstract fun source ()Ljava/lang/String;
}

public final class org/jetbrains/kotlinx/dataframe/annotations/DataSchemaVisibility : java/lang/Enum {
public static final field EXPLICIT_PUBLIC Lorg/jetbrains/kotlinx/dataframe/annotations/DataSchemaVisibility;
public static final field IMPLICIT_PUBLIC Lorg/jetbrains/kotlinx/dataframe/annotations/DataSchemaVisibility;
@@ -6189,6 +6194,12 @@ public final class org/jetbrains/kotlinx/dataframe/io/DataFrameHtmlData$Companio
public static synthetic fun tableDefinitions$default (Lorg/jetbrains/kotlinx/dataframe/io/DataFrameHtmlData$Companion;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/DataFrameHtmlData;
}

public abstract interface class org/jetbrains/kotlinx/dataframe/io/DataFrameProvider {
public abstract fun default ()Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public abstract fun getSchemaKType ()Lkotlin/reflect/KType;
public abstract fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/io/DisplayConfiguration {
public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/DisplayConfiguration$Companion;
public synthetic fun <init> (Ljava/lang/Integer;Ljava/lang/Integer;ILkotlin/jvm/functions/Function3;Ljava/lang/String;ZZZZZILkotlin/jvm/internal/DefaultConstructorMarker;)V
@@ -6305,6 +6316,18 @@ public final class org/jetbrains/kotlinx/dataframe/io/RendererDecimalFormat$Comp
public final fun of-VVLz-gw (Ljava/lang/String;)Ljava/lang/String;
}

public abstract interface class org/jetbrains/kotlinx/dataframe/io/SchemaReader {
public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/SchemaReader$Companion;
public static final field DEFAULT_QUALIFIER Ljava/lang/String;
public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z
public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public abstract fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/io/SchemaReader$Companion {
public static final field DEFAULT_QUALIFIER Ljava/lang/String;
}

public final class org/jetbrains/kotlinx/dataframe/io/StringKt {
public static final fun renderToString (Lorg/jetbrains/kotlinx/dataframe/DataFrame;IIZZZZZ)Ljava/lang/String;
public static synthetic fun renderToString$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;IIZZZZZILjava/lang/Object;)Ljava/lang/String;
@@ -5,6 +5,7 @@ import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
import org.jetbrains.kotlinx.dataframe.io.SchemaReader

/**
* Annotation preprocessing will generate a DataSchema interface from the data at `path`.
@@ -43,6 +44,9 @@ public annotation class ImportDataSchema(
val enableExperimentalOpenApi: Boolean = false,
)

@Target(AnnotationTarget.CLASS)
public annotation class DataSchemaSource(val source: String, val qualifier: String = SchemaReader.DEFAULT_QUALIFIER)

Collaborator:
I can guess what source does, but qualifier is unclear for me. Some comments would be nice, even though it's just a proof-of-concept

Collaborator:
I think I remember vaguely from your demo that this allowed to make distinctions of some kind. But I don't remember exactly without a small example

Collaborator:
May be some KDocs here as well?
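
For illustration (not part of the diff), a hedged sketch of how the annotation might be used, based on the readers added elsewhere in this PR; the paths, query, and interface names are hypothetical. source points at the data to sample, while qualifier disambiguates between readers when the path alone is not enough: the file-based readers below match the default qualifier plus a file extension, whereas the JDBC reader matches qualifier = "jdbc".

import org.jetbrains.kotlinx.dataframe.annotations.DataSchemaSource

// Picked up by CsvSchemaReader: default qualifier and a ".csv" suffix (path is hypothetical).
@DataSchemaSource(source = "data/people.csv")
interface People

// Routed to a JDBC-backed reader, which matches on qualifier = "jdbc" rather than a file extension;
// source here is a table name or query passed through to the reader (also hypothetical).
@DataSchemaSource(source = "people", qualifier = "jdbc")
interface PeopleFromDb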


public enum class DataSchemaVisibility {
INTERNAL,
IMPLICIT_PUBLIC,
30 changes: 30 additions & 0 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt
@@ -59,6 +59,36 @@ public interface SupportedDataFrameFormat : SupportedFormat {
public fun readDataFrame(file: File, header: List<String> = emptyList()): DataFrame<*>
}

/**
* User-facing API implemented by a companion object of an imported schema [org.jetbrains.kotlinx.dataframe.annotations.DataSchemaSource]

Collaborator:
*the companion object

* Intended use:
* 1. Interact with this API to read a dataframe of the desired type
* 2. Use this API to implement a "generic dataframe reader" based on [schemaKType] and the other available methods
*/
public interface DataFrameProvider<T> {
public val schemaKType: KType

public fun default(): DataFrame<T>

public fun read(path: String): DataFrame<T>
}
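
As a sketch of the second intended use above (not part of the diff): a generic reader that dispatches on schemaKType. The readAs helper and the way the provider list is obtained are assumptions, not API from this PR.

import kotlin.reflect.typeOf
import org.jetbrains.kotlinx.dataframe.DataFrame

// Hypothetical helper: find the provider whose schema type matches T and delegate to it.
@Suppress("UNCHECKED_CAST")
inline fun <reified T> readAs(path: String, providers: List<DataFrameProvider<*>>): DataFrame<T> {
    val provider = providers.first { it.schemaKType == typeOf<T>() }
    return provider.read(path) as DataFrame<T>
}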

/**
* Handler of classes annotated with [org.jetbrains.kotlinx.dataframe.annotations.DataSchemaSource].
* Implementations must have a single zero-argument constructor

Collaborator:
they could also be object singletons maybe, since they have no state

Collaborator (author):
Good idea! It first needs to be adjusted in compiler plugin

*/
public interface SchemaReader {
public companion object {
public const val DEFAULT_QUALIFIER: String = "default"
}

public fun accepts(path: String, qualifier: String): Boolean = qualifier == DEFAULT_QUALIFIER

public fun read(path: String): DataFrame<*>

Collaborator:
we do need a way to pass extra arguments in the future. There are many ways to do this but we can figure that out later :)


public fun default(path: String): DataFrame<*> = read(path)

Collaborator:
I'd still rename this to readDefault or readSource, something more imperative.

}
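
Note (not part of the diff): concrete readers are discovered with java.util.ServiceLoader, as the symbol-processor changes below show, so each module lists its implementations in a provider-configuration resource named META-INF/services/org.jetbrains.kotlinx.dataframe.io.SchemaReader, one fully-qualified class name per line. That is what the short resource files added further down in this PR are; for example, the dataframe-csv module's file contains:

org.jetbrains.kotlinx.dataframe.io.CsvSchemaReader
org.jetbrains.kotlinx.dataframe.io.TsvSchemaReader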

/**
* Implement this interface to provide additional [DataSchema] interface generation formats for DataFrames (such as OpenAPI).
* Note, this doesn't add functionality to [DataFrame.Companion.read], just [ImportDataSchema] and Gradle plugin.
7 changes: 7 additions & 0 deletions dataframe-arrow/api/dataframe-arrow.api
@@ -47,6 +47,13 @@ public final class org/jetbrains/kotlinx/dataframe/io/ArrowReadingKt {
public static synthetic fun toDataFrame$default (Lorg/apache/arrow/vector/ipc/ArrowReader;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/io/ArrowSchemaReader : org/jetbrains/kotlinx/dataframe/io/SchemaReader {
public fun <init> ()V
public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z
public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/io/ArrowTypesMatchingKt {
public static final fun toArrowField (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;)Lorg/apache/arrow/vector/types/pojo/Field;
public static synthetic fun toArrowField$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/apache/arrow/vector/types/pojo/Field;
@@ -36,6 +36,13 @@ public class ArrowFeather : SupportedDataFrameFormat {
DefaultReadArrowMethod(pathRepresentation)
}

public class ArrowSchemaReader : SchemaReader {
override fun accepts(path: String, qualifier: String): Boolean =
super.accepts(path, qualifier) && path.endsWith(".feather")

override fun read(path: String): DataFrame<*> = DataFrame.readArrowFeather(path)
}

private const val READ_ARROW_FEATHER = "readArrowFeather"

internal const val ARROW_PARQUET_DEFAULT_BATCH_SIZE = 32768L
@@ -0,0 +1 @@
org.jetbrains.kotlinx.dataframe.io.ArrowSchemaReader
14 changes: 14 additions & 0 deletions dataframe-csv/api/dataframe-csv.api
@@ -10,6 +10,13 @@ public final class org/jetbrains/kotlinx/dataframe/io/CsvDeephaven : org/jetbrai
public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/io/CsvSchemaReader : org/jetbrains/kotlinx/dataframe/io/SchemaReader {
public fun <init> ()V
public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z
public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/io/QuoteMode : java/lang/Enum {
public static final field ALL Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
public static final field ALL_NON_NULL Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
@@ -102,6 +109,13 @@ public final class org/jetbrains/kotlinx/dataframe/io/TsvDeephaven : org/jetbrai
public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/io/TsvSchemaReader : org/jetbrains/kotlinx/dataframe/io/SchemaReader {
public fun <init> ()V
public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z
public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/io/UtilKt {
public static final fun getDEFAULT_DELIM_NULL_STRINGS ()Ljava/util/Set;
}
@@ -29,6 +29,20 @@ public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITE
}
}

public class CsvSchemaReader : SchemaReader {
override fun accepts(path: String, qualifier: String): Boolean =
super.accepts(path, qualifier) && path.endsWith(".csv")

override fun read(path: String): DataFrame<*> = DataFrame.readCsv(path)
}

public class TsvSchemaReader : SchemaReader {
override fun accepts(path: String, qualifier: String): Boolean =
super.accepts(path, qualifier) && path.endsWith(".tsv")

override fun read(path: String): DataFrame<*> = DataFrame.readTsv(path)
}

private const val READ_CSV = "readCsv"

internal class DefaultReadCsvMethod(path: String?, arguments: MethodArguments) :
@@ -0,0 +1,2 @@
org.jetbrains.kotlinx.dataframe.io.CsvSchemaReader
org.jetbrains.kotlinx.dataframe.io.TsvSchemaReader
7 changes: 7 additions & 0 deletions dataframe-excel/api/dataframe-excel.api
@@ -8,6 +8,13 @@ public final class org/jetbrains/kotlinx/dataframe/io/Excel : org/jetbrains/kotl
public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/io/ExcelSchemaReader : org/jetbrains/kotlinx/dataframe/io/SchemaReader {
public fun <init> ()V
public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z
public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/io/FormattingOptions {
public fun <init> (Ljava/lang/String;Lorg/apache/poi/ss/usermodel/DataFormatter;)V
public synthetic fun <init> (Ljava/lang/String;Lorg/apache/poi/ss/usermodel/DataFormatter;ILkotlin/jvm/internal/DefaultConstructorMarker;)V
@@ -59,6 +59,13 @@ public class Excel : SupportedDataFrameFormat {
DefaultReadExcelMethod(pathRepresentation)
}

public class ExcelSchemaReader : SchemaReader {
override fun accepts(path: String, qualifier: String): Boolean =
super.accepts(path, qualifier) && (path.endsWith(".xlsx") || path.endsWith(".xls"))

override fun read(path: String): DataFrame<*> = DataFrame.readExcel(path)
}

private const val MESSAGE_REMOVE_1_1 = "Will be removed in 1.1."
internal const val READ_EXCEL_OLD = "This function is only here for binary compatibility. $MESSAGE_REMOVE_1_1"

@@ -0,0 +1 @@
org.jetbrains.kotlinx.dataframe.io.ExcelSchemaReader
8 changes: 8 additions & 0 deletions dataframe-jdbc/api/dataframe-jdbc.api
@@ -1,3 +1,11 @@
public abstract class org/jetbrains/kotlinx/dataframe/io/AbstractJdbcSchemaReader : org/jetbrains/kotlinx/dataframe/io/SchemaReader {
public fun <init> ()V
public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z
public abstract fun config ()Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;
public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/io/DbConnectionConfig {
public fun <init> (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)V
public synthetic fun <init> (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V
@@ -38,6 +38,14 @@ public class Jdbc :
DefaultReadJdbcMethod(pathRepresentation)
}

public abstract class AbstractJdbcSchemaReader : SchemaReader {
override fun accepts(path: String, qualifier: String): Boolean = qualifier == "jdbc"

public abstract fun config(): DbConnectionConfig

override fun read(path: String): DataFrame<*> = config().readDataFrame(path)
}
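
A hedged sketch (not part of the diff): since only config() is abstract, a concrete JDBC reader is a few lines. The class name, H2 URL, and credentials are placeholders, and the DbConnectionConfig parameter names are assumed from the dataframe-jdbc API.

import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig

public class H2SchemaReader : AbstractJdbcSchemaReader() {
    // Connection details are illustrative only; read(path), inherited from the base class,
    // forwards the @DataSchemaSource source string to config().readDataFrame(path).
    override fun config(): DbConnectionConfig =
        DbConnectionConfig(url = "jdbc:h2:mem:testdb", user = "sa", password = "")
}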

private fun DataFrame.Companion.readJDBC(stream: File): DataFrame<*> {
TODO("Not yet implemented")
}
7 changes: 7 additions & 0 deletions dataframe-json/api/dataframe-json.api
@@ -80,3 +80,10 @@ public final class org/jetbrains/kotlinx/dataframe/io/JsonKt {
public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/String;ZILjava/lang/Object;)V
}

public final class org/jetbrains/kotlinx/dataframe/io/JsonSchemaReader : org/jetbrains/kotlinx/dataframe/io/SchemaReader {
public fun <init> ()V
public fun accepts (Ljava/lang/String;Ljava/lang/String;)Z
public fun default (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun read (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

@@ -127,6 +127,13 @@ public class JSON(
}
}

public class JsonSchemaReader : SchemaReader {
override fun accepts(path: String, qualifier: String): Boolean =
qualifier == SchemaReader.DEFAULT_QUALIFIER && path.endsWith(".json")

override fun read(path: String): DataFrame<*> = DataFrame.readJson(path)
}

internal const val ARRAY_COLUMN_NAME: String = "array"
internal const val VALUE_COLUMN_NAME: String = "value"

@@ -0,0 +1 @@
org.jetbrains.kotlinx.dataframe.io.JsonSchemaReader
1 change: 1 addition & 0 deletions plugins/symbol-processor/build.gradle.kts
@@ -22,6 +22,7 @@ dependencies {
implementation(projects.dataframe)
// experimental
implementation(projects.dataframeOpenapiGenerator)
implementation(libs.serialization.json)

implementation(libs.ksp.api)
implementation(libs.kotlin.reflect)
@@ -1,32 +1,103 @@
package org.jetbrains.dataframe.ksp

import com.google.devtools.ksp.KspExperimental
import com.google.devtools.ksp.getAnnotationsByType
import com.google.devtools.ksp.processing.CodeGenerator
import com.google.devtools.ksp.processing.KSPLogger
import com.google.devtools.ksp.processing.Resolver
import com.google.devtools.ksp.processing.SymbolProcessor
import com.google.devtools.ksp.symbol.KSAnnotated
import com.google.devtools.ksp.symbol.KSClassDeclaration
import com.google.devtools.ksp.validate
import kotlinx.serialization.json.JsonPrimitive
import org.jetbrains.kotlinx.dataframe.annotations.DataSchemaSource
import org.jetbrains.kotlinx.dataframe.api.schema
import org.jetbrains.kotlinx.dataframe.io.SchemaReader
import java.io.File
import java.util.ServiceLoader

class DataFrameSymbolProcessor(
private val codeGenerator: com.google.devtools.ksp.processing.CodeGenerator,
private val codeGenerator: CodeGenerator,
private val logger: KSPLogger,
private val resolutionDir: String?,
private val configuration: DataFrameConfiguration,
) : SymbolProcessor {

@OptIn(KspExperimental::class)
override fun process(resolver: Resolver): List<KSAnnotated> {
val extensionsGenerator = ExtensionsGenerator(resolver, codeGenerator, logger)
val (validDataSchemas, invalidDataSchemas) = extensionsGenerator.resolveDataSchemaDeclarations()
validDataSchemas.forEach {
val file = it.origin.containingFile ?: return@forEach
extensionsGenerator.generateExtensions(file, it.origin, it.properties)
if (!configuration.experimentalImportSchema) {
val extensionsGenerator = ExtensionsGenerator(resolver, codeGenerator, logger)
val (validDataSchemas, invalidDataSchemas) = extensionsGenerator.resolveDataSchemaDeclarations()
validDataSchemas.forEach {
val file = it.origin.containingFile ?: return@forEach
extensionsGenerator.generateExtensions(file, it.origin, it.properties)
}

val dataSchemaGenerator = DataSchemaGenerator(resolver, configuration.resolutionDir, logger, codeGenerator)
val importStatements = dataSchemaGenerator.resolveImportStatements()
importStatements.forEach { importStatement ->
dataSchemaGenerator.generateDataSchema(importStatement)
}
// by returning invalidDataSchemas we defer the processing of incomplete DataSchema declarations
// for example when DataSchema declaration references another one generated by @file:ImportDataSchema
return invalidDataSchemas
}

val serviceLoader = ServiceLoader.load(SchemaReader::class.java, SchemaReader::class.java.classLoader)
val providers = serviceLoader.toList()

if (configuration.debug) {
logger.warn("Service path: " + System.getProperty("java.class.path"))
logger.warn("Found providers: " + providers.joinToString())
logger.warn(
"Service URLs: " +
SchemaReader::class.java.classLoader?.getResources(
"META-INF/services/${SchemaReader::class.java.name}",
)
?.toList()?.joinToString(),
)
}

val dataSchemaGenerator = DataSchemaGenerator(resolver, resolutionDir, logger, codeGenerator)
val importStatements = dataSchemaGenerator.resolveImportStatements()
importStatements.forEach { importStatement ->
dataSchemaGenerator.generateDataSchema(importStatement)
if (configuration.importedSchemasOutput == null) {
logger.warn(
"""
Provide KSP argument:
ksp {
arg("$DATAFRAME_IMPORTED_SCHEMAS_OUTPUT", layout.projectDirectory.dir("src/schemas"))
}
""".trimIndent(),
)
return emptyList()
}

// by returning invalidDataSchemas we defer the processing of incomplete DataSchema declarations
// for example when DataSchema declaration references another one generated by @file:ImportDataSchema
return invalidDataSchemas
val (validDeclarations, invalidDeclarations) = resolver
.getSymbolsWithAnnotation(DataSchemaSource::class.qualifiedName!!)
.filterIsInstance<KSClassDeclaration>()
.flatMap { classDeclaration ->
classDeclaration.getAnnotationsByType(DataSchemaSource::class).map { classDeclaration to it }
}
.partition { it.first.validate() }

validDeclarations
.forEach { (classDeclaration, annotation) ->
val reader = providers.firstOrNull { it.accepts(annotation.source, annotation.qualifier) }
if (reader != null) {
val metadata = mapOf(
"format" to JsonPrimitive(reader::class.qualifiedName!!),
"data" to JsonPrimitive(annotation.source),
)
val df = reader.default(annotation.source)
File(
File(configuration.importedSchemasOutput),
"${classDeclaration.simpleName.asString()}.json",
).writeText(df.schema().toJsonString(metadata = metadata))
} else {
val availableReaders = providers.joinToString { it::class.qualifiedName!! }
val message =
"No reader found for ${classDeclaration.simpleName.asString()}. Available readers: $availableReaders"
logger.warn(message)
}
}

return invalidDeclarations.map { it.first }
}
}
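
To summarize the flow introduced above: a class annotated with @DataSchemaSource is matched against the SchemaReaders discovered via ServiceLoader; the first reader whose accepts(source, qualifier) returns true loads a sample dataframe with default(source), and its schema is written as <ClassName>.json, together with "format" and "data" metadata, into the directory given by the importedSchemasOutput KSP argument. With the hypothetical example from earlier, interface People with source = "data/people.csv" would produce People.json in that directory, presumably for the compiler plugin mentioned in the review comments to consume.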