-
Notifications
You must be signed in to change notification settings - Fork 75
(internal MVP) Prototype new iteration of ImportDataSchema annotation #1416
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,6 +59,36 @@ public interface SupportedDataFrameFormat : SupportedFormat { | |
public fun readDataFrame(file: File, header: List<String> = emptyList()): DataFrame<*> | ||
} | ||
|
||
/** | ||
* User-facing API implemented by a companion object of an imported schema [org.jetbrains.kotlinx.dataframe.annotations.DataSchemaSource] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. *the companion object |
||
* Intended use: | ||
* 1. Interact with this API to read dataframe of a desired type | ||
* 2. API is used to implement "generic dataframe reader" based on [schemaKType] and other available methods | ||
*/ | ||
public interface DataFrameProvider<T> { | ||
public val schemaKType: KType | ||
|
||
public fun default(): DataFrame<T> | ||
|
||
public fun read(path: String): DataFrame<T> | ||
} | ||
|
||
/** | ||
* Handler of classes annotated with [org.jetbrains.kotlinx.dataframe.annotations.DataSchemaSource]. | ||
* Implementations must have a single zero-argument constructor | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. they could also be There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good idea! It first needs to be adjusted in the compiler plugin |
||
*/ | ||
public interface SchemaReader { | ||
public companion object { | ||
public const val DEFAULT_QUALIFIER: String = "default" | ||
} | ||
|
||
public fun accepts(path: String, qualifier: String): Boolean = qualifier == DEFAULT_QUALIFIER | ||
|
||
public fun read(path: String): DataFrame<*> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We do need a way to pass extra arguments in the future. There are many ways to do this, but we can figure that out later :) |
||
|
||
public fun default(path: String): DataFrame<*> = read(path) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd still rename this to |
||
} | ||
|
||
/** | ||
* Implement this interface to provide additional [DataSchema] interface generation formats for DataFrames (such as OpenAPI). | ||
* Note, this doesn't add functionality to [DataFrame.Companion.read], just [ImportDataSchema] and Gradle plugin. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
org.jetbrains.kotlinx.dataframe.io.ArrowSchemaReader |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
org.jetbrains.kotlinx.dataframe.io.CsvSchemaReader | ||
org.jetbrains.kotlinx.dataframe.io.TsvSchemaReader |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
org.jetbrains.kotlinx.dataframe.io.ExcelSchemaReader |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
org.jetbrains.kotlinx.dataframe.io.JsonSchemaReader |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,103 @@ | ||
package org.jetbrains.dataframe.ksp | ||
|
||
import com.google.devtools.ksp.KspExperimental | ||
import com.google.devtools.ksp.getAnnotationsByType | ||
import com.google.devtools.ksp.processing.CodeGenerator | ||
import com.google.devtools.ksp.processing.KSPLogger | ||
import com.google.devtools.ksp.processing.Resolver | ||
import com.google.devtools.ksp.processing.SymbolProcessor | ||
import com.google.devtools.ksp.symbol.KSAnnotated | ||
import com.google.devtools.ksp.symbol.KSClassDeclaration | ||
import com.google.devtools.ksp.validate | ||
import kotlinx.serialization.json.JsonPrimitive | ||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchemaSource | ||
import org.jetbrains.kotlinx.dataframe.api.schema | ||
import org.jetbrains.kotlinx.dataframe.io.SchemaReader | ||
import java.io.File | ||
import java.util.ServiceLoader | ||
|
||
class DataFrameSymbolProcessor( | ||
private val codeGenerator: com.google.devtools.ksp.processing.CodeGenerator, | ||
private val codeGenerator: CodeGenerator, | ||
private val logger: KSPLogger, | ||
private val resolutionDir: String?, | ||
private val configuration: DataFrameConfiguration, | ||
) : SymbolProcessor { | ||
|
||
@OptIn(KspExperimental::class) | ||
override fun process(resolver: Resolver): List<KSAnnotated> { | ||
val extensionsGenerator = ExtensionsGenerator(resolver, codeGenerator, logger) | ||
val (validDataSchemas, invalidDataSchemas) = extensionsGenerator.resolveDataSchemaDeclarations() | ||
validDataSchemas.forEach { | ||
val file = it.origin.containingFile ?: return@forEach | ||
extensionsGenerator.generateExtensions(file, it.origin, it.properties) | ||
if (!configuration.experimentalImportSchema) { | ||
val extensionsGenerator = ExtensionsGenerator(resolver, codeGenerator, logger) | ||
val (validDataSchemas, invalidDataSchemas) = extensionsGenerator.resolveDataSchemaDeclarations() | ||
validDataSchemas.forEach { | ||
val file = it.origin.containingFile ?: return@forEach | ||
extensionsGenerator.generateExtensions(file, it.origin, it.properties) | ||
} | ||
|
||
val dataSchemaGenerator = DataSchemaGenerator(resolver, configuration.resolutionDir, logger, codeGenerator) | ||
val importStatements = dataSchemaGenerator.resolveImportStatements() | ||
importStatements.forEach { importStatement -> | ||
dataSchemaGenerator.generateDataSchema(importStatement) | ||
} | ||
// by returning invalidDataSchemas we defer the processing of incomplete DataSchema declarations | ||
// for example when DataSchema declaration references another one generated by @file:ImportDataSchema | ||
return invalidDataSchemas | ||
} | ||
|
||
val serviceLoader = ServiceLoader.load(SchemaReader::class.java, SchemaReader::class.java.classLoader) | ||
val providers = serviceLoader.toList() | ||
|
||
if (configuration.debug) { | ||
logger.warn("Service path: " + System.getProperty("java.class.path")) | ||
logger.warn("Found providers: " + providers.joinToString()) | ||
logger.warn( | ||
"Service URLs: " + | ||
SchemaReader::class.java.classLoader?.getResources( | ||
"META-INF/services/${SchemaReader::class.java.name}", | ||
) | ||
?.toList()?.joinToString(), | ||
) | ||
} | ||
|
||
val dataSchemaGenerator = DataSchemaGenerator(resolver, resolutionDir, logger, codeGenerator) | ||
val importStatements = dataSchemaGenerator.resolveImportStatements() | ||
importStatements.forEach { importStatement -> | ||
dataSchemaGenerator.generateDataSchema(importStatement) | ||
if (configuration.importedSchemasOutput == null) { | ||
logger.warn( | ||
""" | ||
Provide KSP argument: | ||
ksp { | ||
arg("$DATAFRAME_IMPORTED_SCHEMAS_OUTPUT", layout.projectDirectory.dir("src/schemas")) | ||
} | ||
""".trimIndent(), | ||
) | ||
return emptyList() | ||
} | ||
|
||
// by returning invalidDataSchemas we defer the processing of incomplete DataSchema declarations | ||
// for example when DataSchema declaration references another one generated by @file:ImportDataSchema | ||
return invalidDataSchemas | ||
val (validDeclarations, invalidDeclarations) = resolver | ||
.getSymbolsWithAnnotation(DataSchemaSource::class.qualifiedName!!) | ||
.filterIsInstance<KSClassDeclaration>() | ||
.flatMap { classDeclaration -> | ||
classDeclaration.getAnnotationsByType(DataSchemaSource::class).map { classDeclaration to it } | ||
} | ||
.partition { it.first.validate() } | ||
|
||
validDeclarations | ||
.forEach { (classDeclaration, annotation) -> | ||
val reader = providers.firstOrNull { it.accepts(annotation.source, annotation.qualifier) } | ||
if (reader != null) { | ||
val metadata = mapOf( | ||
"format" to JsonPrimitive(reader::class.qualifiedName!!), | ||
"data" to JsonPrimitive(annotation.source), | ||
) | ||
val df = reader.default(annotation.source) | ||
File( | ||
File(configuration.importedSchemasOutput), | ||
"${classDeclaration.simpleName.asString()}.json", | ||
).writeText(df.schema().toJsonString(metadata = metadata)) | ||
} else { | ||
val availableReaders = providers.joinToString { it::class.qualifiedName!! } | ||
val message = | ||
"No reader found for ${classDeclaration.simpleName.asString()}. Available readers: $availableReaders" | ||
logger.warn(message) | ||
} | ||
} | ||
|
||
return invalidDeclarations.map { it.first } | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can guess what `source` does, but `qualifier` is unclear to me. Some comments would be nice, even though it's just a proof-of-concept.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I vaguely remember from your demo that this allowed making distinctions of some kind, but I don't remember exactly without a small example.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe some KDocs here as well?