Skip to content

Commit 860ea39

Browse files
committed
Merge branch 'master' into statistics-fixes
2 parents c8de339 + 82729b4 commit 860ea39

File tree

24 files changed

+512
-96
lines changed

24 files changed

+512
-96
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,36 @@ public inline fun <reified T> AnyFrame.castTo(
4242
verify: Boolean = true,
4343
): DataFrame<T> = cast<T>(verify = verify)
4444

45+
/**
46+
* With the compiler plugin, schema marker T of DataFrame can be a local type.
47+
* You cannot refer to it directly from your code, like a type argument for cast.
48+
* The example below shows a situation where you'd need to cast DataFrame<*> to DataFrame<plugin generated local type>.
49+
* This function helps by inferring the type from [schemaFrom].
50+
* ```
51+
*
52+
* // parse listOf("b:1:abc", "c:2:bca")
53+
* private fun convert(data: List<String>)/*: DataFrame<plugin generated local type>*/ = data.map { it.split(":") }.toDataFrame {
54+
* "part1" from { it[0] }
55+
* "part2" from { it[1].toInt() }
56+
* "part3" from { it[2] }
57+
* }
58+
*
59+
* fun serialize(data: List<String>, destination: File) {
60+
* convert(data).writeJson(destination)
61+
* }
62+
*
63+
* fun deserializeAndUse(file: File) {
64+
* val df = DataFrame.readJson(file).castTo(schemaFrom = ::convert)
65+
* // Possible to use properties
66+
* df.part1.print()
67+
* }
68+
* ```
69+
*/
70+
public inline fun <reified T> AnyFrame.castTo(
71+
// `schemaFrom` is never invoked or read in the body: it exists only so the compiler can infer T
// from the function's DataFrame<T> result type, hence the UNUSED_PARAMETER suppression.
@Suppress("UNUSED_PARAMETER") schemaFrom: Function<DataFrame<T>>,
72+
verify: Boolean = true,
73+
): DataFrame<T> = cast<T>(verify = verify)
74+
4575
public fun <T> AnyRow.cast(): DataRow<T> = this as DataRow<T>
4676

4777
public inline fun <reified T> AnyRow.cast(verify: Boolean = true): DataRow<T> = df().cast<T>(verify)[0]

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,12 @@ import org.jetbrains.kotlinx.dataframe.api.ParserOptions
2020
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
2121
import org.jetbrains.kotlinx.dataframe.api.asDataColumn
2222
import org.jetbrains.kotlinx.dataframe.api.cast
23-
import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame
24-
import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths
23+
import org.jetbrains.kotlinx.dataframe.api.convert
2524
import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
2625
import org.jetbrains.kotlinx.dataframe.api.isFrameColumn
2726
import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf
28-
import org.jetbrains.kotlinx.dataframe.api.toColumn
29-
import org.jetbrains.kotlinx.dataframe.api.tryParse
27+
import org.jetbrains.kotlinx.dataframe.api.map
28+
import org.jetbrains.kotlinx.dataframe.api.to
3029
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
3130
import org.jetbrains.kotlinx.dataframe.columns.size
3231
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
@@ -531,32 +530,34 @@ internal fun <T> DataColumn<String?>.parse(parser: StringParser<T>, options: Par
531530
)
532531
}
533532

534-
internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: ColumnsSelector<T, Any?>): DataFrame<T> {
535-
val convertedCols = getColumnsWithPaths(columns).map { col ->
533+
internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: ColumnsSelector<T, Any?>): DataFrame<T> =
534+
convert(columns).to { col ->
536535
when {
537536
// when a frame column is requested to be parsed,
538537
// parse each value/frame column at any depth inside each DataFrame in the frame column
539-
col.isFrameColumn() ->
540-
col.values.map {
538+
col.isFrameColumn() -> {
539+
col.map {
541540
it.parseImpl(options) {
542541
colsAtAnyDepth { !it.isColumnGroup() }
543542
}
544-
}.toColumn(col.name)
543+
}
544+
}
545545

546546
// when a column group is requested to be parsed,
547547
// parse each column in the group
548-
col.isColumnGroup() ->
548+
col.isColumnGroup() -> {
549549
col.parseImpl(options) { all() }
550550
.asColumnGroup(col.name())
551551
.asDataColumn()
552+
}
552553

553554
// Base case, parse the column if it's a `String?` column
554-
col.isSubtypeOf<String?>() ->
555-
col.cast<String?>().tryParse(options)
555+
col.isSubtypeOf<String?>() -> {
556+
col.cast<String?>().tryParseImpl(options)
557+
}
556558

557-
else -> col
558-
}.let { ColumnToInsert(col.path, it) }
559+
else -> {
560+
col
561+
}
562+
}
559563
}
560-
561-
return emptyDataFrame<T>().insertImpl(convertedCols)
562-
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import org.jetbrains.kotlinx.dataframe.AnyCol
55
import org.jetbrains.kotlinx.dataframe.AnyFrame
66
import org.jetbrains.kotlinx.dataframe.AnyRow
77
import org.jetbrains.kotlinx.dataframe.DataFrame
8+
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
89
import org.jetbrains.kotlinx.dataframe.api.FormattingDSL
910
import org.jetbrains.kotlinx.dataframe.api.RowColFormatter
1011
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
@@ -138,13 +139,13 @@ internal var sessionId = (Random().nextInt() % 128) shl 24
138139
internal fun nextTableId() = sessionId + (tableInSessionId++)
139140

140141
internal fun AnyFrame.toHtmlData(
141-
configuration: DisplayConfiguration = DisplayConfiguration.DEFAULT,
142+
defaultConfiguration: DisplayConfiguration = DisplayConfiguration.DEFAULT,
142143
cellRenderer: CellRenderer,
143144
): DataFrameHtmlData {
144145
val scripts = mutableListOf<String>()
145-
val queue = LinkedList<Pair<AnyFrame, Int>>()
146+
val queue = LinkedList<RenderingQueueItem>()
146147

147-
fun AnyFrame.columnToJs(col: AnyCol, rowsLimit: Int?): ColumnDataForJs {
148+
fun AnyFrame.columnToJs(col: AnyCol, rowsLimit: Int?, configuration: DisplayConfiguration): ColumnDataForJs {
148149
val values = if (rowsLimit != null) rows().take(rowsLimit) else rows()
149150
val scale = if (col.isNumber()) col.asNumbers().scale() else 1
150151
val format = if (scale > 0) {
@@ -155,13 +156,15 @@ internal fun AnyFrame.toHtmlData(
155156
val renderConfig = configuration.copy(decimalFormat = format)
156157
val contents = values.map {
157158
val value = it[col]
158-
if (value is AnyFrame) {
159-
if (value.isEmpty()) {
159+
val content = value.toDataFrameLikeOrNull()
160+
if (content != null) {
161+
val df = content.df()
162+
if (df.isEmpty()) {
160163
HtmlContent("", null)
161164
} else {
162165
val id = nextTableId()
163-
queue.add(value to id)
164-
DataFrameReference(id, value.size)
166+
queue.add(RenderingQueueItem(df, id, content.configuration(defaultConfiguration)))
167+
DataFrameReference(id, df.size)
165168
}
166169
} else {
167170
val html =
@@ -174,20 +177,25 @@ internal fun AnyFrame.toHtmlData(
174177
HtmlContent(html, style)
175178
}
176179
}
180+
val nested = if (col is ColumnGroup<*>) {
181+
col.columns().map { col.columnToJs(it, rowsLimit, configuration) }
182+
} else {
183+
emptyList()
184+
}
177185
return ColumnDataForJs(
178186
column = col,
179-
nested = if (col is ColumnGroup<*>) col.columns().map { col.columnToJs(it, rowsLimit) } else emptyList(),
187+
nested = nested,
180188
rightAlign = col.isSubtypeOf<Number?>(),
181189
values = contents,
182190
)
183191
}
184192

185193
val rootId = nextTableId()
186-
queue.add(this to rootId)
194+
queue.add(RenderingQueueItem(this, rootId, defaultConfiguration))
187195
while (!queue.isEmpty()) {
188-
val (nextDf, nextId) = queue.pop()
196+
val (nextDf, nextId, configuration) = queue.pop()
189197
val rowsLimit = if (nextId == rootId) configuration.rowsLimit else configuration.nestedRowsLimit
190-
val preparedColumns = nextDf.columns().map { nextDf.columnToJs(it, rowsLimit) }
198+
val preparedColumns = nextDf.columns().map { nextDf.columnToJs(it, rowsLimit, configuration) }
191199
val js = tableJs(preparedColumns, nextId, rootId, nextDf.nrow)
192200
scripts.add(js)
193201
}
@@ -196,6 +204,36 @@ internal fun AnyFrame.toHtmlData(
196204
return DataFrameHtmlData(style = "", body = body, script = script)
197205
}
198206

207+
/**
 * A cell value that can be rendered as a nested table.
 * Exposes the frame to render ([df]) and the display configuration to render it with ([configuration]).
 */
private interface DataFrameLike {
208+
// Display configuration to use for this value, given the caller's `default` configuration.
fun configuration(default: DisplayConfiguration): DisplayConfiguration
209+
210+
// The frame whose contents are rendered.
fun df(): AnyFrame
211+
}
212+
213+
/**
 * Wraps the receiver in a [DataFrameLike] when it is an [AnyFrame] or a [FormattedFrame];
 * returns `null` for any other value (including `null`).
 */
private fun Any?.toDataFrameLikeOrNull(): DataFrameLike? =
214+
when (this) {
215+
// Plain frame: rendered as-is with whatever configuration the caller passes as default.
is AnyFrame -> {
216+
object : DataFrameLike {
217+
override fun configuration(default: DisplayConfiguration) = default
218+
219+
override fun df(): AnyFrame = this@toDataFrameLikeOrNull
220+
}
221+
}
222+
223+
// Formatted frame: the configuration is obtained from `getDisplayConfiguration(default)` and the
// frame from `df`.
// NOTE(review): neither name is declared in the visible code; presumably both are members of
// FormattedFrame reached through the smart-cast receiver — confirm.
is FormattedFrame<*> -> {
224+
object : DataFrameLike {
225+
override fun configuration(default: DisplayConfiguration): DisplayConfiguration =
226+
getDisplayConfiguration(default)
227+
228+
override fun df(): AnyFrame = df
229+
}
230+
}
231+
232+
// Anything else is not wrapped.
else -> null
233+
}
234+
235+
/** One pending table in the HTML rendering queue: the frame to render, its table id and the configuration to render it with. */
private data class RenderingQueueItem(val df: DataFrame<*>, val id: Int, val configuration: DisplayConfiguration)
236+
199237
private const val DEFAULT_HTML_IMG_SIZE = 100
200238

201239
/**

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ public fun DataFrame.Companion.readJsonStr(
269269
* @param header Optional list of column names. If given, [text] will be read like an object with [header] being the keys.
270270
* @return [DataRow] from the given [text].
271271
*/
272+
@Refine
273+
@Interpretable("DataRowReadJsonStr")
272274
public fun DataRow.Companion.readJsonStr(
273275
@Language("json") text: String,
274276
header: List<String> = emptyList(),

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/RenderingTests.kt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,24 @@ package org.jetbrains.kotlinx.dataframe.rendering
22

33
import io.kotest.matchers.shouldBe
44
import io.kotest.matchers.string.shouldContain
5+
import io.kotest.matchers.string.shouldInclude
56
import io.kotest.matchers.string.shouldNotContain
67
import org.jetbrains.kotlinx.dataframe.DataColumn
8+
import org.jetbrains.kotlinx.dataframe.api.CellAttributes
79
import org.jetbrains.kotlinx.dataframe.api.add
810
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
911
import org.jetbrains.kotlinx.dataframe.api.columnOf
1012
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
1113
import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame
14+
import org.jetbrains.kotlinx.dataframe.api.format
1215
import org.jetbrains.kotlinx.dataframe.api.group
1316
import org.jetbrains.kotlinx.dataframe.api.into
1417
import org.jetbrains.kotlinx.dataframe.api.move
1518
import org.jetbrains.kotlinx.dataframe.api.named
1619
import org.jetbrains.kotlinx.dataframe.api.parse
1720
import org.jetbrains.kotlinx.dataframe.api.schema
1821
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
22+
import org.jetbrains.kotlinx.dataframe.api.with
1923
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
2024
import org.jetbrains.kotlinx.dataframe.io.escapeHTML
2125
import org.jetbrains.kotlinx.dataframe.io.formatter
@@ -24,7 +28,9 @@ import org.jetbrains.kotlinx.dataframe.io.maxWidth
2428
import org.jetbrains.kotlinx.dataframe.io.print
2529
import org.jetbrains.kotlinx.dataframe.io.renderToString
2630
import org.jetbrains.kotlinx.dataframe.io.renderToStringTable
31+
import org.jetbrains.kotlinx.dataframe.io.tableInSessionId
2732
import org.jetbrains.kotlinx.dataframe.io.toHTML
33+
import org.jetbrains.kotlinx.dataframe.io.toStandaloneHTML
2834
import org.jetbrains.kotlinx.dataframe.jupyter.DefaultCellRenderer
2935
import org.jetbrains.kotlinx.dataframe.jupyter.RenderedContent
3036
import org.jetbrains.kotlinx.dataframe.samples.api.TestBase
@@ -196,4 +202,29 @@ class RenderingTests : TestBase() {
196202
val rendered = schema.toString()
197203
rendered shouldBe "a: Int?\nb: IntArray\nc: Array<Int>\nd: Array<Int?>"
198204
}
205+
206+
// Checks that a FormattedFrame stored in a cell produces the same standalone HTML as the plain
// nested DataFrame, once the "api.FormattedFrame" text in the output is normalised to "DataFrame".
@Test
207+
fun `render nested FormattedFrame as DataFrame`() {
208+
// Cell attributes that contribute nothing, so the formatting itself adds no styling.
val empty = object : CellAttributes {
209+
override fun attributes(): List<Pair<String, String>> = emptyList()
210+
}
211+
val df = dataFrameOf("b")(1)
212+
213+
val formatted = dataFrameOf("a")(df.format { all() }.with { empty })
214+
val nestedFrame = dataFrameOf("a")(df)
215+
val configuration = DisplayConfiguration(enableFallbackStaticTables = false)
216+
// Reset the table counter before each render so both documents start from the same counter value.
tableInSessionId = 0
217+
val formattedHtml = formatted.toStandaloneHTML(configuration).toString()
218+
tableInSessionId = 0
219+
val regularHtml = nestedFrame.toStandaloneHTML(configuration).toString()
220+
221+
// Apart from the "api.FormattedFrame" text, the two outputs are expected to be identical.
formattedHtml.replace("api.FormattedFrame", "DataFrame") shouldBe regularHtml
222+
}
223+
224+
// Checks that a background format applied to a FormattedFrame nested in a cell shows up in the
// standalone HTML as a `style: "background-color` entry.
@Test
225+
fun `render cell attributes for nested FormattedFrame`() {
226+
val df = dataFrameOf("a")(dataFrameOf("b")(1).format { all() }.with { background(green) })
227+
val html = df.toStandaloneHTML()
228+
html.toString() shouldInclude "style: \"background-color"
229+
}
}
199230
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,36 @@ public inline fun <reified T> AnyFrame.castTo(
4242
verify: Boolean = true,
4343
): DataFrame<T> = cast<T>(verify = verify)
4444

45+
/**
46+
* With the compiler plugin, schema marker T of DataFrame can be a local type.
47+
* You cannot refer to it directly from your code, like a type argument for cast.
48+
* The example below shows a situation where you'd need to cast DataFrame<*> to DataFrame<plugin generated local type>.
49+
* This function helps by inferring the type from [schemaFrom].
50+
* ```
51+
*
52+
* // parse listOf("b:1:abc", "c:2:bca")
53+
* private fun convert(data: List<String>)/*: DataFrame<plugin generated local type>*/ = data.map { it.split(":") }.toDataFrame {
54+
* "part1" from { it[0] }
55+
* "part2" from { it[1].toInt() }
56+
* "part3" from { it[2] }
57+
* }
58+
*
59+
* fun serialize(data: List<String>, destination: File) {
60+
* convert(data).writeJson(destination)
61+
* }
62+
*
63+
* fun deserializeAndUse(file: File) {
64+
* val df = DataFrame.readJson(file).castTo(schemaFrom = ::convert)
65+
* // Possible to use properties
66+
* df.part1.print()
67+
* }
68+
* ```
69+
*/
70+
public inline fun <reified T> AnyFrame.castTo(
71+
// `schemaFrom` is never invoked or read in the body: it exists only so the compiler can infer T
// from the function's DataFrame<T> result type, hence the UNUSED_PARAMETER suppression.
@Suppress("UNUSED_PARAMETER") schemaFrom: Function<DataFrame<T>>,
72+
verify: Boolean = true,
73+
): DataFrame<T> = cast<T>(verify = verify)
74+
4575
public fun <T> AnyRow.cast(): DataRow<T> = this as DataRow<T>
4676

4777
public inline fun <reified T> AnyRow.cast(verify: Boolean = true): DataRow<T> = df().cast<T>(verify)[0]

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
2525
import org.jetbrains.kotlinx.dataframe.api.isFrameColumn
2626
import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf
2727
import org.jetbrains.kotlinx.dataframe.api.map
28-
import org.jetbrains.kotlinx.dataframe.api.parse
2928
import org.jetbrains.kotlinx.dataframe.api.to
3029
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
3130
import org.jetbrains.kotlinx.dataframe.columns.size
@@ -536,23 +535,29 @@ internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: Column
536535
when {
537536
// when a frame column is requested to be parsed,
538537
// parse each value/frame column at any depth inside each DataFrame in the frame column
539-
col.isFrameColumn() -> col.map {
540-
it.parseImpl(options) {
541-
colsAtAnyDepth { !it.isColumnGroup() }
538+
col.isFrameColumn() -> {
539+
col.map {
540+
it.parseImpl(options) {
541+
colsAtAnyDepth { !it.isColumnGroup() }
542+
}
542543
}
543544
}
544545

545546
// when a column group is requested to be parsed,
546547
// parse each column in the group
547-
col.isColumnGroup() ->
548+
col.isColumnGroup() -> {
548549
col.parseImpl(options) { all() }
549550
.asColumnGroup(col.name())
550551
.asDataColumn()
552+
}
551553

552554
// Base case, parse the column if it's a `String?` column
553-
col.isSubtypeOf<String?>() ->
555+
col.isSubtypeOf<String?>() -> {
554556
col.cast<String?>().tryParseImpl(options)
557+
}
555558

556-
else -> col
559+
else -> {
560+
col
561+
}
557562
}
558563
}

0 commit comments

Comments
 (0)