Skip to content

Commit 2bcccac

Browse files
Merge branch 'master' into convert_kdocs
# Conflicts:
#	core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt
2 parents 099c8e2 + 00b712b commit 2bcccac

File tree

91 files changed

+1609
-3452
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

91 files changed

+1609
-3452
lines changed

build.gradle.kts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ dependencies {
5454
api(projects.dataframeExcel)
5555
api(projects.dataframeJdbc)
5656
api(projects.dataframeCsv)
57+
api(projects.dataframeJson)
5758

5859
// experimental, so not included by default:
5960
// api(projects.dataframeOpenapi)
@@ -64,6 +65,7 @@ dependencies {
6465
kover(projects.dataframeOpenapi)
6566
kover(projects.dataframeJdbc)
6667
kover(projects.dataframeCsv)
68+
kover(projects.dataframeJson)
6769
kover(projects.plugins.kotlinDataframe)
6870
kover(projects.dataframeJupyter)
6971
}

core/api/core.api

Lines changed: 33 additions & 106 deletions
Large diffs are not rendered by default.

core/build.gradle.kts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,6 @@ dependencies {
6666
api(libs.commonsCsv)
6767

6868
implementation(libs.commonsIo)
69-
implementation(libs.serialization.core)
70-
implementation(libs.serialization.json)
7169
implementation(libs.fastDoubleParser)
7270

7371
api(libs.kotlin.datetimeJvm)
@@ -82,6 +80,9 @@ dependencies {
8280
testImplementation(libs.kotlin.scriptingJvm)
8381
testImplementation(libs.jsoup)
8482
testImplementation(libs.sl4jsimple)
83+
testImplementation(projects.dataframeJson)
84+
testImplementation(libs.serialization.core)
85+
testImplementation(libs.serialization.json)
8586

8687
// for checking results
8788
testImplementation(libs.commonsStatisticsDescriptive)

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty
55
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
66
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
77
import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
8-
import org.jetbrains.kotlinx.dataframe.io.JSON
98

109
/**
1110
* Annotation preprocessing will generate a DataSchema interface from the data at `path`.
@@ -73,8 +72,11 @@ public annotation class JdbcOptions(
7372
)
7473

7574
public annotation class JsonOptions(
76-
/** Allows the choice of how to handle type clashes when reading a JSON file. */
77-
public val typeClashTactic: JSON.TypeClashTactic = JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS,
75+
/**
76+
* Allows the choice of how to handle type clashes when reading a JSON file.
77+
* Must be one of the string constants [JsonOptions.TypeClashTactics.ARRAY_AND_VALUE_COLUMNS] or [JsonOptions.TypeClashTactics.ANY_COLUMNS].
78+
* */
79+
public val typeClashTactic: String = TypeClashTactics.ARRAY_AND_VALUE_COLUMNS,
7880
/**
7981
* List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]>
8082
* will be created.
@@ -85,4 +87,9 @@ public annotation class JsonOptions(
8587
public val keyValuePaths: Array<String> = [],
8688
/** Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. */
8789
public val unifyNumbers: Boolean = true,
88-
)
90+
) {
91+
public object TypeClashTactics {
92+
public const val ARRAY_AND_VALUE_COLUMNS: String = "ARRAY_AND_VALUE_COLUMNS"
93+
public const val ANY_COLUMNS: String = "ANY_COLUMNS"
94+
}
95+
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/and.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
99
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
1010
import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver
1111
import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
12-
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnsList
12+
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnListImpl
1313
import kotlin.reflect.KProperty
1414

1515
// region ColumnsSelectionDsl
@@ -211,7 +211,7 @@ public interface AndColumnsSelectionDsl {
211211
* and right side of the [and][org.jetbrains.kotlinx.dataframe.api.AndColumnsSelectionDsl.and] operator.
212212
*/
213213
@Interpretable("And0")
214-
public infix fun <C> ColumnsResolver<C>.and(other: ColumnsResolver<C>): ColumnSet<C> = ColumnsList(this, other)
214+
public infix fun <C> ColumnsResolver<C>.and(other: ColumnsResolver<C>): ColumnSet<C> = ColumnListImpl(this, other)
215215

216216
/** ## And Operator
217217
* The [and][org.jetbrains.kotlinx.dataframe.api.AndColumnsSelectionDsl.and] operator allows you to combine selections of columns or simply select multiple columns at once.

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,14 @@ import org.jetbrains.kotlinx.dataframe.type
1515
* Creates a [FrameColumn] from [this] by splitting the dataframe into
1616
* smaller ones, each containing at most [size] rows.
1717
*/
18-
public fun <T> DataFrame<T>.chunked(size: Int, name: String = "groups"): FrameColumn<T> {
19-
val startIndices = (0 until nrow step size)
20-
return this.chunkedImpl(startIndices, name)
21-
}
18+
public fun <T> DataFrame<T>.chunked(size: Int, name: String = "groups"): FrameColumn<T> =
19+
chunked(
20+
startIndices = 0 until nrow step size,
21+
name = name,
22+
)
23+
24+
public fun <T> DataFrame<T>.chunked(startIndices: Iterable<Int>, name: String = "groups"): FrameColumn<T> =
25+
chunkedImpl(startIndices, name)
2226

2327
public fun <T> DataColumn<T>.chunked(size: Int): ValueColumn<List<T>> {
2428
val values = toList().chunked(size)

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,12 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext
1010
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
1111
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
1212
import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver
13+
import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy
1314
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
15+
import org.jetbrains.kotlinx.dataframe.impl.DataFrameReceiver
16+
import org.jetbrains.kotlinx.dataframe.impl.api.extractJoinColumns
1417
import org.jetbrains.kotlinx.dataframe.impl.api.joinImpl
18+
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnListImpl
1519
import kotlin.reflect.KProperty
1620

1721
@Refine
@@ -28,6 +32,8 @@ public fun <A, B> DataFrame<A>.join(
2832
type: JoinType = JoinType.Inner,
2933
): DataFrame<A> = join(other, type) { columns.toColumnSet() }
3034

35+
@Refine
36+
@Interpretable("InnerJoin")
3137
public fun <A, B> DataFrame<A>.innerJoin(
3238
other: DataFrame<B>,
3339
selector: JoinColumnsSelector<A, B>? = null,
@@ -36,6 +42,8 @@ public fun <A, B> DataFrame<A>.innerJoin(
3642
public fun <A, B> DataFrame<A>.innerJoin(other: DataFrame<B>, vararg columns: String): DataFrame<A> =
3743
innerJoin(other) { columns.toColumnSet() }
3844

45+
@Refine
46+
@Interpretable("LeftJoin")
3947
public fun <A, B> DataFrame<A>.leftJoin(
4048
other: DataFrame<B>,
4149
selector: JoinColumnsSelector<A, B>? = null,
@@ -44,6 +52,8 @@ public fun <A, B> DataFrame<A>.leftJoin(
4452
public fun <A, B> DataFrame<A>.leftJoin(other: DataFrame<B>, vararg columns: String): DataFrame<A> =
4553
leftJoin(other) { columns.toColumnSet() }
4654

55+
@Refine
56+
@Interpretable("RightJoin")
4757
public fun <A, B> DataFrame<A>.rightJoin(
4858
other: DataFrame<B>,
4959
selector: JoinColumnsSelector<A, B>? = null,
@@ -52,6 +62,8 @@ public fun <A, B> DataFrame<A>.rightJoin(
5262
public fun <A, B> DataFrame<A>.rightJoin(other: DataFrame<B>, vararg columns: String): DataFrame<A> =
5363
rightJoin(other) { columns.toColumnSet() }
5464

65+
@Refine
66+
@Interpretable("FullJoin")
5567
public fun <A, B> DataFrame<A>.fullJoin(
5668
other: DataFrame<B>,
5769
selector: JoinColumnsSelector<A, B>? = null,
@@ -60,6 +72,8 @@ public fun <A, B> DataFrame<A>.fullJoin(
6072
public fun <A, B> DataFrame<A>.fullJoin(other: DataFrame<B>, vararg columns: String): DataFrame<A> =
6173
fullJoin(other) { columns.toColumnSet() }
6274

75+
@Refine
76+
@Interpretable("FilterJoin")
6377
public fun <A, B> DataFrame<A>.filterJoin(
6478
other: DataFrame<B>,
6579
selector: JoinColumnsSelector<A, B>? = null,
@@ -68,6 +82,8 @@ public fun <A, B> DataFrame<A>.filterJoin(
6882
public fun <A, B> DataFrame<A>.filterJoin(other: DataFrame<B>, vararg columns: String): DataFrame<A> =
6983
filterJoin(other) { columns.toColumnSet() }
7084

85+
@Refine
86+
@Interpretable("ExcludeJoin")
7187
public fun <A, B> DataFrame<A>.excludeJoin(
7288
other: DataFrame<B>,
7389
selector: JoinColumnsSelector<A, B>? = null,
@@ -107,14 +123,44 @@ public interface JoinDsl<out A, out B> : ColumnsSelectionDsl<A> {
107123
@AccessApiOverload
108124
public infix fun <C> KProperty<C>.match(other: ColumnReference<C>): ColumnMatch<C> =
109125
ColumnMatch(toColumnAccessor(), other)
126+
127+
public companion object {
128+
public fun <A, B> defaultJoinColumns(left: DataFrame<A>, right: DataFrame<B>): JoinColumnsSelector<A, B> =
129+
{
130+
left.columnNames().intersect(right.columnNames().toSet())
131+
.map { it.toColumnAccessor() }
132+
.let { ColumnListImpl(it) }
133+
}
134+
135+
public fun <A, B> getColumns(
136+
left: DataFrame<A>,
137+
other: DataFrame<B>,
138+
selector: JoinColumnsSelector<A, B>,
139+
): List<ColumnMatch<Any?>> {
140+
val receiver = object : DataFrameReceiver<A>(left, UnresolvedColumnsPolicy.Fail), JoinDsl<A, B> {
141+
override val right: DataFrame<B> = DataFrameReceiver(other, UnresolvedColumnsPolicy.Fail)
142+
}
143+
val columns = selector(receiver, left)
144+
return columns.extractJoinColumns()
145+
}
146+
}
110147
}
111148

112-
public class ColumnMatch<C>(public val left: ColumnReference<C>, public val right: ColumnReference<C>) : ColumnSet<C> {
149+
public interface ColumnMatch<C> : ColumnSet<C> {
150+
public val left: ColumnReference<C>
151+
public val right: ColumnReference<C>
152+
}
153+
154+
internal class ColumnMatchImpl<C>(override val left: ColumnReference<C>, override val right: ColumnReference<C>) :
155+
ColumnMatch<C> {
113156

114157
override fun resolve(context: ColumnResolutionContext): List<ColumnWithPath<C>> =
115158
throw UnsupportedOperationException()
116159
}
117160

161+
public fun <C> ColumnMatch(left: ColumnReference<C>, right: ColumnReference<C>): ColumnMatch<C> =
162+
ColumnMatchImpl(left, right)
163+
118164
public typealias JoinColumnsSelector<A, B> = JoinDsl<A, B>.(ColumnsContainer<A>) -> ColumnsResolver<*>
119165

120166
public enum class JoinType {

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/joinWith.kt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package org.jetbrains.kotlinx.dataframe.api
33
import org.jetbrains.kotlinx.dataframe.DataFrame
44
import org.jetbrains.kotlinx.dataframe.DataRow
55
import org.jetbrains.kotlinx.dataframe.Selector
6+
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
7+
import org.jetbrains.kotlinx.dataframe.annotations.Refine
68
import org.jetbrains.kotlinx.dataframe.impl.api.joinWithImpl
79

810
public interface JoinedDataRow<out A, out B> : DataRow<A> {
@@ -11,27 +13,41 @@ public interface JoinedDataRow<out A, out B> : DataRow<A> {
1113

1214
public typealias JoinExpression<A, B> = Selector<JoinedDataRow<A, B>, Boolean>
1315

16+
@Refine
17+
@Interpretable("JoinWith")
1418
public fun <A, B> DataFrame<A>.joinWith(
1519
right: DataFrame<B>,
1620
type: JoinType = JoinType.Inner,
1721
joinExpression: JoinExpression<A, B>,
1822
): DataFrame<A> = joinWithImpl(right, type, addNewColumns = type.addNewColumns, joinExpression)
1923

24+
@Refine
25+
@Interpretable("InnerJoinWith")
2026
public fun <A, B> DataFrame<A>.innerJoinWith(right: DataFrame<B>, joinExpression: JoinExpression<A, B>): DataFrame<A> =
2127
joinWith(right, JoinType.Inner, joinExpression)
2228

29+
@Refine
30+
@Interpretable("LeftJoinWith")
2331
public fun <A, B> DataFrame<A>.leftJoinWith(right: DataFrame<B>, joinExpression: JoinExpression<A, B>): DataFrame<A> =
2432
joinWith(right, JoinType.Left, joinExpression)
2533

34+
@Refine
35+
@Interpretable("RightJoinWith")
2636
public fun <A, B> DataFrame<A>.rightJoinWith(right: DataFrame<B>, joinExpression: JoinExpression<A, B>): DataFrame<A> =
2737
joinWith(right, JoinType.Right, joinExpression)
2838

39+
@Refine
40+
@Interpretable("FullJoinWith")
2941
public fun <A, B> DataFrame<A>.fullJoinWith(right: DataFrame<B>, joinExpression: JoinExpression<A, B>): DataFrame<A> =
3042
joinWith(right, JoinType.Full, joinExpression)
3143

44+
@Refine
45+
@Interpretable("FilterJoinWith")
3246
public fun <A, B> DataFrame<A>.filterJoinWith(right: DataFrame<B>, joinExpression: JoinExpression<A, B>): DataFrame<A> =
3347
joinWithImpl(right, JoinType.Inner, addNewColumns = false, joinExpression)
3448

49+
@Refine
50+
@Interpretable("ExcludeJoinWith")
3551
public fun <A, B> DataFrame<A>.excludeJoinWith(
3652
right: DataFrame<B>,
3753
joinExpression: JoinExpression<A, B>,

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/none.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package org.jetbrains.kotlinx.dataframe.api
33
import org.jetbrains.kotlinx.dataframe.DataFrame
44
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
55
import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver
6-
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnsList
6+
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnListImpl
77

88
// region ColumnsSelectionDsl
99

@@ -71,7 +71,7 @@ public interface NoneColumnsSelectionDsl {
7171
*
7272
* @return An empty [ColumnsResolver].
7373
*/
74-
public fun none(): ColumnsResolver<*> = ColumnsList<Any?>(emptyList())
74+
public fun none(): ColumnsResolver<*> = ColumnListImpl<Any?>(emptyList())
7575
}
7676

7777
// endregion

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ private const val CAST = "cast"
2323
private const val VERIFY = "verify" // cast(true) is obscure; I think it's better to use a named argument here
2424
private const val READ_CSV = "readCSV"
2525
private const val READ_TSV = "readTSV"
26-
private const val READ_JSON = "readJson"
2726
private const val READ_JDBC = "readJdbc"
2827

2928
public abstract class AbstractDefaultReadMethod(
@@ -82,13 +81,6 @@ public abstract class AbstractDefaultReadMethod(
8281
override val additionalImports: List<String> = listOf("import org.jetbrains.kotlinx.dataframe.io.$methodName")
8382
}
8483

85-
internal class DefaultReadJsonMethod(path: String?, arguments: MethodArguments) :
86-
AbstractDefaultReadMethod(
87-
path = path,
88-
arguments = arguments,
89-
methodName = READ_JSON,
90-
)
91-
9284
internal class DefaultReadCsvMethod(path: String?, arguments: MethodArguments) :
9385
AbstractDefaultReadMethod(path, arguments, READ_CSV)
9486

0 commit comments

Comments
 (0)