Skip to content

Commit 6dc691d

Browse files
authored
Merge pull request #1078 from Kotlin/aggregators
Aggregator implementation rework
2 parents 19baaba + 7ed9c3b commit 6dc691d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

63 files changed

+2146
-530
lines changed

core/api/core.api

+17-12
Original file line numberDiff line numberDiff line change
@@ -5302,26 +5302,30 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/impl/aggregation
53025302
public abstract fun aggregate (Ljava/lang/Iterable;)Ljava/lang/Object;
53035303
public abstract fun aggregate (Ljava/lang/Iterable;Lkotlin/reflect/KType;)Ljava/lang/Object;
53045304
public abstract fun aggregate (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Ljava/lang/Object;
5305+
public abstract fun aggregateCalculatingType (Ljava/lang/Iterable;Ljava/util/Set;)Ljava/lang/Object;
5306+
public static synthetic fun aggregateCalculatingType$default (Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator;Ljava/lang/Iterable;Ljava/util/Set;ILjava/lang/Object;)Ljava/lang/Object;
5307+
public abstract fun calculateReturnTypeOrNull (Ljava/util/Set;Z)Lkotlin/reflect/KType;
5308+
public abstract fun calculateReturnTypeOrNull (Lkotlin/reflect/KType;Z)Lkotlin/reflect/KType;
53055309
public abstract fun getName ()Ljava/lang/String;
5306-
public abstract fun getPreservesType ()Z
53075310
}
53085311

53095312
public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorKt {
53105313
public static final fun cast (Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator;)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator;
53115314
public static final fun cast2 (Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator;)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator;
53125315
}
53135316

5314-
public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch {
5317+
public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch1 {
53155318
public fun <init> (Ljava/lang/String;Lkotlin/jvm/functions/Function1;)V
53165319
public final fun getGetAggregator ()Lkotlin/jvm/functions/Function1;
53175320
public final fun getName ()Ljava/lang/String;
53185321
public final fun invoke (Ljava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator;
53195322
}
53205323

5321-
public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch$Factory {
5324+
public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch1$Factory : org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Provider {
53225325
public fun <init> (Lkotlin/jvm/functions/Function1;)V
5326+
public synthetic fun create (Ljava/lang/String;)Ljava/lang/Object;
5327+
public fun create (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch1;
53235328
public final fun getGetAggregator ()Lkotlin/jvm/functions/Function1;
5324-
public final fun getValue (Ljava/lang/Object;Lkotlin/reflect/KProperty;)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch;
53255329
}
53265330

53275331
public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch2 {
@@ -5331,21 +5335,22 @@ public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/
53315335
public final fun invoke (Ljava/lang/Object;Ljava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator;
53325336
}
53335337

5334-
public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch2$Factory {
5338+
public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch2$Factory : org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Provider {
53355339
public fun <init> (Lkotlin/jvm/functions/Function2;)V
5340+
public synthetic fun create (Ljava/lang/String;)Ljava/lang/Object;
5341+
public fun create (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch2;
53365342
public final fun getGetAggregator ()Lkotlin/jvm/functions/Function2;
5337-
public final fun getValue (Ljava/lang/Object;Lkotlin/reflect/KProperty;)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch2;
53385343
}
53395344

53405345
public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators {
53415346
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators;
5342-
public final fun getMax ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator;
5343-
public final fun getMean ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch;
5344-
public final fun getMedian ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/MergedValuesAggregator;
5345-
public final fun getMin ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator;
5346-
public final fun getPercentile ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch;
5347+
public final fun getMax ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/TwoStepAggregator;
5348+
public final fun getMean ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch1;
5349+
public final fun getMedian ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/FlatteningAggregator;
5350+
public final fun getMin ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/TwoStepAggregator;
5351+
public final fun getPercentile ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch1;
53475352
public final fun getStd ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch2;
5348-
public final fun getSum ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/NumbersAggregator;
5353+
public final fun getSum ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/TwoStepNumbersAggregator;
53495354
}
53505355

53515356
public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/modes/NoAggregationKt {

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt

+4-4
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1414
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1515
import org.jetbrains.kotlinx.dataframe.columns.values
1616
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators
17-
import org.jetbrains.kotlinx.dataframe.impl.aggregation.interComparableColumns
17+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.intraComparableColumns
1818
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll
1919
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor
2020
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfDelegated
@@ -55,7 +55,7 @@ public inline fun <reified T : Comparable<T>> AnyRow.rowMaxOf(): T = rowMaxOfOrN
5555

5656
// region DataFrame
5757

58-
public fun <T> DataFrame<T>.max(): DataRow<T> = maxFor(interComparableColumns())
58+
/**
 * Applies the `max` aggregation to the columns selected by `intraComparableColumns()` and
 * returns the result as a single [DataRow].
 *
 * This is a thin convenience wrapper; all work is delegated to [maxFor].
 *
 * NOTE(review): `intraComparableColumns()` is defined elsewhere; presumably it selects columns
 * whose values are comparable with each other within the same column — confirm against its definition.
 */
public fun <T> DataFrame<T>.max(): DataRow<T> = maxFor(intraComparableColumns())
5959

6060
public fun <T, C : Comparable<C>> DataFrame<T>.maxFor(columns: ColumnsForAggregateSelector<T, C?>): DataRow<T> =
6161
Aggregators.max.aggregateFor(this, columns)
@@ -135,7 +135,7 @@ public fun <T, C : Comparable<C>> DataFrame<T>.maxByOrNull(column: KProperty<C?>
135135
// region GroupBy
136136
@Refine
137137
@Interpretable("GroupByMax1")
138-
public fun <T> Grouped<T>.max(): DataFrame<T> = maxFor(interComparableColumns())
138+
// Applies the `max` aggregation to the columns selected by `intraComparableColumns()` on this
// grouped frame and returns the result as a DataFrame. All work is delegated to `maxFor`.
// NOTE(review): `intraComparableColumns()` is defined elsewhere; presumably it selects columns whose
// values are comparable with each other within the same column — confirm against its definition.
public fun <T> Grouped<T>.max(): DataFrame<T> = maxFor(intraComparableColumns())
139139

140140
@Refine
141141
@Interpretable("GroupByMax0")
@@ -251,7 +251,7 @@ public fun <T, C : Comparable<C>> Pivot<T>.maxBy(column: KProperty<C?>): Reduced
251251

252252
// region PivotGroupBy
253253

254-
public fun <T> PivotGroupBy<T>.max(separate: Boolean = false): DataFrame<T> = maxFor(separate, interComparableColumns())
254+
/**
 * Applies the `max` aggregation to the columns selected by `intraComparableColumns()` on this
 * [PivotGroupBy] and returns the result as a [DataFrame].
 *
 * This is a thin convenience wrapper; all work is delegated to [maxFor].
 *
 * NOTE(review): `intraComparableColumns()` is defined elsewhere; presumably it selects columns
 * whose values are comparable with each other within the same column — confirm against its definition.
 *
 * @param separate forwarded unchanged to [maxFor]; defaults to `false`.
 */
public fun <T> PivotGroupBy<T>.max(separate: Boolean = false): DataFrame<T> = maxFor(separate, intraComparableColumns())
255255

256256
public fun <T, R : Comparable<R>> PivotGroupBy<T>.maxFor(
257257
separate: Boolean = false,

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt

+8-7
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1414
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1515
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators
1616
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.cast
17-
import org.jetbrains.kotlinx.dataframe.impl.aggregation.interComparableColumns
17+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.intraComparableColumns
1818
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll
1919
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor
2020
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf
@@ -41,8 +41,9 @@ public inline fun <T, reified R : Comparable<R>> DataColumn<T>.medianOf(noinline
4141
// region DataRow
4242

4343
public fun AnyRow.rowMedianOrNull(): Any? =
44-
Aggregators.median.aggregateMixed(
45-
values().filterIsInstance<Comparable<Any?>>().asIterable(),
44+
Aggregators.median.aggregateCalculatingType(
45+
values = values().filterIsInstance<Comparable<Any?>>().asIterable(),
46+
valueTypes = df().columns().filter { it.valuesAreComparable() }.map { it.type() }.toSet(),
4647
)
4748

4849
/**
 * Non-null counterpart of [rowMedianOrNull]: evaluates [rowMedianOrNull] and passes the result
 * through `suggestIfNull("rowMedian")`.
 *
 * NOTE(review): `suggestIfNull` is defined elsewhere; presumably it fails with a hint to use the
 * `OrNull` variant when the result is `null` — confirm against its definition.
 */
public fun AnyRow.rowMedian(): Any = rowMedianOrNull().suggestIfNull("rowMedian")
@@ -56,7 +57,7 @@ public inline fun <reified T : Comparable<T>> AnyRow.rowMedianOf(): T =
5657

5758
// region DataFrame
5859

59-
public fun <T> DataFrame<T>.median(): DataRow<T> = medianFor(interComparableColumns())
60+
/**
 * Applies the `median` aggregation to the columns selected by `intraComparableColumns()` and
 * returns the result as a single [DataRow].
 *
 * This is a thin convenience wrapper; all work is delegated to [medianFor].
 *
 * NOTE(review): `intraComparableColumns()` is defined elsewhere; presumably it selects columns
 * whose values are comparable with each other within the same column — confirm against its definition.
 */
public fun <T> DataFrame<T>.median(): DataRow<T> = medianFor(intraComparableColumns())
6061

6162
public fun <T, C : Comparable<C>> DataFrame<T>.medianFor(columns: ColumnsForAggregateSelector<T, C?>): DataRow<T> =
6263
Aggregators.median.aggregateFor(this, columns)
@@ -107,7 +108,7 @@ public inline fun <T, reified R : Comparable<R>> DataFrame<T>.medianOf(
107108
// region GroupBy
108109
@Refine
109110
@Interpretable("GroupByMedian1")
110-
public fun <T> Grouped<T>.median(): DataFrame<T> = medianFor(interComparableColumns())
111+
// Applies the `median` aggregation to the columns selected by `intraComparableColumns()` on this
// grouped frame and returns the result as a DataFrame. All work is delegated to `medianFor`.
// NOTE(review): `intraComparableColumns()` is defined elsewhere; presumably it selects columns whose
// values are comparable with each other within the same column — confirm against its definition.
public fun <T> Grouped<T>.median(): DataFrame<T> = medianFor(intraComparableColumns())
111112

112113
@Refine
113114
@Interpretable("GroupByMedian0")
@@ -155,7 +156,7 @@ public inline fun <T, reified R : Comparable<R>> Grouped<T>.medianOf(
155156

156157
// region Pivot
157158

158-
public fun <T> Pivot<T>.median(separate: Boolean = false): DataRow<T> = medianFor(separate, interComparableColumns())
159+
/**
 * Applies the `median` aggregation to the columns selected by `intraComparableColumns()` on this
 * [Pivot] and returns the result as a single [DataRow].
 *
 * This is a thin convenience wrapper; all work is delegated to [medianFor].
 *
 * NOTE(review): `intraComparableColumns()` is defined elsewhere; presumably it selects columns
 * whose values are comparable with each other within the same column — confirm against its definition.
 *
 * @param separate forwarded unchanged to [medianFor]; defaults to `false`.
 */
public fun <T> Pivot<T>.median(separate: Boolean = false): DataRow<T> = medianFor(separate, intraComparableColumns())
159160

160161
public fun <T, C : Comparable<C>> Pivot<T>.medianFor(
161162
separate: Boolean = false,
@@ -199,7 +200,7 @@ public inline fun <T, reified R : Comparable<R>> Pivot<T>.medianOf(
199200
// region PivotGroupBy
200201

201202
public fun <T> PivotGroupBy<T>.median(separate: Boolean = false): DataFrame<T> =
202-
medianFor(separate, interComparableColumns())
203+
medianFor(separate, intraComparableColumns())
203204

204205
public fun <T, C : Comparable<C>> PivotGroupBy<T>.medianFor(
205206
separate: Boolean = false,

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt

+4-4
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1414
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1515
import org.jetbrains.kotlinx.dataframe.columns.values
1616
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators
17-
import org.jetbrains.kotlinx.dataframe.impl.aggregation.interComparableColumns
17+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.intraComparableColumns
1818
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll
1919
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor
2020
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfDelegated
@@ -55,7 +55,7 @@ public inline fun <reified T : Comparable<T>> AnyRow.rowMinOf(): T = rowMinOfOrN
5555

5656
// region DataFrame
5757

58-
public fun <T> DataFrame<T>.min(): DataRow<T> = minFor(interComparableColumns())
58+
/**
 * Applies the `min` aggregation to the columns selected by `intraComparableColumns()` and
 * returns the result as a single [DataRow].
 *
 * This is a thin convenience wrapper; all work is delegated to [minFor].
 *
 * NOTE(review): `intraComparableColumns()` is defined elsewhere; presumably it selects columns
 * whose values are comparable with each other within the same column — confirm against its definition.
 */
public fun <T> DataFrame<T>.min(): DataRow<T> = minFor(intraComparableColumns())
5959

6060
public fun <T, C : Comparable<C>> DataFrame<T>.minFor(columns: ColumnsForAggregateSelector<T, C?>): DataRow<T> =
6161
Aggregators.min.aggregateFor(this, columns)
@@ -135,7 +135,7 @@ public fun <T, C : Comparable<C>> DataFrame<T>.minByOrNull(column: KProperty<C?>
135135
// region GroupBy
136136
@Refine
137137
@Interpretable("GroupByMin1")
138-
public fun <T> Grouped<T>.min(): DataFrame<T> = minFor(interComparableColumns())
138+
// Applies the `min` aggregation to the columns selected by `intraComparableColumns()` on this
// grouped frame and returns the result as a DataFrame. All work is delegated to `minFor`.
// NOTE(review): `intraComparableColumns()` is defined elsewhere; presumably it selects columns whose
// values are comparable with each other within the same column — confirm against its definition.
public fun <T> Grouped<T>.min(): DataFrame<T> = minFor(intraComparableColumns())
139139

140140
@Refine
141141
@Interpretable("GroupByMin0")
@@ -252,7 +252,7 @@ public fun <T, C : Comparable<C>> Pivot<T>.minBy(column: KProperty<C?>): Reduced
252252

253253
// region PivotGroupBy
254254

255-
public fun <T> PivotGroupBy<T>.min(separate: Boolean = false): DataFrame<T> = minFor(separate, interComparableColumns())
255+
/**
 * Applies the `min` aggregation to the columns selected by `intraComparableColumns()` on this
 * [PivotGroupBy] and returns the result as a [DataFrame].
 *
 * This is a thin convenience wrapper; all work is delegated to [minFor].
 *
 * NOTE(review): `intraComparableColumns()` is defined elsewhere; presumably it selects columns
 * whose values are comparable with each other within the same column — confirm against its definition.
 *
 * @param separate forwarded unchanged to [minFor]; defaults to `false`.
 */
public fun <T> PivotGroupBy<T>.min(separate: Boolean = false): DataFrame<T> = minFor(separate, intraComparableColumns())
256256

257257
public fun <T, R : Comparable<R>> PivotGroupBy<T>.minFor(
258258
separate: Boolean = false,

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt

+5-5
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1212
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1313
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators
1414
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.cast
15-
import org.jetbrains.kotlinx.dataframe.impl.aggregation.interComparableColumns
15+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.intraComparableColumns
1616
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll
1717
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor
1818
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf
@@ -63,7 +63,7 @@ public inline fun <reified T : Comparable<T>> AnyRow.rowPercentileOf(percentile:
6363
// region DataFrame
6464

6565
public fun <T> DataFrame<T>.percentile(percentile: Double): DataRow<T> =
66-
percentileFor(percentile, interComparableColumns())
66+
percentileFor(percentile, intraComparableColumns())
6767

6868
public fun <T, C : Comparable<C>> DataFrame<T>.percentileFor(
6969
percentile: Double,
@@ -128,7 +128,7 @@ public inline fun <T, reified R : Comparable<R>> DataFrame<T>.percentileOf(
128128
// region GroupBy
129129

130130
public fun <T> Grouped<T>.percentile(percentile: Double): DataFrame<T> =
131-
percentileFor(percentile, interComparableColumns())
131+
percentileFor(percentile, intraComparableColumns())
132132

133133
public fun <T, C : Comparable<C>> Grouped<T>.percentileFor(
134134
percentile: Double,
@@ -184,7 +184,7 @@ public inline fun <T, reified R : Comparable<R>> Grouped<T>.percentileOf(
184184
// region Pivot
185185

186186
public fun <T> Pivot<T>.percentile(percentile: Double, separate: Boolean = false): DataRow<T> =
187-
percentileFor(percentile, separate, interComparableColumns())
187+
percentileFor(percentile, separate, intraComparableColumns())
188188

189189
public fun <T, C : Comparable<C>> Pivot<T>.percentileFor(
190190
percentile: Double,
@@ -238,7 +238,7 @@ public inline fun <T, reified R : Comparable<R>> Pivot<T>.percentileOf(
238238
// region PivotGroupBy
239239

240240
public fun <T> PivotGroupBy<T>.percentile(percentile: Double, separate: Boolean = false): DataFrame<T> =
241-
percentileFor(percentile, separate, interComparableColumns())
241+
percentileFor(percentile, separate, intraComparableColumns())
242242

243243
public fun <T, C : Comparable<C>> PivotGroupBy<T>.percentileFor(
244244
percentile: Double,

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sum.kt

+2-2
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,9 @@ public inline fun <T, reified R : Number> DataColumn<T>.sumOf(crossinline expres
4646
// region DataRow
4747

4848
public fun AnyRow.rowSum(): Number =
49-
Aggregators.sum.aggregateMixed(
49+
Aggregators.sum.aggregateCalculatingType(
5050
values = values().filterIsInstance<Number>(),
51-
types = columnTypes().filter { it.isSubtypeOf(typeOf<Number?>()) }.toSet(),
51+
valueTypes = columnTypes().filter { it.isSubtypeOf(typeOf<Number?>()) }.toSet(),
5252
) ?: 0
5353

5454
/**
 * Sums the values of this row that are instances of [T]; values of any other type are filtered out
 * before summing.
 *
 * The reified type is passed to `sum` as `typeOf<T>()`.
 * NOTE(review): the `sum(KType)` helper is defined elsewhere; its result for a row without any
 * [T] values is not visible here — confirm against its definition.
 */
public inline fun <reified T : Number> AnyRow.rowSumOf(): T = values().filterIsInstance<T>().sum(typeOf<T>())

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/BaseColumn.kt

+2
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,5 @@ public interface BaseColumn<out T> : ColumnReference<T> {
100100
/** Property-style shorthand for [BaseColumn.values]; each read calls `values()` again. */
internal val <T> BaseColumn<T>.values: Iterable<T> get() = values()
101101

102102
/** Property-style shorthand for `size()`; each read calls `size()` again. */
internal val AnyBaseCol.size: Int get() = size()
103+
104+
/** `true` when `size()` returns `0`, i.e. the column holds no values. */
internal val AnyBaseCol.isEmpty: Boolean get() = size() == 0

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/UnifyingNumbers.kt

+15-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package org.jetbrains.kotlinx.dataframe.documentation
22

3+
import org.jetbrains.kotlinx.dataframe.impl.UnifiedNumberTypeOptions
4+
35
/**
46
* ## Unifying Numbers
57
*
@@ -9,11 +11,11 @@ package org.jetbrains.kotlinx.dataframe.documentation
911
* The order is top-down from the most complex type to the simplest one.
1012
*
1113
* ```
12-
* BigDecimal
14+
* (BigDecimal)
1315
* / \
14-
* BigInteger \
16+
* (BigInteger) \
1517
* / \ \
16-
* ULong Long Double
18+
* <~ ULong Long ~> Double ..
1719
* .. | / | / | \..
1820
* \ | / | / |
1921
* UInt Int Float
@@ -27,16 +29,23 @@ package org.jetbrains.kotlinx.dataframe.documentation
2729
* For each number type in the graph, it holds that a number of that type can be expressed lossless by
2830
* a number of a more complex type (any of its parents).
2931
* This is because the more complex type has either a larger range or higher precision (in terms of bits).
32+
*
33+
* There are variants of this graph that exclude some types, such as `BigDecimal` and `BigInteger`.
34+
* In these cases `Double` could be considered the most complex type.
35+
* `Long`/`ULong` and `Double` could be joined to `Double`,
36+
* potentially losing a little precision, but a warning will be given.
37+
*
38+
* See [UnifiedNumberTypeOptions] for these settings.
3039
*/
3140
internal interface UnifyingNumbers {
3241

3342
/**
3443
* ```
35-
* BigDecimal
44+
* (BigDecimal)
3645
* / \
37-
* BigInteger \
46+
* (BigInteger) \
3847
* / \ \
39-
* ULong Long Double
48+
* <~ ULong Long ~> Double ..
4049
* .. | / | / | \..
4150
* \ | / | / |
4251
* UInt Int Float

0 commit comments

Comments
 (0)