Skip to content

Commit 502042a

Browse files
committed
Fixed aggregators based on feedback. Removed the `preservesType` property; it is unneeded because return types can be calculated quickly at runtime. Added an overload of `calculateReturnTypeOrNull` for multiple columns. Aggregator callers now use this function instead of `preservesType`.
1 parent 9a0e265 commit 502042a

File tree

30 files changed

+297
-106
lines changed

30 files changed

+297
-106
lines changed

core/api/core.api

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5304,9 +5304,9 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/impl/aggregation
53045304
public abstract fun aggregate (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Ljava/lang/Object;
53055305
public abstract fun aggregateCalculatingType (Ljava/lang/Iterable;Ljava/util/Set;)Ljava/lang/Object;
53065306
public static synthetic fun aggregateCalculatingType$default (Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator;Ljava/lang/Iterable;Ljava/util/Set;ILjava/lang/Object;)Ljava/lang/Object;
5307+
public abstract fun calculateReturnTypeOrNull (Ljava/util/Set;Z)Lkotlin/reflect/KType;
53075308
public abstract fun calculateReturnTypeOrNull (Lkotlin/reflect/KType;Z)Lkotlin/reflect/KType;
53085309
public abstract fun getName ()Ljava/lang/String;
5309-
public abstract fun getPreservesType ()Z
53105310
}
53115311

53125312
public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorKt {

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/BaseColumn.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,5 @@ public interface BaseColumn<out T> : ColumnReference<T> {
100100
internal val <T> BaseColumn<T>.values: Iterable<T> get() = values()
101101

102102
internal val AnyBaseCol.size: Int get() = size()
103+
104+
internal val AnyBaseCol.isEmpty: Boolean get() = size() == 0

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,6 @@ internal interface Aggregator<in Value, out Return> {
2424
/** The name of this aggregator. */
2525
val name: String
2626

27-
/** If `true`, [Value][Value]` == ` [Return][Return]. */
28-
val preservesType: Boolean
29-
3027
/**
3128
* Base function of [Aggregator].
3229
*
@@ -72,6 +69,17 @@ internal interface Aggregator<in Value, out Return> {
7269
* @return The return type of [aggregate] as [KType].
7370
*/
7471
fun calculateReturnTypeOrNull(type: KType, emptyInput: Boolean): KType?
72+
73+
/**
74+
* Function that can give the return type of [aggregate] with columns as [KType],
75+
* given the multiple types of the input.
76+
* This allows aggregators to avoid runtime type calculations.
77+
*
78+
* @param colTypes The types of the input columns.
79+
* @param colsEmpty If `true`, all the input columns are considered empty. This often affects the return type.
80+
* @return The return type of [aggregate] as [KType], or `null` if it cannot be calculated from the input types.
81+
*/
82+
fun calculateReturnTypeOrNull(colTypes: Set<KType>, colsEmpty: Boolean): KType?
7583
}
7684

7785
@PublishedApi

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorBase.kt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,4 +94,15 @@ internal abstract class AggregatorBase<in Value, out Return>(
9494
* Must be overridden to use.
9595
*/
9696
abstract override fun aggregate(columns: Iterable<DataColumn<Value?>>): Return?
97+
98+
/**
99+
* Function that can give the return type of [aggregate] with columns as [KType],
100+
* given the multiple types of the input.
101+
* This allows aggregators to avoid runtime type calculations.
102+
*
103+
* @param colTypes The types of the input columns.
104+
* @param colsEmpty If `true`, all the input columns are considered empty. This often affects the return type.
105+
* @return The return type of [aggregate] as [KType].
106+
*/
107+
abstract override fun calculateReturnTypeOrNull(colTypes: Set<KType>, colsEmpty: Boolean): KType?
97108
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch.kt

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,8 @@ internal class AggregatorOptionSwitch1<in Param1, out AggregatorType : Aggregato
3030
*/
3131
class Factory<in Param1, out AggregatorType : Aggregator<*, *>>(
3232
val getAggregator: (param1: Param1) -> AggregatorProvider<AggregatorType>,
33-
) : Provider<AggregatorOptionSwitch1<Param1, AggregatorType>> by Provider({ name ->
34-
AggregatorOptionSwitch1(name, getAggregator)
35-
})
33+
) : Provider<AggregatorOptionSwitch1<Param1, AggregatorType>> by
34+
Provider({ name -> AggregatorOptionSwitch1(name, getAggregator) })
3635
}
3736

3837
/**
@@ -66,7 +65,6 @@ internal class AggregatorOptionSwitch2<in Param1, in Param2, out AggregatorType
6665
*/
6766
class Factory<in Param1, in Param2, out AggregatorType : Aggregator<*, *>>(
6867
val getAggregator: (param1: Param1, param2: Param2) -> AggregatorProvider<AggregatorType>,
69-
) : Provider<AggregatorOptionSwitch2<Param1, Param2, AggregatorType>> by Provider({ name ->
70-
AggregatorOptionSwitch2(name, getAggregator)
71-
})
68+
) : Provider<AggregatorOptionSwitch2<Param1, Param2, AggregatorType>> by
69+
Provider({ name -> AggregatorOptionSwitch2(name, getAggregator) })
7270
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,24 +33,21 @@ internal object Aggregators {
3333
* -> Return?
3434
* ```
3535
*
36-
* It can also be used as a "simple" aggregator by providing the same function for both steps,
37-
* requires [preservesType][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.TwoStepAggregator.preservesType] be set to `true`.
36+
* It can also be used as a "simple" aggregator by providing the same function for both steps.
3837
*
3938
* See [FlatteningAggregator][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.FlatteningAggregator] for different behavior for multiple columns.
4039
*
4140
* @param name The name of this aggregator.
42-
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.AggregatorBase.calculateReturnTypeOrNull] function.
41+
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.TwoStepAggregator.calculateReturnTypeOrNull] function.
4342
* @param stepOneAggregator Functional argument for the [aggregate][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.TwoStepAggregator.aggregate] function, used within a [DataColumn][org.jetbrains.kotlinx.dataframe.DataColumn] or [Iterable].
4443
* @param stepTwoAggregator Functional argument for the aggregation function used between different columns.
4544
* It is run on the results of [stepOneAggregator].
46-
* @param preservesType If `true`, [Value][Value]` == `[Return][Return].
4745
*/
4846
private fun <Type> twoStepPreservingType(aggregator: Aggregate<Type, Type>) =
4947
TwoStepAggregator.Factory(
5048
getReturnTypeOrNull = preserveReturnTypeNullIfEmpty,
5149
stepOneAggregator = aggregator,
5250
stepTwoAggregator = aggregator,
53-
preservesType = true,
5451
)
5552

5653
/**
@@ -74,17 +71,15 @@ internal object Aggregators {
7471
* -> Return?
7572
* ```
7673
*
77-
* It can also be used as a "simple" aggregator by providing the same function for both steps,
78-
* requires [preservesType][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.TwoStepAggregator.preservesType] be set to `true`.
74+
* It can also be used as a "simple" aggregator by providing the same function for both steps.
7975
*
8076
* See [FlatteningAggregator][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.FlatteningAggregator] for different behavior for multiple columns.
8177
*
8278
* @param name The name of this aggregator.
83-
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.AggregatorBase.calculateReturnTypeOrNull] function.
79+
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.TwoStepAggregator.calculateReturnTypeOrNull] function.
8480
* @param stepOneAggregator Functional argument for the [aggregate][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.TwoStepAggregator.aggregate] function, used within a [DataColumn][org.jetbrains.kotlinx.dataframe.DataColumn] or [Iterable].
8581
* @param stepTwoAggregator Functional argument for the aggregation function used between different columns.
8682
* It is run on the results of [stepOneAggregator].
87-
* @param preservesType If `true`, [Value][Value]` == `[Return][Return].
8883
*/
8984
private fun <Value, Return> twoStepChangingType(
9085
getReturnTypeOrNull: CalculateReturnTypeOrNull,
@@ -94,7 +89,6 @@ internal object Aggregators {
9489
getReturnTypeOrNull = getReturnTypeOrNull,
9590
stepOneAggregator = stepOneAggregator,
9691
stepTwoAggregator = stepTwoAggregator,
97-
preservesType = false,
9892
)
9993

10094
/**
@@ -121,16 +115,14 @@ internal object Aggregators {
121115
* See [TwoStepAggregator][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.TwoStepAggregator] for different behavior for multiple columns.
122116
*
123117
* @param name The name of this aggregator.
124-
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.AggregatorBase.calculateReturnTypeOrNull] function.
118+
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.FlatteningAggregator.calculateReturnTypeOrNull] function.
125119
* @param aggregator Functional argument for the [aggregate][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.FlatteningAggregator.aggregate] function.
126120
* Note that it must be able to handle `null` values for the [Iterable] overload of [aggregate][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.FlatteningAggregator.aggregate].
127-
* @param preservesType If `true`, [Value][Value]` == `[Return][Return].
128121
*/
129122
private fun <Type> flatteningPreservingTypes(aggregate: Aggregate<Type, Type>) =
130123
FlatteningAggregator.Factory(
131124
getReturnTypeOrNull = preserveReturnTypeNullIfEmpty,
132125
aggregator = aggregate,
133-
preservesType = true,
134126
)
135127

136128
/**
@@ -157,18 +149,16 @@ internal object Aggregators {
157149
* See [TwoStepAggregator][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.TwoStepAggregator] for different behavior for multiple columns.
158150
*
159151
* @param name The name of this aggregator.
160-
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.AggregatorBase.calculateReturnTypeOrNull] function.
152+
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.FlatteningAggregator.calculateReturnTypeOrNull] function.
161153
* @param aggregator Functional argument for the [aggregate][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.FlatteningAggregator.aggregate] function.
162154
* Note that it must be able to handle `null` values for the [Iterable] overload of [aggregate][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.FlatteningAggregator.aggregate].
163-
* @param preservesType If `true`, [Value][Value]` == `[Return][Return].
164155
*/
165156
private fun <Value, Return> flatteningChangingTypes(
166157
getReturnTypeOrNull: CalculateReturnTypeOrNull,
167158
aggregate: Aggregate<Value, Return>,
168159
) = FlatteningAggregator.Factory(
169160
getReturnTypeOrNull = getReturnTypeOrNull,
170161
aggregator = aggregate,
171-
preservesType = false,
172162
)
173163

174164
/**
@@ -196,7 +186,7 @@ internal object Aggregators {
196186
* ```
197187
*
198188
* @param name The name of this aggregator.
199-
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.AggregatorBase.calculateReturnTypeOrNull] function.
189+
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.TwoStepNumbersAggregator.calculateReturnTypeOrNull] function.
200190
* @param aggregator Functional argument for the [aggregate][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.TwoStepNumbersAggregator.aggregate] function, used within a [DataColumn][org.jetbrains.kotlinx.dataframe.DataColumn] or [Iterable].
201191
* While it takes a [Number] argument, you can assume that all values are of the same specific type, however,
202192
* this type can be different for different calls to [aggregator][org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.AggregatorBase.aggregator].

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/FlatteningAggregator.kt

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators
22

33
import org.jetbrains.kotlinx.dataframe.DataColumn
44
import org.jetbrains.kotlinx.dataframe.impl.commonType
5+
import kotlin.reflect.KType
56
import kotlin.reflect.full.withNullability
67

78
/**
@@ -29,13 +30,11 @@ import kotlin.reflect.full.withNullability
2930
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull] function.
3031
* @param aggregator Functional argument for the [aggregate] function.
3132
* Note that it must be able to handle `null` values for the [Iterable] overload of [aggregate].
32-
* @param preservesType If `true`, [Value][Value]` == `[Return][Return].
3333
*/
3434
internal class FlatteningAggregator<in Value, out Return>(
3535
name: String,
3636
getReturnTypeOrNull: CalculateReturnTypeOrNull,
3737
aggregator: Aggregate<Value, Return>,
38-
override val preservesType: Boolean,
3938
) : AggregatorBase<Value, Return>(name, getReturnTypeOrNull, aggregator) {
4039

4140
/**
@@ -49,23 +48,34 @@ internal class FlatteningAggregator<in Value, out Return>(
4948
return aggregate(allValues.asIterable(), commonType)
5049
}
5150

51+
/**
52+
* Function that can give the return type of [aggregate] with columns as [KType],
53+
* given the multiple types of the input.
54+
* This allows aggregators to avoid runtime type calculations.
55+
*
56+
* @param colTypes The types of the input columns.
57+
* @param colsEmpty If `true`, all the input columns are considered empty. This often affects the return type.
58+
* @return The return type of [aggregate] as [KType], calculated from the non-nullable common type of [colTypes]; may be `null`.
59+
*/
60+
override fun calculateReturnTypeOrNull(colTypes: Set<KType>, colsEmpty: Boolean): KType? {
61+
val commonType = colTypes.commonType().withNullability(false)
62+
return calculateReturnTypeOrNull(commonType, colsEmpty)
63+
}
64+
5265
/**
5366
* Creates [FlatteningAggregator].
5467
*
5568
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull] function.
5669
* @param aggregator Functional argument for the [aggregate] function.
57-
* @param preservesType If `true`, [Value][Value]` == `[Return][Return].
5870
*/
5971
class Factory<in Value, out Return>(
6072
private val getReturnTypeOrNull: CalculateReturnTypeOrNull,
6173
private val aggregator: Aggregate<Value, Return>,
62-
private val preservesType: Boolean,
6374
) : AggregatorProvider<FlatteningAggregator<Value, Return>> by AggregatorProvider({ name ->
6475
FlatteningAggregator(
6576
name = name,
6677
getReturnTypeOrNull = getReturnTypeOrNull,
6778
aggregator = aggregator,
68-
preservesType = preservesType,
6979
)
7080
})
7181
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/TwoStepAggregator.kt

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators
22

33
import org.jetbrains.kotlinx.dataframe.DataColumn
4+
import org.jetbrains.kotlinx.dataframe.columns.isEmpty
45
import org.jetbrains.kotlinx.dataframe.impl.commonType
6+
import kotlin.reflect.KType
57
import kotlin.reflect.full.starProjectedType
68
import kotlin.reflect.full.withNullability
79

@@ -24,8 +26,7 @@ import kotlin.reflect.full.withNullability
2426
* -> Return?
2527
* ```
2628
*
27-
* It can also be used as a "simple" aggregator by providing the same function for both steps,
28-
* requires [preservesType] be set to `true`.
29+
* It can also be used as a "simple" aggregator by providing the same function for both steps.
2930
*
3031
* See [FlatteningAggregator] for different behavior for multiple columns.
3132
*
@@ -34,14 +35,12 @@ import kotlin.reflect.full.withNullability
3435
* @param stepOneAggregator Functional argument for the [aggregate] function, used within a [DataColumn] or [Iterable].
3536
* @param stepTwoAggregator Functional argument for the aggregation function used between different columns.
3637
* It is run on the results of [stepOneAggregator].
37-
* @param preservesType If `true`, [Value][Value]` == `[Return][Return].
3838
*/
3939
internal class TwoStepAggregator<in Value, out Return>(
4040
name: String,
4141
getReturnTypeOrNull: CalculateReturnTypeOrNull,
4242
stepOneAggregator: Aggregate<Value, Return>,
4343
private val stepTwoAggregator: Aggregate<Return, Return>,
44-
override val preservesType: Boolean,
4544
) : AggregatorBase<Value, Return>(name, getReturnTypeOrNull, stepOneAggregator) {
4645

4746
/**
@@ -57,7 +56,7 @@ internal class TwoStepAggregator<in Value, out Return>(
5756
val value = aggregate(col) ?: return@mapNotNull null
5857
val type = calculateReturnTypeOrNull(
5958
type = col.type().withNullability(false),
60-
emptyInput = col.size() == 0,
59+
emptyInput = col.isEmpty,
6160
) ?: value::class.starProjectedType // heavy fallback type calculation
6261

6362
value to type
@@ -66,27 +65,41 @@ internal class TwoStepAggregator<in Value, out Return>(
6665
return stepTwoAggregator(values, commonType)
6766
}
6867

68+
/**
69+
* Function that can give the return type of [aggregate] with columns as [KType],
70+
* given the multiple types of the input.
71+
* This allows aggregators to avoid runtime type calculations.
72+
*
73+
* @param colTypes The types of the input columns.
74+
* @param colsEmpty If `true`, all the input columns are considered empty. This often affects the return type.
75+
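* @return The common type of the per-column return types as [KType], or `null` if the return type cannot be calculated for at least one of the [colTypes].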
76+
*/
77+
override fun calculateReturnTypeOrNull(colTypes: Set<KType>, colsEmpty: Boolean): KType? {
78+
val typesAfterStepOne = colTypes.map { type ->
79+
calculateReturnTypeOrNull(type = type.withNullability(false), emptyInput = colsEmpty)
80+
}
81+
if (typesAfterStepOne.any { it == null }) return null
82+
return typesAfterStepOne.commonType()
83+
}
84+
6985
/**
7086
* Creates [TwoStepAggregator].
7187
*
7288
* @param getReturnTypeOrNull Functional argument for the [calculateReturnTypeOrNull] function.
7389
* @param stepOneAggregator Functional argument for the [aggregate] function, used within a [DataColumn] or [Iterable].
7490
* @param stepTwoAggregator Functional argument for the aggregation function used between different columns.
7591
* It is run on the results of [stepOneAggregator].
76-
* @param preservesType If `true`, [Value][Value]` == `[Return][Return].
7792
*/
7893
class Factory<in Value, out Return>(
7994
private val getReturnTypeOrNull: CalculateReturnTypeOrNull,
8095
private val stepOneAggregator: Aggregate<Value, Return>,
8196
private val stepTwoAggregator: Aggregate<Return, Return>,
82-
private val preservesType: Boolean,
8397
) : AggregatorProvider<TwoStepAggregator<Value, Return>> by AggregatorProvider({ name ->
8498
TwoStepAggregator(
8599
name = name,
86100
getReturnTypeOrNull = getReturnTypeOrNull,
87101
stepOneAggregator = stepOneAggregator,
88102
stepTwoAggregator = stepTwoAggregator,
89-
preservesType = preservesType,
90103
)
91104
})
92105
}

0 commit comments

Comments
 (0)