Skip to content

Commit a0279cd

Browse files
committed
kdocs, cleaning, and some tiny refactorings
1 parent 21f4ccc commit a0279cd

25 files changed

+705
-504
lines changed

core/api/core.api

Lines changed: 274 additions & 236 deletions
Large diffs are not rendered by default.

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sum.kt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,12 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor
2323
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf
2424
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfRow
2525
import org.jetbrains.kotlinx.dataframe.impl.aggregation.primitiveOrMixedNumberColumns
26-
import org.jetbrains.kotlinx.dataframe.impl.canBeNaN
2726
import org.jetbrains.kotlinx.dataframe.impl.columns.toNumberColumns
2827
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveOrMixedNumber
2928
import kotlin.experimental.ExperimentalTypeInference
3029
import kotlin.reflect.KClass
3130
import kotlin.reflect.KProperty
3231
import kotlin.reflect.KType
33-
import kotlin.reflect.full.isSubtypeOf
3432
import kotlin.reflect.typeOf
3533

3634
/* TODO KDocs

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt

Lines changed: 100 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,32 +12,30 @@ import kotlin.reflect.KType
1212
import kotlin.reflect.full.withNullability
1313

1414
/**
15-
* Base interface for all aggregators.
15+
* This class is the main entry-point for creating an aggregator.
1616
*
17-
* Aggregators are used to compute a single value from an [Iterable] of values, a single [DataColumn],
18-
* or multiple [DataColumns][DataColumn].
17+
* Aggregators are used to compute a single value from a [Sequence] of values,
18+
* a single [DataColumn], or multiple [DataColumns][DataColumn].
1919
*
2020
* [Aggregator] follows a dependency injection pattern:
2121
*
2222
* Using the constructor or [Aggregator.invoke] function, you can create an [Aggregator] instance with a choice of:
23-
* - [AggregatorAggregationHandler] - the base functionality of the aggregator,
24-
* which computes the result from the input values.
23+
* - [AggregatorInputHandler] - {@include [AggregatorInputHandler]}
2524
*
26-
* Options: [ReducingAggregationHandler], [SelectingAggregationHandler]
25+
* Options: [NumberInputHandler], [AnyInputHandler]
2726
*
28-
* - [AggregatorInputHandler] - the input handler,
29-
* which handles specific type checks, conversion, and preprocessing of the input values.
27+
* - [AggregatorAggregationHandler] - {@include [AggregatorAggregationHandler]}
3028
*
31-
* Options: [NumberInputHandler], [AnyInputHandler]
29+
* Options: [ReducingAggregationHandler], [SelectingAggregationHandler]
3230
*
33-
* - [AggregatorMultipleColumnsHandler] - the multiple columns handler, which specifies how to aggregate multiple columns.
31+
* - [AggregatorMultipleColumnsHandler] - {@include [AggregatorMultipleColumnsHandler]}
3432
*
3533
* Options: [FlatteningMultipleColumnsHandler], [TwoStepMultipleColumnsHandler], [NoMultipleColumnsHandler]
3634
*
37-
*
38-
* @param Value The type of the values to be aggregated.
35+
* @param Value The non-null type of the values to be aggregated.
3936
* The input can always have nulls, they are filtered out.
4037
* @param Return The type of the resulting value. Can optionally be nullable.
38+
* @see [invoke]
4139
*/
4240
@PublishedApi
4341
internal class Aggregator<in Value : Any, out Return : Any?>(
@@ -49,13 +47,6 @@ internal class Aggregator<in Value : Any, out Return : Any?>(
4947
AggregatorMultipleColumnsHandler<Value, Return> by multipleColumnsHandler,
5048
AggregatorAggregationHandler<Value, Return> by aggregationHandler {
5149

52-
constructor(other: Aggregator<Value, Return>) : this(
53-
name = other.name,
54-
aggregationHandler = other,
55-
inputHandler = other,
56-
multipleColumnsHandler = other,
57-
)
58-
5950
// Set the aggregator reference in all handlers to this instance
6051
init {
6152
aggregationHandler.init(this)
@@ -73,8 +64,15 @@ internal class Aggregator<in Value : Any, out Return : Any?>(
7364
override fun toString(): String =
7465
"Aggregator(name='$name', aggregationHandler=$aggregationHandler, inputHandler=$inputHandler, multipleColumnsHandler=$multipleColumnsHandler)"
7566

76-
companion object {
77-
operator fun <Value : Any, Return : Any?> invoke(
67+
internal companion object {
68+
69+
/**
70+
* Factory function for creating an [Aggregator] instance given a name.
71+
*
72+
* @see AggregatorProvider
73+
* @see Aggregator
74+
*/
75+
internal operator fun <Value : Any, Return : Any?> invoke(
7876
aggregationHandler: AggregatorAggregationHandler<Value, Return>,
7977
inputHandler: AggregatorInputHandler<Value, Return>,
8078
multipleColumnsHandler: AggregatorMultipleColumnsHandler<Value, Return>,
@@ -90,18 +88,36 @@ internal class Aggregator<in Value : Any, out Return : Any?>(
9088
}
9189
}
9290

91+
/**
92+
* Performs aggregation on the given [values], taking [valueType] into account.
93+
* If [valueType] is unknown, see [calculateValueType] or [aggregateCalculatingValueType].
94+
*/
9395
@PublishedApi
9496
internal fun <Value : Any, Return : Any?> Aggregator<Value, Return>.aggregate(
9597
values: Sequence<Value?>,
9698
valueType: ValueType,
97-
) = aggregateSingleSequence(values, valueType)
99+
) = aggregateSequence(values, valueType)
98100

101+
/**
102+
* Performs aggregation on the given [values], taking [valueType] into account.
103+
* If [valueType] is unknown, see [calculateValueType] or [aggregateCalculatingValueType].
104+
*/
99105
@PublishedApi
100106
internal fun <Value : Any, Return : Any?> Aggregator<Value, Return>.aggregate(
101107
values: Sequence<Value?>,
102108
valueType: KType,
103-
) = aggregate(values, valueType.toValueType())
109+
) = aggregate(values, valueType.toValueType(needsFullConversion = false))
104110

111+
/**
112+
* If the specific [ValueType] of the input is not known, but you still want to call [aggregate],
113+
* this function can be called to calculate it by combining the set of known [valueTypes] or
114+
* by gathering the types from [values].
115+
*
116+
* This is a helper function that calls the correct
117+
* [AggregatorInputHandler.calculateValueType] based on the given input.
118+
*
119+
* Giving [valueTypes] is preferred because of efficiency, as it allows for avoiding runtime type checks.
120+
*/
105121
internal fun <Value : Any, Return : Any?> Aggregator<Value, Return>.calculateValueType(
106122
values: Sequence<Value?>,
107123
valueTypes: Set<KType>? = null,
@@ -111,31 +127,66 @@ internal fun <Value : Any, Return : Any?> Aggregator<Value, Return>.calculateVal
111127
calculateValueType(values)
112128
}
113129

130+
/**
131+
* If the specific [ValueType] of the input is not known, but you still want to call [aggregate],
132+
* this function can be called to calculate it by combining the set of known [valueTypes] or
133+
* by gathering the types from [values] and then aggregating them.
134+
*
135+
* Giving [valueTypes] is preferred because of efficiency, as it allows for avoiding runtime type checks.
136+
*/
114137
internal fun <Value : Any, Return : Any?> Aggregator<Value, Return>.aggregateCalculatingValueType(
115138
values: Sequence<Value?>,
116139
valueTypes: Set<KType>? = null,
117-
) = aggregateSingleSequence(
140+
) = aggregateSequence(
118141
values = values,
119142
valueType = calculateValueType(values, valueTypes),
120143
)
121144

145+
/**
146+
* Aggregates the data in the given column and computes a single resulting value.
147+
*/
122148
internal fun <Value : Any, Return : Any?> Aggregator<Value, Return>.aggregate(column: DataColumn<Value?>) =
123149
aggregateSingleColumn(column)
124150

151+
/**
152+
* Aggregates the data in the given columns and computes a single resulting value.
153+
*/
125154
internal fun <Value : Any, Return : Any?> Aggregator<Value, Return>.aggregate(columns: Sequence<DataColumn<Value?>>) =
126155
aggregateMultipleColumns(columns)
127156

157+
/**
158+
* Gives the index of the aggregation result in the input [values], if it applies.
159+
* This is used for aggregators with an [AggregatorAggregationHandler] where
160+
* [Value][Value]` == `[Return][Return], and where the result exists in the input.
161+
*
162+
* Like for [SelectingAggregationHandler].
163+
*
164+
* Defaults to `-1`.
165+
*
166+
* If [valueType] is unknown, see [calculateValueType].
167+
*/
128168
@PublishedApi
129169
internal fun <Value : Return & Any, Return : Any?> Aggregator<Value, Return>.indexOfAggregationResult(
130170
values: Sequence<Value?>,
131171
valueType: ValueType,
132172
): Int = indexOfAggregationResultSingleSequence(values, valueType)
133173

174+
/**
175+
* Gives the index of the aggregation result in the input [values], if it applies.
176+
* This is used for aggregators with an [AggregatorAggregationHandler] where
177+
* [Value][Value]` == `[Return][Return], and where the result exists in the input.
178+
*
179+
* Like for [SelectingAggregationHandler].
180+
*
181+
* Defaults to `-1`.
182+
*
183+
* If [valueType] is unknown, see [calculateValueType].
184+
*/
134185
@PublishedApi
135186
internal fun <Value : Return & Any, Return : Any?> Aggregator<Value, Return>.indexOfAggregationResult(
136187
values: Sequence<Value?>,
137188
valueType: KType,
138-
): Int = indexOfAggregationResultSingleSequence(values, valueType.toValueType())
189+
): Int = indexOfAggregationResultSingleSequence(values, valueType.toValueType(needsFullConversion = false))
139190

140191
@Suppress("UNCHECKED_CAST")
141192
@PublishedApi
@@ -146,20 +197,35 @@ internal fun <Type : Any?> Aggregator<*, *>.cast(): Aggregator<Type & Any, Type>
146197
internal fun <Value : Any, Return : Any?> Aggregator<*, *>.cast2(): Aggregator<Value, Return> =
147198
this as Aggregator<Value, Return>
148199

149-
/** Type alias for [Aggregator.calculateReturnTypeMultipleColumnsOrNull] */
150-
internal typealias CalculateReturnTypeOrNull = (type: KType, emptyInput: Boolean) -> KType?
200+
/**
201+
* Type alias for a function that gives the return type of a [Reducer] or [Selector]
202+
* given some input type and whether the input is empty.
203+
*/
204+
internal typealias CalculateReturnType = (type: KType, emptyInput: Boolean) -> KType
151205

152206
/**
153-
* Type alias for the argument for [Aggregator.aggregateSingleSequence].
154-
* Nulls have already been filtered out when this argument is called.
207+
* Type alias for a reducer function where the type of the values is provided as [KType].
208+
* Nulls have already been filtered out when this function is called.
155209
*/
156-
internal typealias Reducer<Value, Return> = Sequence<Value & Any>.(type: KType) -> Return
210+
internal typealias Reducer<Value, Return> = Sequence<Value & Any>.(valueType: KType) -> Return
157211

158-
internal typealias IndexOfResult<Value> = Sequence<Value?>.(type: KType) -> Int
212+
/**
213+
* Type alias for a selector function where the type of the values is provided as [KType].
214+
*
215+
* It is expected that [Value][Value]` : `[Return][Return]` & `[Any][Any], and [Return][Return]` : `[Any?][Any].
216+
*
217+
* Nulls have already been filtered out when this function is called.
218+
*/
219+
internal typealias Selector<Value, Return> = Sequence<Value & Any>.(type: KType) -> Return
159220

160-
internal typealias IsBetterThanSelector<Value> = (Value & Any).(other: Value & Any, valueType: KType) -> Boolean
221+
/**
222+
* Type alias for a function that returns the index of the result of [Selector] in this sequence.
223+
* If the result is not in the sequence, it returns -1.
224+
* The type of the values is provided as [KType] and the sequence can contain nulls.
225+
*/
226+
internal typealias IndexOfResult<Value> = Sequence<Value?>.(type: KType) -> Int
161227

162-
/** Common case for [CalculateReturnTypeOrNull], preserves return type, but makes it nullable for empty inputs. */
163-
internal val preserveReturnTypeNullIfEmpty: CalculateReturnTypeOrNull = { type, emptyInput ->
228+
/** Common case for [CalculateReturnType], preserves return type, but makes it nullable for empty inputs. */
229+
internal val preserveReturnTypeNullIfEmpty: CalculateReturnType = { type, emptyInput ->
164230
type.withNullability(emptyInput)
165231
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,51 @@
11
package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators
22

33
import org.jetbrains.kotlinx.dataframe.DataColumn
4+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.aggregationHandlers.SelectingAggregationHandler
45
import kotlin.reflect.KType
56

7+
/**
8+
* The base functionality of the aggregator,
9+
* which defines how the aggregation of a single [Sequence] or [column][DataColumn] is done.
10+
* It also provides information on which return type will be given, as [KType], given a [value type][ValueType].
11+
* It can also provide the index of the result in the input values if it is a selecting aggregator.
12+
*/
613
@PublishedApi
714
internal interface AggregatorAggregationHandler<in Value : Any, out Return : Any?> : AggregatorHandler<Value, Return> {
815

916
/**
1017
* Base function of [Aggregator].
1118
*
1219
* Aggregates the given values, taking [valueType] into account,
13-
* filtering nulls (only if [type.isMarkedNullable][kotlin.reflect.KType.isMarkedNullable]),
20+
* filtering nulls (only if [valueType.kType.isMarkedNullable][KType.isMarkedNullable]),
1421
* and computes a single resulting value.
1522
*
16-
* When using [AggregatorAggregationHandler], this can be supplied by the [AggregatorAggregationHandler.aggregateSingle] argument.
17-
*
18-
* When the exact [valueType] is unknown, use [aggregateCalculatingValueType].
23+
* When the exact [valueType] is unknown, use [calculateValueType] or [aggregateCalculatingValueType].
1924
*/
20-
fun aggregateSingleSequence(values: Sequence<Value?>, valueType: ValueType): Return
25+
fun aggregateSequence(values: Sequence<Value?>, valueType: ValueType): Return
2126

2227
/**
2328
* Aggregates the data in the given column and computes a single resulting value.
24-
* Calls [aggregateSingleColumn] (with [Iterable] and [kotlin.reflect.KType]).
25-
*
26-
* See [AggregatorAggregationHandler.aggregateSingleSequence].
29+
* Calls [aggregateSequence].
2730
*/
2831
fun aggregateSingleColumn(column: DataColumn<Value?>): Return
2932

3033
/**
31-
* Function that can give the return type of [aggregateSingleSequence] as [kotlin.reflect.KType], given the type of the input.
34+
* Function that can give the return type of [aggregateSequence] as [KType], given the type of the input.
3235
* This allows aggregators to avoid runtime type calculations.
3336
*
34-
* @param type The type of the input values.
37+
* @param valueType The type of the input values.
3538
* @param emptyInput If `true`, the input values are considered empty. This often affects the return type.
36-
* @return The return type of [aggregateSingleSequence] as [kotlin.reflect.KType].
39+
* @return The return type of [aggregateSequence] as [KType].
3740
*/
38-
fun calculateReturnTypeOrNull(type: KType, emptyInput: Boolean): KType?
41+
fun calculateReturnType(valueType: KType, emptyInput: Boolean): KType
3942

4043
/**
4144
* Function that can give the index of the aggregation result in the input [values], if it applies.
4245
* This is used for [AggregatorAggregationHandlers][AggregatorAggregationHandler] where
4346
* [Value][Value]` == `[Return][Return], and where the result exists in the input.
4447
*
45-
* Like for [org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.aggregationHandlers.SelectingAggregationHandler].
48+
* Like for [SelectingAggregationHandler].
4649
*
4750
* Defaults to `-1`.
4851
*/

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorHandler.kt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators
22

3+
/**
4+
* Common interface for [Aggregator] handlers or "injector" objects that can build up an [Aggregator] instance.
5+
*
6+
* When an [Aggregator] is instantiated,
7+
* the [init] function of each of its [AggregatorHandlers][AggregatorHandler] is called,
8+
* which allows the handler to refer to the [Aggregator] instance via [aggregator].
9+
*/
310
internal interface AggregatorHandler<in Value : Any, out Return : Any?> {
411

512
/**

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorInputHandler.kt

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,35 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators
22

33
import kotlin.reflect.KType
44

5+
/**
6+
* The input handler of the aggregator,
7+
* which handles type checks, conversions, and preprocessing of a single sequence of input values.
8+
* It can also calculate a specific [value type][ValueType] from the input values or input types
9+
* if the (specific) type is not known.
10+
*/
511
internal interface AggregatorInputHandler<in Value : Any, out Return : Any?> : AggregatorHandler<Value, Return> {
612

13+
/**
14+
* If the specific [ValueType] of the input is not known, but you still want to call [aggregate],
15+
* this function can be called to calculate it by combining the set of known [valueTypes].
16+
*/
717
fun calculateValueType(valueTypes: Set<KType>): ValueType
818

9-
// heavy!
19+
/**
20+
* WARNING: HEAVY!
21+
*
22+
* If the specific [ValueType] of the input is not known, but you still want to call [aggregate],
23+
* this function can be called to calculate it by getting the types of [values] at runtime.
24+
*/
1025
fun calculateValueType(values: Sequence<Value?>): ValueType
1126

27+
/**
28+
* Preprocesses the input values before aggregation.
29+
* It's expected that this function converts [values] to the right [valueType.kType][ValueType.kType]
30+
* if [valueType.needsFullConversion][ValueType.needsFullConversion] is `true`.
31+
*
32+
* @return A pair of the preprocessed values and the (potentially new) type of the values.
33+
*/
1234
fun preprocessAggregation(
1335
values: Sequence<Value?>,
1436
valueType: ValueType,
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,30 @@
11
package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators
22

33
import org.jetbrains.kotlinx.dataframe.DataColumn
4+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregator
45
import kotlin.reflect.KType
56

6-
internal interface AggregatorMultipleColumnsHandler<in Value : Any, out Return : Any?> : AggregatorHandler<Value, Return> {
7+
/**
8+
* The multiple columns handler,
9+
* which specifies how to aggregate multiple columns into a single value by using the supplied
10+
* [AggregatorAggregationHandler].
11+
* It can also calculate the return type of the aggregation given all input column types.
12+
*/
13+
internal interface AggregatorMultipleColumnsHandler<in Value : Any, out Return : Any?> :
14+
AggregatorHandler<Value, Return> {
715

816
/**
917
* Aggregates the data in the multiple given columns and computes a single resulting value.
18+
* Calls [Aggregator.aggregateSequence] or [Aggregator.aggregateSingleColumn].
1019
*/
1120
fun aggregateMultipleColumns(columns: Sequence<DataColumn<Value?>>): Return
1221

1322
/**
14-
* Function that can give the return type of [aggregateSingleSequence] with columns as [kotlin.reflect.KType],
15-
* given the multiple types of the input.
23+
* Function that can give the return type of [aggregateMultipleColumns], given the types of the columns.
1624
* This allows aggregators to avoid runtime type calculations.
1725
*
1826
* @param colTypes The types of the input columns.
1927
* @param colsEmpty If `true`, all the input columns are considered empty. This often affects the return type.
20-
* @return The return type of [aggregateSingleSequence] as [kotlin.reflect.KType].
2128
*/
22-
fun calculateReturnTypeMultipleColumnsOrNull(colTypes: Set<KType>, colsEmpty: Boolean): KType?
29+
fun calculateReturnTypeMultipleColumns(colTypes: Set<KType>, colsEmpty: Boolean): KType
2330
}

0 commit comments

Comments
 (0)