Skip to content

Aggregator dependency injection, min/max, and skipNaN #1108

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Apr 4, 2025
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
418acfe
starting min/max
Jolanrensen Mar 21, 2025
dc7502e
wip making Aggregators more modular
Jolanrensen Mar 21, 2025
37a2a1d
Merge branch 'master' into min-max
Jolanrensen Mar 24, 2025
b8efaa0
switching aggregators fully to sequences
Jolanrensen Mar 24, 2025
79be369
Rewrote Aggregator to a dependency injection pattern. This allows del…
Jolanrensen Mar 24, 2025
158597d
DefaultAggregationHandler -> ReducingAggregationHandler. Added Select…
Jolanrensen Mar 25, 2025
3629b35
Continuing aggregation refactor
Jolanrensen Mar 26, 2025
6c76219
finished types and aggregateBy for min
Jolanrensen Mar 27, 2025
751321e
added skipNaN option for mean, sum, and min
Jolanrensen Mar 27, 2025
2a90300
refactored AggregatorProvider back
Jolanrensen Mar 27, 2025
21f4ccc
update max api similar to min
Jolanrensen Mar 27, 2025
a0279cd
kdocs, cleaning, and some tiny refactorings
Jolanrensen Mar 28, 2025
2e0875b
added mean tests
Jolanrensen Mar 31, 2025
73ceaa8
enabled some previously broken sum.kt tests and fixed edge case in Nu…
Jolanrensen Mar 31, 2025
87ab8ed
added more sum tests
Jolanrensen Mar 31, 2025
12d0922
added tests and fixed NaN behavior for min/max functions
Jolanrensen Apr 1, 2025
36ed245
unified min and max statistics implementations
Jolanrensen Apr 1, 2025
60319f4
added note about reflection in input handlers
Jolanrensen Apr 2, 2025
b427b6a
separated parts of min/max tests
Jolanrensen Apr 2, 2025
973b521
Merge branch 'master' into min-max
Jolanrensen Apr 3, 2025
6c67050
ignore notebook tests if they fail because of issue #1116
Jolanrensen Apr 3, 2025
348b84b
added overloads for binary compatibility with added skipNaN argument.…
Jolanrensen Apr 4, 2025
b4fd200
adding docs based on feedback
Jolanrensen Apr 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
557 changes: 307 additions & 250 deletions core/api/core.api

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable
import org.jetbrains.kotlinx.dataframe.impl.isMixedNumber
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveOrMixedNumber
Expand All @@ -18,11 +19,7 @@ import org.jetbrains.kotlinx.dataframe.util.IS_INTER_COMPARABLE_IMPORT
import kotlin.contracts.ExperimentalContracts
import kotlin.contracts.contract
import kotlin.reflect.KType
import kotlin.reflect.KTypeProjection
import kotlin.reflect.KVariance
import kotlin.reflect.full.createType
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.full.withNullability
import kotlin.reflect.typeOf

public fun AnyCol.isColumnGroup(): Boolean {
Expand Down Expand Up @@ -93,13 +90,4 @@ public fun AnyCol.isComparable(): Boolean = valuesAreComparable()
*
* Technically, this means the values' common type `T(?)` is a subtype of [Comparable]`<in T>(?)`
*/
public fun AnyCol.valuesAreComparable(): Boolean =
// Short-circuits to false for anything that is not a value column.
isValueColumn() &&
isSubtypeOf(
// Reflectively builds the type `Comparable<in T>`, where T is this column's type with nullability stripped.
Comparable::class.createType(
arguments = listOf(
KTypeProjection(KVariance.IN, type().withNullability(false)),
),
// The constructed `Comparable` type is marked nullable exactly when hasNulls() reports true.
nullable = hasNulls(),
),
)
public fun AnyCol.valuesAreComparable(): Boolean {
    // Anything that is not a value column is never considered comparable.
    if (!isValueColumn()) return false
    // Delegate the actual comparability check to the column's type.
    return type().isIntraComparable()
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.jetbrains.kotlinx.dataframe.api

// NOTE(review): the two `skip…_default` lines below appear to be the before/after sides of a rename
// shown in this diff (skipNA_default -> skipNaN_default); both are initialized to `false`.
// Presumably this is the shared default for `skipNaN` parameters — confirm at call sites.
@PublishedApi
internal val skipNA_default: Boolean = false
internal val skipNaN_default: Boolean = false

// NOTE(review): presumably the shared default for a `ddof` parameter — confirm at call sites.
@PublishedApi
internal val ddof_default: Int = 1
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import org.jetbrains.kotlinx.dataframe.documentation.NaN
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
import org.jetbrains.kotlinx.dataframe.get
import org.jetbrains.kotlinx.dataframe.typeClass
import kotlin.contracts.ExperimentalContracts
import kotlin.contracts.contract
import kotlin.reflect.KProperty

// region fillNulls
Expand Down Expand Up @@ -300,6 +302,14 @@ public fun <T, C> DataFrame<T>.fillNulls(vararg columns: ColumnReference<C>): Up

/**
 * `true` when the receiver is a [Double] or a [Float] holding a NaN value;
 * `false` for every other receiver, including `null`.
 */
internal inline val Any?.isNaN: Boolean
    get() = when (this) {
        is Double -> isNaN()
        is Float -> isNaN()
        else -> false
    }

/**
 * Function form of the [isNA] property that additionally carries a Kotlin contract.
 *
 * The contract tells the compiler that when this call returns `false`, the receiver is not `null`,
 * so callers can rely on a non-null smart cast after a negative check.
 *
 * NOTE(review): the explicit [JvmName] is presumably there to avoid a JVM signature clash with the
 * getter of the `isNA` extension property — confirm.
 *
 * @return the value of the receiver's `isNA` property.
 */
@JvmName("isNaWithContract")
@Suppress("NOTHING_TO_INLINE")
@OptIn(ExperimentalContracts::class)
internal inline fun <T : Any?> T.isNA(): Boolean {
// The contract block has to be the first statement of the function body.
contract { returns(false) implies (this@isNA != null) }
// Delegates to the `isNA` extension property for the actual check.
return isNA
}

internal inline val Any?.isNA: Boolean
get() = when (this) {
null -> true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,5 @@ public class ReducedGroupBy<T, G>(
override fun toString(): String = "ReducedGroupBy(groupBy=$groupBy, reducer=$reducer)"
}

/**
 * Pairs this [GroupBy] with the given [reducer] by wrapping both in a [ReducedGroupBy].
 *
 * @param reducer selector that receives a [DataFrame] of `G` and yields a nullable [DataRow] of `G`.
 * @return a new [ReducedGroupBy] constructed from this receiver and [reducer].
 */
@PublishedApi
internal fun <T, G> GroupBy<T, G>.reduce(reducer: Selector<DataFrame<G>, DataRow<G>?>): ReducedGroupBy<T, G> {
    return ReducedGroupBy(this, reducer)
}

Large diffs are not rendered by default.

Loading