diff --git a/core/api/core.api b/core/api/core.api index 74755d8bbc..8a7e2a490e 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -3486,63 +3486,108 @@ public final class org/jetbrains/kotlinx/dataframe/api/ParserOptions { public final class org/jetbrains/kotlinx/dataframe/api/PercentileKt { public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/DataColumn;D)Ljava/lang/Comparable; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/DataColumn;DZ)D public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DLkotlin/jvm/functions/Function2;)Ljava/lang/Comparable; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Ljava/lang/String;)Ljava/lang/Object; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DZ)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DZLkotlin/jvm/functions/Function2;)D + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Ljava/lang/String;Z)Ljava/lang/Object; public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lkotlin/reflect/KProperty;)Ljava/lang/Comparable; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lkotlin/reflect/KProperty;Z)D public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;)Ljava/lang/Comparable; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;DLjava/lang/String;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentile 
(Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Ljava/lang/String;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lkotlin/reflect/KProperty;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DZ)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lkotlin/reflect/KProperty;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lkotlin/reflect/KProperty;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentile 
(Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;DLjava/lang/String;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Ljava/lang/String;Ljava/lang/String;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lkotlin/reflect/KProperty;Ljava/lang/String;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Ljava/lang/String;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lkotlin/reflect/KProperty;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun 
percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;DLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Z)D + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;DLjava/lang/String;ZLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;DZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Ljava/lang/String;Ljava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lkotlin/reflect/KProperty;Ljava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Ljava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DZLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DZZ)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Ljava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lkotlin/reflect/KProperty;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentile 
(Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DZLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Ljava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lkotlin/reflect/KProperty;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentile (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;DZILjava/lang/Object;)D + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DZLkotlin/jvm/functions/Function2;ILjava/lang/Object;)D + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Ljava/lang/String;ZILjava/lang/Object;)Ljava/lang/Object; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lkotlin/reflect/KProperty;ZILjava/lang/Object;)D + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZILjava/lang/Object;)D + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;DLjava/lang/String;ZLkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentile$default 
(Lorg/jetbrains/kotlinx/dataframe/api/Grouped;DZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Ljava/lang/String;Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lkotlin/reflect/KProperty;Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DZLkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lkotlin/reflect/KProperty;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DZLkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentile$default 
(Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lkotlin/reflect/KProperty;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentile$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentileBy (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DLjava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentileBy (Lorg/jetbrains/kotlinx/dataframe/api/GroupBy;DLjava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/api/ReducedGroupBy; + public static final fun percentileBy (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DLjava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/api/ReducedPivot; + public static final fun percentileBy (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DLjava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/api/ReducedPivotGroupBy; + public static synthetic fun percentileBy$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DLjava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentileBy$default (Lorg/jetbrains/kotlinx/dataframe/api/GroupBy;DLjava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ReducedGroupBy; + public static synthetic fun percentileBy$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DLjava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ReducedPivot; + public static synthetic fun percentileBy$default 
(Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DLjava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ReducedPivotGroupBy; + public static final fun percentileByOrNull (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DLjava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentileByOrNull$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DLjava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DZLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Ljava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lkotlin/reflect/KProperty;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;DZLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lkotlin/reflect/KProperty;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DZLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun percentileFor 
(Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Ljava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lkotlin/reflect/KProperty;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DZLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Ljava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lkotlin/reflect/KProperty;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DZLkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lkotlin/reflect/KProperty;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun percentileFor$default 
(Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DZLkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lkotlin/reflect/KProperty;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lkotlin/reflect/KProperty;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DZZLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Ljava/lang/String;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lkotlin/reflect/KProperty;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DZZLkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentileFor 
(Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Ljava/lang/String;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lkotlin/reflect/KProperty;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun percentileFor (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DZLkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lkotlin/reflect/KProperty;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;DZLkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lkotlin/reflect/KProperty;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentileFor$default 
(Lorg/jetbrains/kotlinx/dataframe/api/Pivot;DZZLkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Ljava/lang/String;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lkotlin/reflect/KProperty;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;DZZLkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Ljava/lang/String;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lkotlin/reflect/KProperty;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun percentileFor$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun percentileOrNull (Lorg/jetbrains/kotlinx/dataframe/DataColumn;D)Ljava/lang/Comparable; + public static final fun percentileOrNull (Lorg/jetbrains/kotlinx/dataframe/DataColumn;DZ)Ljava/lang/Double; public static final fun percentileOrNull (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DLkotlin/jvm/functions/Function2;)Ljava/lang/Comparable; - public static final fun percentileOrNull 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Ljava/lang/String;)Ljava/lang/Object; + public static final fun percentileOrNull (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DZLkotlin/jvm/functions/Function2;)Ljava/lang/Double; + public static final fun percentileOrNull (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Ljava/lang/String;Z)Ljava/lang/Object; public static final fun percentileOrNull (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lkotlin/reflect/KProperty;)Ljava/lang/Comparable; + public static final fun percentileOrNull (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lkotlin/reflect/KProperty;Z)Ljava/lang/Double; public static final fun percentileOrNull (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;)Ljava/lang/Comparable; - public static final fun rowPercentile (Lorg/jetbrains/kotlinx/dataframe/DataRow;D)Ljava/lang/Object; - public static final fun rowPercentileOrNull (Lorg/jetbrains/kotlinx/dataframe/DataRow;D)Ljava/lang/Object; + public static final fun percentileOrNull (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Z)Ljava/lang/Double; + public static synthetic fun percentileOrNull$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;DZILjava/lang/Object;)Ljava/lang/Double; + public static synthetic fun percentileOrNull$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;DZLkotlin/jvm/functions/Function2;ILjava/lang/Object;)Ljava/lang/Double; + public static synthetic fun percentileOrNull$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Ljava/lang/String;ZILjava/lang/Object;)Ljava/lang/Object; + public static synthetic fun percentileOrNull$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lkotlin/reflect/KProperty;ZILjava/lang/Object;)Ljava/lang/Double; + public static synthetic fun percentileOrNull$default 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame;D[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZILjava/lang/Object;)Ljava/lang/Double; + public static final fun rowPercentile (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Void; + public static final fun rowPercentileOrNull (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Void; } public abstract interface class org/jetbrains/kotlinx/dataframe/api/Pivot : org/jetbrains/kotlinx/dataframe/aggregation/Aggregatable { @@ -5577,7 +5622,6 @@ public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/ public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators { public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators; public final fun getMean ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch1; - public final fun getPercentile ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch1; public final fun getStd ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch2; public final fun getSum ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch1; public final fun max (Z)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator; @@ -5585,6 +5629,9 @@ public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/ public final fun medianComparables ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator; public final fun medianNumbers (Z)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator; public final fun min (Z)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator; + public final fun percentileCommon (DZ)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator; + public final fun percentileComparables 
(D)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator; + public final fun percentileNumbers (DZ)Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator; } public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/modes/NoAggregationKt { @@ -6545,8 +6592,7 @@ public final class org/jetbrains/kotlinx/dataframe/math/MinMaxKt { public static final fun minOrNull (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;Z)Ljava/lang/Comparable; } -public final class org/jetbrains/kotlinx/dataframe/math/PercentileKt { - public static final fun percentile (Ljava/lang/Iterable;DLkotlin/reflect/KType;)Ljava/lang/Comparable; +public final class org/jetbrains/kotlinx/dataframe/math/QuantileKt { public static final fun quickSelect (Ljava/util/List;I)Ljava/lang/Comparable; } diff --git a/core/build.gradle.kts b/core/build.gradle.kts index d15c043415..63d66d904f 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -64,6 +64,7 @@ dependencies { kotlinCompilerPluginClasspathSamples(projects.plugins.expressionsConverter) api(libs.commonsCsv) + implementation(libs.commonsIo) implementation(libs.serialization.core) implementation(libs.serialization.json) @@ -82,6 +83,9 @@ dependencies { testImplementation(libs.jsoup) testImplementation(libs.sl4jsimple) + // for checking results + testImplementation(libs.commonsStatisticsDescriptive) + // for samples.api testImplementation(projects.dataframeCsv) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt index 10092d0f9a..7fffc74598 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt @@ -38,6 +38,8 @@ import kotlin.reflect.KProperty * This needs to be explained by KDocs * * medianBy is new for all overloads :) + * Uses [QuantileEstimationMethod.R8] for primitive numbers, else 
[QuantileEstimationMethod.R3]. + * MedianBy also uses [QuantileEstimationMethod.R3]. */ // region DataColumn diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt index f9559bcc75..d20e671c86 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt @@ -1,3 +1,5 @@ +@file:OptIn(ExperimentalTypeInference::class) + package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyRow @@ -8,287 +10,655 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowExpression import org.jetbrains.kotlinx.dataframe.aggregation.ColumnsForAggregateSelector import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload +import org.jetbrains.kotlinx.dataframe.annotations.Interpretable +import org.jetbrains.kotlinx.dataframe.annotations.Refine import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators -import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.cast import org.jetbrains.kotlinx.dataframe.impl.aggregation.intraComparableColumns import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateByOrNull import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfRow import org.jetbrains.kotlinx.dataframe.impl.columns.toComparableColumns import org.jetbrains.kotlinx.dataframe.impl.suggestIfNull -import org.jetbrains.kotlinx.dataframe.math.percentile +import org.jetbrains.kotlinx.dataframe.util.ROW_PERCENTILE +import 
org.jetbrains.kotlinx.dataframe.util.ROW_PERCENTILE_OR_NULL +import kotlin.experimental.ExperimentalTypeInference import kotlin.reflect.KProperty -import kotlin.reflect.typeOf + +/* TODO KDocs + * numbers -> Double or null + * comparable -> itself or null + * + * TODO cases where the lambda dictates the return type require explicit type arguments for + * non-number, comparable overloads: https://youtrack.jetbrains.com/issue/KT-76683 + * so, `df.percentile { intCol }` works, but needs `df.percentile<_, String> { stringCol }` or `df.percentile({ dateCol })` + * This needs to be explained by KDocs + * + * percentileBy is new for all overloads :) + * + * Uses [QuantileEstimationMethod.R8] for primitive numbers, else [QuantileEstimationMethod.R3]. + * PercentileBy also uses [QuantileEstimationMethod.R3]. + */ // region DataColumn -public fun > DataColumn.percentile(percentile: Double): T = +public fun ?> DataColumn.percentile(percentile: Double): T & Any = percentileOrNull(percentile).suggestIfNull("percentile") -public fun > DataColumn.percentileOrNull(percentile: Double): T? = - Aggregators.percentile(percentile).cast().aggregateSingleColumn(this) +public fun ?> DataColumn.percentileOrNull(percentile: Double): T? = + Aggregators.percentileComparables(percentile).aggregateSingleColumn(this) + +public fun DataColumn.percentile( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, +): Double + where T : Comparable?, T : Number? = + percentileOrNull(percentile = percentile, skipNaN = skipNaN).suggestIfNull("percentile") + +public fun DataColumn.percentileOrNull( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, +): Double? + where T : Comparable?, T : Number? 
= + Aggregators.percentileNumbers(percentile, skipNaN).aggregateSingleColumn(this) + +@OverloadResolutionByLambdaReturnType +public inline fun ?> DataColumn.percentileBy( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + crossinline selector: (T) -> R, +): T & Any = percentileByOrNull(percentile, skipNaN, selector).suggestIfNull("percentileBy") -public inline fun > DataColumn.percentileOfOrNull( +@OverloadResolutionByLambdaReturnType +public inline fun ?> DataColumn.percentileByOrNull( percentile: Double, - noinline expression: (T) -> R?, -): R? = Aggregators.percentile(percentile).cast().aggregateOf(this, expression) + skipNaN: Boolean = skipNaNDefault, + crossinline selector: (T) -> R, +): T? = Aggregators.percentileCommon(percentile, skipNaN).aggregateByOrNull(this, selector) -public inline fun > DataColumn.percentileOf( +// TODO, requires explicit type R due to https://youtrack.jetbrains.com/issue/KT-76683 +@OverloadResolutionByLambdaReturnType +public inline fun ?> DataColumn.percentileOf( percentile: Double, - noinline expression: (T) -> R?, -): R = percentileOfOrNull(percentile, expression).suggestIfNull("percentileOf") + crossinline expression: (T) -> R, +): R & Any = percentileOfOrNull(percentile, expression).suggestIfNull("percentileOf") + +// TODO, requires explicit type R due to https://youtrack.jetbrains.com/issue/KT-76683 +@OverloadResolutionByLambdaReturnType +public inline fun ?> DataColumn.percentileOfOrNull( + percentile: Double, + crossinline expression: (T) -> R, +): R? = Aggregators.percentileComparables(percentile).aggregateOf(this, expression) + +@OverloadResolutionByLambdaReturnType +public inline fun DataColumn.percentileOf( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + crossinline expression: (T) -> R, +): Double + where R : Comparable?, R : Number? 
= + percentileOfOrNull(percentile, skipNaN, expression).suggestIfNull("percentileOf") + +@OverloadResolutionByLambdaReturnType +public inline fun DataColumn.percentileOfOrNull( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + crossinline expression: (T) -> R, +): Double? + where R : Comparable?, R : Number? = + Aggregators.percentileNumbers(percentile, skipNaN).aggregateOf(this, expression) // endregion // region DataRow -public fun AnyRow.rowPercentileOrNull(percentile: Double): Any? = - Aggregators.percentile(percentile).aggregateSingleColumn( - values().filterIsInstance>().toValueColumn(), - ) +@Deprecated(ROW_PERCENTILE_OR_NULL, level = DeprecationLevel.ERROR) +public fun AnyRow.rowPercentileOrNull(): Nothing? = error(ROW_PERCENTILE_OR_NULL) -public fun AnyRow.rowPercentile(percentile: Double): Any = - rowPercentileOrNull(percentile).suggestIfNull("rowPercentile") +@Deprecated(ROW_PERCENTILE, level = DeprecationLevel.ERROR) +public fun AnyRow.rowPercentile(): Nothing = error(ROW_PERCENTILE) public inline fun > AnyRow.rowPercentileOfOrNull(percentile: Double): T? = - valuesOf().percentile(percentile, typeOf()) + Aggregators.percentileComparables(percentile).aggregateOfRow(this) { colsOf() } public inline fun > AnyRow.rowPercentileOf(percentile: Double): T = rowPercentileOfOrNull(percentile).suggestIfNull("rowPercentileOf") +public inline fun AnyRow.rowPercentileOfOrNull( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, +): Double? 
+ where T : Comparable, T : Number = + Aggregators.percentileNumbers(percentile, skipNaN).aggregateOfRow(this) { colsOf() } + +public inline fun AnyRow.rowPercentileOf( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, +): Double + where T : Comparable, T : Number = + rowPercentileOfOrNull(percentile, skipNaN).suggestIfNull("rowPercentileOf") + // endregion // region DataFrame -public fun DataFrame.percentile(percentile: Double): DataRow = - percentileFor(percentile, intraComparableColumns()) +public fun DataFrame.percentile(percentile: Double, skipNaN: Boolean = skipNaNDefault): DataRow = + percentileFor(percentile, skipNaN, intraComparableColumns()) -public fun > DataFrame.percentileFor( +public fun ?> DataFrame.percentileFor( percentile: Double, - columns: ColumnsForAggregateSelector, -): DataRow = Aggregators.percentile(percentile).aggregateFor(this, columns) + skipNaN: Boolean = skipNaNDefault, + columns: ColumnsForAggregateSelector, +): DataRow = Aggregators.percentileCommon(percentile, skipNaN).aggregateFor(this, columns) -public fun DataFrame.percentileFor(percentile: Double, vararg columns: String): DataRow = - percentileFor(percentile) { columns.toComparableColumns() } +public fun DataFrame.percentileFor( + percentile: Double, + vararg columns: String, + skipNaN: Boolean = skipNaNDefault, +): DataRow = percentileFor(percentile, skipNaN) { columns.toComparableColumns() } @AccessApiOverload -public fun > DataFrame.percentileFor( +public fun ?> DataFrame.percentileFor( percentile: Double, - vararg columns: ColumnReference, -): DataRow = percentileFor(percentile) { columns.toColumnSet() } + vararg columns: ColumnReference, + skipNaN: Boolean = skipNaNDefault, +): DataRow = percentileFor(percentile, skipNaN) { columns.toColumnSet() } @AccessApiOverload -public fun > DataFrame.percentileFor( +public fun ?> DataFrame.percentileFor( + percentile: Double, + vararg columns: KProperty, + skipNaN: Boolean = skipNaNDefault, +): DataRow = 
percentileFor(percentile, skipNaN) { columns.toColumnSet() } + +// TODO, requires explicit type C due to https://youtrack.jetbrains.com/issue/KT-76683 +@OverloadResolutionByLambdaReturnType +public fun ?> DataFrame.percentile( + percentile: Double, + columns: ColumnsSelector, +): C & Any = percentileOrNull(percentile, columns).suggestIfNull("percentile") + +// TODO, requires explicit type C due to https://youtrack.jetbrains.com/issue/KT-76683 +@OverloadResolutionByLambdaReturnType +@Suppress("UNCHECKED_CAST") +public fun ?> DataFrame.percentileOrNull( + percentile: Double, + columns: ColumnsSelector, +): C? = Aggregators.percentileComparables(percentile).aggregateAll(this, columns) + +@OverloadResolutionByLambdaReturnType +public fun DataFrame.percentile( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + columns: ColumnsSelector, +): Double + where C : Number?, C : Comparable? = + percentileOrNull(percentile, skipNaN, columns).suggestIfNull("percentile") + +@OverloadResolutionByLambdaReturnType +@Suppress("UNCHECKED_CAST") +public fun DataFrame.percentileOrNull( percentile: Double, - vararg columns: KProperty, -): DataRow = percentileFor(percentile) { columns.toColumnSet() } + skipNaN: Boolean = skipNaNDefault, + columns: ColumnsSelector, +): Double? + where C : Comparable?, C : Number? 
= + Aggregators.percentileNumbers(percentile, skipNaN).aggregateAll(this, columns) -public fun > DataFrame.percentile(percentile: Double, columns: ColumnsSelector): C = - percentileOrNull(percentile, columns).suggestIfNull("percentile") +public fun DataFrame.percentile( + percentile: Double, + vararg columns: String, + skipNaN: Boolean = skipNaNDefault, +): Any = percentileOrNull(percentile, *columns, skipNaN = skipNaN).suggestIfNull("percentile") -public fun DataFrame.percentile(percentile: Double, vararg columns: String): Any = - percentile(percentile) { columns.toComparableColumns() } +public fun DataFrame.percentileOrNull( + percentile: Double, + vararg columns: String, + skipNaN: Boolean = skipNaNDefault, +): Any? = + Aggregators.percentileCommon?>(percentile, skipNaN).aggregateAll(this) { columns.toColumnSet() } @AccessApiOverload -public fun > DataFrame.percentile(percentile: Double, vararg columns: ColumnReference): C = - percentile(percentile) { columns.toColumnSet() } +public fun ?> DataFrame.percentile( + percentile: Double, + vararg columns: ColumnReference, +): C & Any = percentileOrNull(percentile, *columns).suggestIfNull("percentile") @AccessApiOverload -public fun > DataFrame.percentile(percentile: Double, vararg columns: KProperty): C = - percentile(percentile) { columns.toColumnSet() } +public fun ?> DataFrame.percentileOrNull( + percentile: Double, + vararg columns: ColumnReference, +): C? = percentileOrNull(percentile) { columns.toColumnSet() } -@Suppress("UNCHECKED_CAST") -public fun > DataFrame.percentileOrNull( +@AccessApiOverload +public fun DataFrame.percentile( percentile: Double, - columns: ColumnsSelector, -): C? = Aggregators.percentile(percentile).aggregateAll(this, columns) as C? + vararg columns: ColumnReference, + skipNaN: Boolean = skipNaNDefault, +): Double + where C : Comparable?, C : Number? 
= + percentileOrNull(percentile, *columns, skipNaN = skipNaN).suggestIfNull("percentile") -public fun DataFrame.percentileOrNull(percentile: Double, vararg columns: String): Any? = - percentileOrNull(percentile) { columns.toComparableColumns() } +@AccessApiOverload +public fun DataFrame.percentileOrNull( + percentile: Double, + vararg columns: ColumnReference, + skipNaN: Boolean = skipNaNDefault, +): Double? + where C : Comparable?, C : Number? = + percentileOrNull(percentile, skipNaN) { columns.toColumnSet() } @AccessApiOverload -public fun > DataFrame.percentileOrNull( +public fun ?> DataFrame.percentile( percentile: Double, - vararg columns: ColumnReference, -): C? = percentileOrNull(percentile) { columns.toColumnSet() } + vararg columns: KProperty, +): C & Any = percentileOrNull(percentile, *columns).suggestIfNull("percentile") @AccessApiOverload -public fun > DataFrame.percentileOrNull(percentile: Double, vararg columns: KProperty): C? = - percentileOrNull(percentile) { columns.toColumnSet() } +public fun ?> DataFrame.percentileOrNull( + percentile: Double, + vararg columns: KProperty, +): C? = percentileOrNull(percentile) { columns.toColumnSet() } -public inline fun > DataFrame.percentileOf( +@AccessApiOverload +public fun DataFrame.percentile( percentile: Double, - crossinline expression: RowExpression, -): R? = Aggregators.percentile(percentile).aggregateOf(this, expression) as R? + vararg columns: KProperty, + skipNaN: Boolean = skipNaNDefault, +): Double + where C : Comparable?, C : Number? = + percentileOrNull(percentile, *columns, skipNaN = skipNaN).suggestIfNull("percentile") -// endregion +@AccessApiOverload +public fun DataFrame.percentileOrNull( + percentile: Double, + vararg columns: KProperty, + skipNaN: Boolean = skipNaNDefault, +): Double? + where C : Comparable?, C : Number? 
= + percentileOrNull(percentile, skipNaN) { columns.toColumnSet() } + +// TODO, requires explicit type R due to https://youtrack.jetbrains.com/issue/KT-76683 +@OverloadResolutionByLambdaReturnType +public inline fun ?> DataFrame.percentileOf( + percentile: Double, + crossinline expression: RowExpression, +): R & Any = percentileOfOrNull(percentile, expression).suggestIfNull("percentileOf") -// region GroupBy +// TODO, requires explicit type R due to https://youtrack.jetbrains.com/issue/KT-76683 +@OverloadResolutionByLambdaReturnType +public inline fun ?> DataFrame.percentileOfOrNull( + percentile: Double, + crossinline expression: RowExpression, +): R? = Aggregators.percentileComparables(percentile).aggregateOf(this, expression) + +@OverloadResolutionByLambdaReturnType +public inline fun DataFrame.percentileOf( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + crossinline expression: RowExpression, +): Double + where R : Comparable?, R : Number? = + percentileOfOrNull(percentile, skipNaN, expression).suggestIfNull("percentileOf") + +@OverloadResolutionByLambdaReturnType +public inline fun DataFrame.percentileOfOrNull( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + crossinline expression: RowExpression, +): Double? + where R : Comparable?, R : Number? 
= + Aggregators.percentileNumbers(percentile, skipNaN).aggregateOf(this, expression) -public fun Grouped.percentile(percentile: Double): DataFrame = - percentileFor(percentile, intraComparableColumns()) +public inline fun ?> DataFrame.percentileBy( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + crossinline expression: RowExpression, +): DataRow = percentileByOrNull(percentile, skipNaN, expression).suggestIfNull("percentileBy") + +public fun DataFrame.percentileBy( + percentile: Double, + column: String, + skipNaN: Boolean = skipNaNDefault, +): DataRow = percentileByOrNull(percentile, column, skipNaN).suggestIfNull("percentileBy") -public fun > Grouped.percentileFor( +@AccessApiOverload +public inline fun ?> DataFrame.percentileBy( + percentile: Double, + column: ColumnReference, + skipNaN: Boolean = skipNaNDefault, +): DataRow = percentileByOrNull(percentile, column, skipNaN).suggestIfNull("percentileBy") + +@AccessApiOverload +public inline fun ?> DataFrame.percentileBy( percentile: Double, - columns: ColumnsForAggregateSelector, -): DataFrame = Aggregators.percentile(percentile).aggregateFor(this, columns) + column: KProperty, + skipNaN: Boolean = skipNaNDefault, +): DataRow = percentileByOrNull(percentile, column, skipNaN).suggestIfNull("percentileBy") + +public inline fun ?> DataFrame.percentileByOrNull( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + crossinline expression: RowExpression, +): DataRow? = Aggregators.percentileCommon(percentile, skipNaN).aggregateByOrNull(this, expression) + +public fun DataFrame.percentileByOrNull( + percentile: Double, + column: String, + skipNaN: Boolean = skipNaNDefault, +): DataRow? = percentileByOrNull(percentile, column.toColumnOf?>(), skipNaN) + +@AccessApiOverload +public inline fun ?> DataFrame.percentileByOrNull( + percentile: Double, + column: ColumnReference, + skipNaN: Boolean = skipNaNDefault, +): DataRow? 
= Aggregators.percentileCommon(percentile, skipNaN).aggregateByOrNull(this, column) + +@AccessApiOverload +public inline fun ?> DataFrame.percentileByOrNull( + percentile: Double, + column: KProperty, + skipNaN: Boolean = skipNaNDefault, +): DataRow? = percentileByOrNull(percentile, column.toColumnAccessor(), skipNaN) + +// endregion + +// region GroupBy +@Refine +@Interpretable("GroupByPercentile1") +public fun Grouped.percentile(percentile: Double, skipNaN: Boolean = skipNaNDefault): DataFrame = + percentileFor(percentile, skipNaN, intraComparableColumns()) + +@Refine +@Interpretable("GroupByPercentile0") +public fun ?> Grouped.percentileFor( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + columns: ColumnsForAggregateSelector, +): DataFrame = Aggregators.percentileCommon(percentile, skipNaN).aggregateFor(this, columns) public fun Grouped.percentileFor(percentile: Double, vararg columns: String): DataFrame = percentileFor(percentile) { columns.toComparableColumns() } @AccessApiOverload -public fun > Grouped.percentileFor( +public fun ?> Grouped.percentileFor( percentile: Double, - vararg columns: ColumnReference, -): DataFrame = percentileFor(percentile) { columns.toColumnSet() } + vararg columns: ColumnReference, + skipNaN: Boolean = skipNaNDefault, +): DataFrame = percentileFor(percentile, skipNaN) { columns.toColumnSet() } @AccessApiOverload -public fun > Grouped.percentileFor( +public fun ?> Grouped.percentileFor( percentile: Double, - vararg columns: KProperty, -): DataFrame = percentileFor(percentile) { columns.toColumnSet() } + vararg columns: KProperty, + skipNaN: Boolean = skipNaNDefault, +): DataFrame = percentileFor(percentile, skipNaN) { columns.toColumnSet() } -public fun > Grouped.percentile( +@Refine +@Interpretable("GroupByPercentile0") +public fun ?> Grouped.percentile( percentile: Double, name: String? 
= null, - columns: ColumnsSelector, -): DataFrame = Aggregators.percentile(percentile).aggregateAll(this, name, columns) + skipNaN: Boolean = skipNaNDefault, + columns: ColumnsSelector, +): DataFrame = Aggregators.percentileCommon(percentile, skipNaN).aggregateAll(this, name, columns) -public fun Grouped.percentile(percentile: Double, vararg columns: String, name: String? = null): DataFrame = - percentile(percentile, name) { columns.toComparableColumns() } +public fun Grouped.percentile( + percentile: Double, + vararg columns: String, + name: String? = null, + skipNaN: Boolean = skipNaNDefault, +): DataFrame = percentile(percentile, name, skipNaN) { columns.toComparableColumns() } @AccessApiOverload -public fun > Grouped.percentile( +public fun ?> Grouped.percentile( percentile: Double, - vararg columns: ColumnReference, + vararg columns: ColumnReference, name: String? = null, -): DataFrame = percentile(percentile, name) { columns.toColumnSet() } + skipNaN: Boolean = skipNaNDefault, +): DataFrame = percentile(percentile, name, skipNaN) { columns.toColumnSet() } @AccessApiOverload -public fun > Grouped.percentile( +public fun ?> Grouped.percentile( percentile: Double, - vararg columns: KProperty, + vararg columns: KProperty, name: String? = null, -): DataFrame = percentile(percentile, name) { columns.toColumnSet() } + skipNaN: Boolean = skipNaNDefault, +): DataFrame = percentile(percentile, name, skipNaN) { columns.toColumnSet() } -public inline fun > Grouped.percentileOf( +@Refine +@Interpretable("GroupByPercentileOf") +public inline fun ?> Grouped.percentileOf( percentile: Double, name: String? = null, - crossinline expression: RowExpression, -): DataFrame = Aggregators.percentile(percentile).cast().aggregateOf(this, name, expression) + skipNaN: Boolean = skipNaNDefault, + crossinline expression: RowExpression, +): DataFrame = Aggregators.percentileCommon(percentile, skipNaN).aggregateOf(this, name, expression) + +@Interpretable("GroupByReduceExpression") // TODO? 
+public inline fun ?> GroupBy.percentileBy( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + crossinline rowExpression: RowExpression, +): ReducedGroupBy = reduce { percentileByOrNull(percentile, skipNaN, rowExpression) } + +@AccessApiOverload +public inline fun ?> GroupBy.percentileBy( + percentile: Double, + column: ColumnReference, + skipNaN: Boolean = skipNaNDefault, +): ReducedGroupBy = reduce { percentileByOrNull(percentile, column, skipNaN) } + +public fun GroupBy.percentileBy( + percentile: Double, + column: String, + skipNaN: Boolean = skipNaNDefault, +): ReducedGroupBy = percentileBy(percentile, column.toColumnAccessor().cast?>(), skipNaN) + +@AccessApiOverload +public inline fun ?> GroupBy.percentileBy( + percentile: Double, + column: KProperty, + skipNaN: Boolean = skipNaNDefault, +): ReducedGroupBy = percentileBy(percentile, column.toColumnAccessor(), skipNaN) // endregion // region Pivot -public fun Pivot.percentile(percentile: Double, separate: Boolean = false): DataRow = - percentileFor(percentile, separate, intraComparableColumns()) +public fun Pivot.percentile( + percentile: Double, + separate: Boolean = false, + skipNaN: Boolean = skipNaNDefault, +): DataRow = percentileFor(percentile, separate, skipNaN, intraComparableColumns()) -public fun > Pivot.percentileFor( +public fun ?> Pivot.percentileFor( percentile: Double, separate: Boolean = false, - columns: ColumnsForAggregateSelector, -): DataRow = delegate { percentileFor(percentile, separate, columns) } + skipNaN: Boolean = skipNaNDefault, + columns: ColumnsForAggregateSelector, +): DataRow = delegate { percentileFor(percentile, separate, skipNaN, columns) } public fun Pivot.percentileFor( percentile: Double, vararg columns: String, separate: Boolean = false, -): DataRow = percentileFor(percentile, separate) { columns.toComparableColumns() } + skipNaN: Boolean = skipNaNDefault, +): DataRow = percentileFor(percentile, separate, skipNaN) { columns.toComparableColumns() } 
@AccessApiOverload -public fun > Pivot.percentileFor( +public fun ?> Pivot.percentileFor( percentile: Double, - vararg columns: ColumnReference, + vararg columns: ColumnReference, separate: Boolean = false, -): DataRow = percentileFor(percentile, separate) { columns.toColumnSet() } + skipNaN: Boolean = skipNaNDefault, +): DataRow = percentileFor(percentile, separate, skipNaN) { columns.toColumnSet() } @AccessApiOverload -public fun > Pivot.percentileFor( +public fun ?> Pivot.percentileFor( percentile: Double, - vararg columns: KProperty, + vararg columns: KProperty, separate: Boolean = false, -): DataRow = percentileFor(percentile, separate) { columns.toColumnSet() } + skipNaN: Boolean = skipNaNDefault, +): DataRow = percentileFor(percentile, separate, skipNaN) { columns.toColumnSet() } -public fun > Pivot.percentile(percentile: Double, columns: ColumnsSelector): DataRow = - delegate { percentile(percentile, columns) } +public fun ?> Pivot.percentile( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + columns: ColumnsSelector, +): DataRow = delegate { percentile(percentile, skipNaN, columns) } -public fun Pivot.percentile(percentile: Double, vararg columns: String): DataRow = - percentile(percentile) { columns.toComparableColumns() } +public fun Pivot.percentile( + percentile: Double, + vararg columns: String, + skipNaN: Boolean = skipNaNDefault, +): DataRow = percentile(percentile, skipNaN) { columns.toComparableColumns() } + +@AccessApiOverload +public fun ?> Pivot.percentile( + percentile: Double, + vararg columns: ColumnReference, + skipNaN: Boolean = skipNaNDefault, +): DataRow = percentile(percentile, skipNaN) { columns.toColumnSet() } @AccessApiOverload -public fun > Pivot.percentile( +public fun ?> Pivot.percentile( percentile: Double, - vararg columns: ColumnReference, -): DataRow = percentile(percentile) { columns.toColumnSet() } + vararg columns: KProperty, + skipNaN: Boolean = skipNaNDefault, +): DataRow = percentile(percentile, skipNaN) { 
columns.toColumnSet() } + +public inline fun ?> Pivot.percentileOf( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + crossinline expression: RowExpression, +): DataRow = delegate { percentileOf(percentile, skipNaN, expression) } + +public inline fun ?> Pivot.percentileBy( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + crossinline rowExpression: RowExpression, +): ReducedPivot = reduce { percentileByOrNull(percentile, skipNaN, rowExpression) } @AccessApiOverload -public fun > Pivot.percentile(percentile: Double, vararg columns: KProperty): DataRow = - percentile(percentile) { columns.toColumnSet() } +public inline fun ?> Pivot.percentileBy( + percentile: Double, + column: ColumnReference, + skipNaN: Boolean = skipNaNDefault, +): ReducedPivot = reduce { percentileByOrNull(percentile, column, skipNaN) } -public inline fun > Pivot.percentileOf( +public fun Pivot.percentileBy( percentile: Double, - crossinline expression: RowExpression, -): DataRow = delegate { percentileOf(percentile, expression) } + column: String, + skipNaN: Boolean = skipNaNDefault, +): ReducedPivot = percentileBy(percentile, column.toColumnAccessor().cast?>(), skipNaN) +@AccessApiOverload +public inline fun ?> Pivot.percentileBy( + percentile: Double, + column: KProperty, + skipNaN: Boolean = skipNaNDefault, +): ReducedPivot = percentileBy(percentile, column.toColumnAccessor(), skipNaN) // endregion // region PivotGroupBy -public fun PivotGroupBy.percentile(percentile: Double, separate: Boolean = false): DataFrame = - percentileFor(percentile, separate, intraComparableColumns()) +public fun PivotGroupBy.percentile( + percentile: Double, + separate: Boolean = false, + skipNaN: Boolean = skipNaNDefault, +): DataFrame = percentileFor(percentile, separate, skipNaN, intraComparableColumns()) -public fun > PivotGroupBy.percentileFor( +public fun ?> PivotGroupBy.percentileFor( percentile: Double, separate: Boolean = false, - columns: ColumnsForAggregateSelector, -): DataFrame = 
Aggregators.percentile(percentile).aggregateFor(this, separate, columns) + skipNaN: Boolean = skipNaNDefault, + columns: ColumnsForAggregateSelector, +): DataFrame = Aggregators.percentileCommon(percentile, skipNaN).aggregateFor(this, separate, columns) public fun PivotGroupBy.percentileFor( percentile: Double, vararg columns: String, separate: Boolean = false, -): DataFrame = percentileFor(percentile, separate) { columns.toComparableColumns() } + skipNaN: Boolean = skipNaNDefault, +): DataFrame = percentileFor(percentile, separate, skipNaN) { columns.toComparableColumns() } @AccessApiOverload -public fun > PivotGroupBy.percentileFor( +public fun ?> PivotGroupBy.percentileFor( percentile: Double, - vararg columns: ColumnReference, + vararg columns: ColumnReference, separate: Boolean = false, -): DataFrame = percentileFor(percentile, separate) { columns.toColumnSet() } + skipNaN: Boolean = skipNaNDefault, +): DataFrame = percentileFor(percentile, separate, skipNaN) { columns.toColumnSet() } @AccessApiOverload -public fun > PivotGroupBy.percentileFor( +public fun ?> PivotGroupBy.percentileFor( percentile: Double, - vararg columns: KProperty, + vararg columns: KProperty, separate: Boolean = false, -): DataFrame = percentileFor(percentile, separate) { columns.toColumnSet() } + skipNaN: Boolean = skipNaNDefault, +): DataFrame = percentileFor(percentile, separate, skipNaN) { columns.toColumnSet() } -public fun > PivotGroupBy.percentile( +public fun ?> PivotGroupBy.percentile( percentile: Double, - columns: ColumnsSelector, -): DataFrame = Aggregators.percentile(percentile).aggregateAll(this, columns) + skipNaN: Boolean = skipNaNDefault, + columns: ColumnsSelector, +): DataFrame = Aggregators.percentileCommon(percentile, skipNaN).aggregateAll(this, columns) -public fun PivotGroupBy.percentile(percentile: Double, vararg columns: String): DataFrame = - percentile(percentile) { columns.toComparableColumns() } +public fun PivotGroupBy.percentile( + percentile: Double, + 
vararg columns: String, + skipNaN: Boolean = skipNaNDefault, +): DataFrame = percentile(percentile, skipNaN) { columns.toComparableColumns() } + +@AccessApiOverload +public fun ?> PivotGroupBy.percentile( + percentile: Double, + vararg columns: ColumnReference, + skipNaN: Boolean = skipNaNDefault, +): DataFrame = percentile(percentile, skipNaN) { columns.toColumnSet() } @AccessApiOverload -public fun > PivotGroupBy.percentile( +public fun ?> PivotGroupBy.percentile( percentile: Double, - vararg columns: ColumnReference, -): DataFrame = percentile(percentile) { columns.toColumnSet() } + vararg columns: KProperty, + skipNaN: Boolean = skipNaNDefault, +): DataFrame = percentile(percentile, skipNaN) { columns.toColumnSet() } + +public inline fun ?> PivotGroupBy.percentileOf( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + crossinline expression: RowExpression, +): DataFrame = Aggregators.percentileCommon(percentile, skipNaN).aggregateOf(this, expression) + +public inline fun ?> PivotGroupBy.percentileBy( + percentile: Double, + skipNaN: Boolean = skipNaNDefault, + crossinline rowExpression: RowExpression, +): ReducedPivotGroupBy = reduce { percentileByOrNull(percentile, skipNaN, rowExpression) } @AccessApiOverload -public fun > PivotGroupBy.percentile( +public inline fun ?> PivotGroupBy.percentileBy( percentile: Double, - vararg columns: KProperty, -): DataFrame = percentile(percentile) { columns.toColumnSet() } + column: ColumnReference, + skipNaN: Boolean = skipNaNDefault, +): ReducedPivotGroupBy = reduce { percentileByOrNull(percentile, column, skipNaN) } -public inline fun > PivotGroupBy.percentileOf( +public fun PivotGroupBy.percentileBy( + percentile: Double, + column: String, + skipNaN: Boolean = skipNaNDefault, +): ReducedPivotGroupBy = percentileBy(percentile, column.toColumnAccessor().cast?>(), skipNaN) + +@AccessApiOverload +public inline fun ?> PivotGroupBy.percentileBy( percentile: Double, - crossinline expression: RowExpression, -): DataFrame 
= Aggregators.percentile(percentile).cast().aggregateOf(this, expression) + column: KProperty, + skipNaN: Boolean = skipNaNDefault, +): ReducedPivotGroupBy = percentileBy(percentile, column.toColumnAccessor(), skipNaN) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt index 993f32104e..52a5f7412c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt @@ -11,6 +11,7 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColu import org.jetbrains.kotlinx.dataframe.math.indexOfMax import org.jetbrains.kotlinx.dataframe.math.indexOfMedian import org.jetbrains.kotlinx.dataframe.math.indexOfMin +import org.jetbrains.kotlinx.dataframe.math.indexOfPercentile import org.jetbrains.kotlinx.dataframe.math.maxOrNull import org.jetbrains.kotlinx.dataframe.math.maxTypeConversion import org.jetbrains.kotlinx.dataframe.math.mean @@ -19,7 +20,8 @@ import org.jetbrains.kotlinx.dataframe.math.medianConversion import org.jetbrains.kotlinx.dataframe.math.medianOrNull import org.jetbrains.kotlinx.dataframe.math.minOrNull import org.jetbrains.kotlinx.dataframe.math.minTypeConversion -import org.jetbrains.kotlinx.dataframe.math.percentile +import org.jetbrains.kotlinx.dataframe.math.percentileConversion +import org.jetbrains.kotlinx.dataframe.math.percentileOrNull import org.jetbrains.kotlinx.dataframe.math.std import org.jetbrains.kotlinx.dataframe.math.stdTypeConversion import org.jetbrains.kotlinx.dataframe.math.sum @@ -147,11 +149,35 @@ internal object Aggregators { } } - // T: Comparable? 
-> T - val percentile by withOneOption { percentile: Double -> - flattenReducingForAny> { type -> - asIterable().percentile(percentile, type) - } + // T : primitive Number? -> Double? + // T : Comparable? -> T? + fun percentileCommon( + percentile: Double, + skipNaN: Boolean, + ): Aggregator + where T : Comparable? = + this.percentile.invoke(percentile, skipNaN).cast2() + + // T : Comparable? -> T? + fun percentileComparables(percentile: Double): Aggregator + where T : Comparable? = + percentileCommon(percentile, skipNaNDefault).cast2() + + // T : primitive Number? -> Double? + fun percentileNumbers( + percentile: Double, + skipNaN: Boolean, + ): Aggregator + where T : Comparable?, T : Number? = + percentileCommon(percentile, skipNaN).cast2() + + @Suppress("UNCHECKED_CAST") + private val percentile by withTwoOptions { percentile: Double, skipNaN: Boolean -> + flattenHybridForAny, Comparable?>( + getReturnType = percentileConversion, + reducer = { type -> percentileOrNull(percentile, type, skipNaN) as Comparable? }, + indexOfResult = { type -> indexOfPercentile(percentile, type, skipNaN) }, + ) } // T : primitive Number? -> Double? 
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt index 08ea27a182..9611289994 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt @@ -6,7 +6,6 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.ColumnDescription import org.jetbrains.kotlinx.dataframe.api.add import org.jetbrains.kotlinx.dataframe.api.after -import org.jetbrains.kotlinx.dataframe.api.any import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.asComparable import org.jetbrains.kotlinx.dataframe.api.asNumbers diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt index 4b28a4ec30..8ac6474942 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt @@ -8,18 +8,15 @@ import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber import org.jetbrains.kotlinx.dataframe.impl.nothingType import org.jetbrains.kotlinx.dataframe.impl.renderType -import org.jetbrains.kotlinx.dataframe.math.quickSelect import java.math.BigDecimal import java.math.BigInteger +import kotlin.math.round import kotlin.reflect.KType import kotlin.reflect.full.withNullability import kotlin.reflect.typeOf private val logger = KotlinLogging.logger { } -// TODO median always returns the same type, but this can be confusing for iterables of even length -// TODO (e.g. 
median of [1, 2] should be 1.5, but the type is Int, so it returns 1), Issue #558 - /** * Returns the median of the comparable input: * - `null` if empty @@ -27,7 +24,8 @@ private val logger = KotlinLogging.logger { } * - `Double.NaN` if ![skipNaN] and contains NaN * - (lower) middle else * - * TODO migrate back to percentile when it's flexible enough + * Based on quantile implementation; + * uses [QuantileEstimationMethod.R8] for primitive numbers, else [QuantileEstimationMethod.R3]. */ @PublishedApi internal fun > Sequence.medianOrNull(type: KType, skipNaN: Boolean): Any? { @@ -35,6 +33,9 @@ internal fun > Sequence.medianOrNull(type: KType, skipNaN: type.isMarkedNullable -> error("Encountered nullable type ${renderType(type)} in median function. This should not occur.") + // this means the sequence is empty + type == nothingType -> return null + !type.isIntraComparable() -> error( "Unable to compute the median for ${ @@ -49,44 +50,27 @@ internal fun > Sequence.medianOrNull(type: KType, skipNaN: type == typeOf() -> logger.warn { "Converting Longs to Doubles to calculate the median, loss of precision may occur." } - - // this means the sequence is empty - type == nothingType -> return null } - // propagate NaN to return if they are not to be skipped - if (type.canBeNaN && !skipNaN && any { it.isNaN }) return Double.NaN - - val list = when { - type.canBeNaN -> filter { !it.isNaN } - else -> this - }.toList() + val p = 0.5 - val size = list.size - if (size == 0) return null + // TODO make configurable? 
https://github.com/Kotlin/dataframe/issues/1121 + val (values, method) = + when { + type.isPrimitiveNumber() -> + this.map { (it as Number).toDouble() } to QuantileEstimationMethod.Interpolating.R8 - if (size == 1) { - val single = list.single() - return if (type.isPrimitiveNumber()) (single as Number).toDouble() else single - } + else -> + this to QuantileEstimationMethod.Selecting.R3 + } - val isOdd = size % 2 != 0 - - val middleIndex = (size - 1) / 2 - val lower = list.quickSelect(middleIndex) - val upper = list.quickSelect(middleIndex + 1) - - return when { - isOdd && type.isPrimitiveNumber() -> (lower as Number).toDouble() - isOdd -> lower - type == typeOf() -> (lower as Double + upper as Double) / 2.0 - type == typeOf() -> ((lower as Float).toDouble() + (upper as Float).toDouble()) / 2.0 - type == typeOf() -> ((lower as Int).toDouble() + (upper as Int).toDouble()) / 2.0 - type == typeOf() -> ((lower as Short).toDouble() + (upper as Short).toDouble()) / 2.0 - type == typeOf() -> ((lower as Byte).toDouble() + (upper as Byte).toDouble()) / 2.0 - type == typeOf() -> ((lower as Long).toDouble() + (upper as Long).toDouble()) / 2.0 - else -> lower - } + return values.quantileOrNull( + p = p, + type = type, + skipNaN = skipNaN, + method = method, + name = "median", + ) } /** @@ -95,11 +79,11 @@ internal fun > Sequence.medianOrNull(type: KType, skipNaN: */ internal val medianConversion: CalculateReturnType = { type, isEmpty -> when { - // uses linear interpolation, number 7 of Hyndman and Fan "Sample quantiles in statistical packages" + // uses linear interpolation, R8 of Hyndman and Fan "Sample quantiles in statistical packages" type.isPrimitiveNumber() -> typeOf() // closest rank method, preferring lower middle, - // number 3 of Hyndman and Fan "Sample quantiles in statistical packages" + // R3 of Hyndman and Fan "Sample quantiles in statistical packages" type.isIntraComparable() -> type else -> error("Can not calculate median for type ${renderType(type)}") @@ 
-107,17 +91,20 @@ internal val medianConversion: CalculateReturnType = { type, isEmpty -> } /** - * Returns the index of the median of the comparable input: + * Returns the index of the median in the comparable input: * - `-1` if empty or all `null` * - index of first NaN if ![skipNaN] and contains NaN * - index (lower) middle else * NOTE: For primitive numbers the `seq.elementAt(seq.indexOfMedian())` might be different from `seq.medianOrNull()` * - * TODO migrate back to percentile when it's flexible enough + * Based on quantile implementation; uses [QuantileEstimationMethod.R3]. */ internal fun ?> Sequence.indexOfMedian(type: KType, skipNaN: Boolean): Int { val nonNullType = type.withNullability(false) when { + // this means the sequence is empty + nonNullType == nothingType -> return -1 + !nonNullType.isIntraComparable() -> error( "Unable to compute the median for ${ @@ -129,9 +116,6 @@ internal fun ?> Sequence.indexOfMedian(type: KType, s throw IllegalArgumentException( "Cannot calculate the median for big numbers in DataFrame. Only primitive numbers are supported.", ) - - // this means the sequence is empty - nonNullType == nothingType -> return -1 } // propagate NaN to return if they are not to be skipped @@ -148,22 +132,27 @@ internal fun ?> Sequence.indexOfMedian(type: KType, s IndexedComparable(i, it) } } - val list = when { - nonNullType.canBeNaN -> indexedSequence.filterNot { it.value.isNaN } - else -> indexedSequence - }.toList() - - val size = list.size - if (size == 0) return -1 - if (size == 1) return 0 - val middleIndex = (size - 1) / 2 - val lower = list.quickSelect(middleIndex) + // TODO make configurable? 
https://github.com/Kotlin/dataframe/issues/1121 + val method = QuantileEstimationMethod.R3 + val p = 0.5 + + // get the index where the median can be found in the sorted sequence + val indexEstimation = indexedSequence.quantileIndexEstimation( + p = p, + type = typeOf>(), + skipNaN = skipNaN, + method = method, + name = "median", + ) + if (indexEstimation.isNaN()) return this.indexOfFirst { it.isNaN } + if (indexEstimation < 0.0) return -1 + require(indexEstimation == round(indexEstimation)) { + "median expected a whole number index from quantileIndexEstimation but was $indexEstimation" + } - return lower.index -} + val medianResult = indexedSequence.toList().quickSelect(k = indexEstimation.toInt()) -private data class IndexedComparable>(val index: Int, val value: T) : - Comparable> { - override fun compareTo(other: IndexedComparable): Int = value.compareTo(other.value) + // return the original unsorted index of the found result + return medianResult.index } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/percentile.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/percentile.kt index bbcc3f7f26..750e2d28ad 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/percentile.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/percentile.kt @@ -1,117 +1,145 @@ package org.jetbrains.kotlinx.dataframe.math -import org.jetbrains.kotlinx.dataframe.impl.asList +import io.github.oshai.kotlinlogging.KotlinLogging +import org.jetbrains.kotlinx.dataframe.api.isNaN +import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.CalculateReturnType +import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable +import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber +import org.jetbrains.kotlinx.dataframe.impl.nothingType +import org.jetbrains.kotlinx.dataframe.impl.renderType import java.math.BigDecimal import java.math.BigInteger +import kotlin.math.round import kotlin.reflect.KType - -@PublishedApi 
-internal fun > Iterable.percentile(percentile: Double, type: KType): T? { - require(percentile in 0.0..100.0) { "Percentile must be in range [0, 100]" } - - @Suppress("UNCHECKED_CAST") - val list = if (type.isMarkedNullable) filterNotNull() else (this as Iterable).asList() - val size = list.size - if (size == 0) return null - - val index = (percentile / 100.0 * (size - 1)).toInt() - val fraction = (percentile / 100.0 * (size - 1)) - index - - // median handle for even sized list (legacy logic) - if (percentile == 50.0 && size % 2 == 0) { - val lower = list.quickSelect(index) - val upper = list.quickSelect(index + 1) - - return when (type) { - Double::class -> ((lower as Double + upper as Double) / 2.0) as T - Float::class -> ((lower as Float + upper as Float) / 2.0f) as T - Int::class -> ((lower as Int + upper as Int) / 2) as T - Short::class -> ((lower as Short + upper as Short) / 2).toShort() as T - Long::class -> ((lower as Long + upper as Long) / 2L) as T - Byte::class -> ((lower as Byte + upper as Byte) / 2).toByte() as T - BigDecimal::class -> ((lower as BigDecimal + upper as BigDecimal) / 2.toBigDecimal()) as T - BigInteger::class -> ((lower as BigInteger + upper as BigInteger) / 2.toBigInteger()) as T - else -> lower - } +import kotlin.reflect.full.withNullability +import kotlin.reflect.typeOf + +private val logger = KotlinLogging.logger { } + +/** + * Uses [QuantileEstimationMethod.R8] for primitive numbers, else [QuantileEstimationMethod.R3] + */ +internal fun > Sequence.percentileOrNull(percentile: Double, type: KType, skipNaN: Boolean): Any? { + when { + percentile !in 0.0..100.0 -> error("Percentile must be in range [0, 100]") + + type.isMarkedNullable -> + error("Encountered nullable type ${renderType(type)} in percentile function. This should not occur.") + + // this means the sequence is empty + type == nothingType -> return null + + !type.isIntraComparable() -> + error( + "Unable to compute the percentile for ${ + renderType(type) + }. 
Only primitive numbers or self-comparables are supported.", + ) + + type == typeOf() || type == typeOf() -> + throw IllegalArgumentException( + "Cannot calculate the percentile for big numbers in DataFrame. Only primitive numbers are supported.", + ) + + type == typeOf() -> + logger.warn { "Converting Longs to Doubles to calculate the percentile, loss of precision may occur." } } - if (fraction == 0.0) { - return list.quickSelect(index) - } - - val lower = list.quickSelect(index) - val upper = list.quickSelect(index + 1) - - return when (type.classifier) { - Double::class -> ((lower as Double) + (upper as Double - lower as Double) * fraction) as T + // percentile of 25.0 means the 25th 100-quantile, so 25 / 100 = 0.25 + val p = percentile / 100.0 - Float::class -> ((lower as Float) + (upper as Float - lower as Float) * fraction) as T - - Int::class -> ((lower as Int) + (upper as Int - lower as Int) * fraction).toInt() as T + // TODO make configurable https://github.com/Kotlin/dataframe/issues/1121 + val (values, method) = + when { + type.isPrimitiveNumber() -> + this.map { (it as Number).toDouble() } to QuantileEstimationMethod.Interpolating.R8 - Short::class -> ((lower as Short) + (upper as Short - lower as Short) * fraction).toInt().toShort() as T + else -> + this to QuantileEstimationMethod.Selecting.R3 + } - Long::class -> ((lower as Long) + (upper as Long - lower as Long) * fraction).toLong() as T + return values.quantileOrNull( + p = p, + type = type, + skipNaN = skipNaN, + method = method, + name = "percentile", + ) +} - Byte::class -> ((lower as Byte) + (upper as Byte - lower as Byte) * fraction).toInt().toByte() as T +internal val percentileConversion: CalculateReturnType = { type, isEmpty -> + when { + // uses linear interpolation, R8 of Hyndman and Fan "Sample quantiles in statistical packages" + type.isPrimitiveNumber() -> typeOf() - BigDecimal::class -> ( - (lower as BigDecimal) + - (upper as BigDecimal - lower as BigDecimal) * fraction.toBigDecimal() 
- ) as T + // closest rank method, preferring lower middle, + // R3 of Hyndman and Fan "Sample quantiles in statistical packages" + type.isIntraComparable() -> type - BigInteger::class -> ( - (lower as BigInteger) + - ( - (upper as BigInteger - lower as BigInteger) * fraction.toBigDecimal() - .toBigInteger() - ) - ) as T + else -> error("Can not calculate percentile for type ${renderType(type)}") + }.withNullability(isEmpty) +} - else -> lower +/** + * Returns the index of the [percentile] in the unsorted sequence [this]. + * If `!`[skipNaN] and the sequence [this] contains NaN, the index of the first NaN will be returned. + * Returns -1 if the sequence is empty. + */ +internal fun ?> Sequence.indexOfPercentile( + percentile: Double, + type: KType, + skipNaN: Boolean, +): Int { + val nonNullType = type.withNullability(false) + when { + percentile !in 0.0..100.0 -> error("Percentile must be in range [0, 100]") + + // this means the sequence is empty + nonNullType == nothingType -> return -1 + + !nonNullType.isIntraComparable() -> + error( + "Unable to compute the percentile for ${ + renderType(type) + }. Only primitive numbers or self-comparables are supported.", + ) + + nonNullType == typeOf() || nonNullType == typeOf() -> + throw IllegalArgumentException( + "Cannot calculate the percentile for big numbers in DataFrame. 
Only primitive numbers are supported.", + ) } -} -@PublishedApi -internal fun > List.quickSelect(k: Int): T { - if (k < 0 || k >= size) throw IndexOutOfBoundsException("k = $k, size = $size") - - var list = this - var temp = mutableListOf() - var less = mutableListOf() - var k = k - var greater = mutableListOf() - while (list.size > 1) { - var equal = 0 - val x = list.random() - greater.clear() - less.clear() - for (v in list) { - val comp = v.compareTo(x) - when { - comp < 0 -> less.add(v) - comp > 0 -> greater.add(v) - else -> equal++ - } - } - when { - k < less.size -> { - list = less - less = temp - temp = list - } - - k < less.size + equal -> { - return x - } - - else -> { - list = greater - greater = temp - temp = list - k -= less.size + equal - } + val indexedSequence = this.mapIndexedNotNull { i, it -> + if (it == null) { + null + } else { + IndexedComparable(i, it) } } - return list[0] + + // TODO make configurable https://github.com/Kotlin/dataframe/issues/1121 + val method = QuantileEstimationMethod.R3 + + // percentile of 25.0 means the 25th 100-quantile, so 25 / 100 = 0.25 + val p = percentile / 100.0 + + // get the index where the percentile can be found in the sorted sequence + val indexEstimation = indexedSequence.quantileIndexEstimation( + p = p, + type = typeOf>(), + skipNaN = skipNaN, + method = method, + name = "percentile", + ) + if (indexEstimation.isNaN()) return this.indexOfFirst { it.isNaN } + if (indexEstimation < 0.0) return -1 + require(indexEstimation == round(indexEstimation)) { + "percentile expected a whole number index from quantileIndexEstimation but was $indexEstimation" + } + + val percentileResult = indexedSequence.toList().quickSelect(k = indexEstimation.toInt()) + + // return the original unsorted index of the found result + return percentileResult.index } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/quantile.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/quantile.kt new file mode 
100644 index 0000000000..e7dbcfc0f4 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/quantile.kt @@ -0,0 +1,333 @@ +package org.jetbrains.kotlinx.dataframe.math + +import io.github.oshai.kotlinlogging.KotlinLogging +import org.jetbrains.kotlinx.dataframe.api.isNaN +import org.jetbrains.kotlinx.dataframe.impl.canBeNaN +import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable +import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber +import org.jetbrains.kotlinx.dataframe.impl.nothingType +import org.jetbrains.kotlinx.dataframe.impl.renderType +import java.math.BigDecimal +import java.math.BigInteger +import kotlin.math.ceil +import kotlin.math.floor +import kotlin.math.round +import kotlin.reflect.KType +import kotlin.reflect.full.withNullability +import kotlin.reflect.typeOf + +private val logger = KotlinLogging.logger { } + +/** + * Returns the p-quantile: the k'th q-quantile, where p = k/q. + * + * When [method] is a [QuantileEstimationMethod.Selecting] method, + * [this] can be a sequence with any self-comparable type. + * The returned value will be selected from the sequence. + * + * Otherwise, when [method] is a [QuantileEstimationMethod.Interpolating] method, + * [this] can only be a sequence with primitive number types. + * The returned value will be [Double]. + * + * Nulls are not allowed. If [skipNaN] is `false` and NaN is among the values, NaN is returned; else NaNs are skipped. + * + * @see QuantileEstimationMethod + */ +internal fun > Sequence.quantileOrNull( + p: Double, + type: KType, + skipNaN: Boolean, + method: QuantileEstimationMethod, + name: String = "quantile", +): Any? { + when { + p !in 0.0..1.0 -> error("Quantile must be in range [0, 1]") + + type.isMarkedNullable -> + error("Encountered nullable type ${renderType(type)} in $name function. This should not occur.") + + // this means the sequence is empty + type == nothingType -> return null + + !type.isIntraComparable() -> + error( + "Unable to compute the $name for ${ + renderType(type) + }. 
Only primitive numbers or self-comparables are supported.", + ) + + type == typeOf() || type == typeOf() -> + throw IllegalArgumentException( + "Cannot calculate the $name for big numbers in DataFrame. Only primitive numbers are supported.", + ) + + type == typeOf() -> + logger.warn { "Converting Longs to Doubles to calculate the $name, loss of precision may occur." } + } + + // propagate NaN to return if they are not to be skipped + if (type.canBeNaN && !skipNaN && any { it.isNaN }) { + // ensure that using a selecting quantile estimation method always returns the same type as the input + if (type == typeOf() && method is QuantileEstimationMethod.Selecting) return Float.NaN + + return Double.NaN + } + + val list = when { + type.canBeNaN -> filter { !it.isNaN } + else -> this + }.toList() + + val size = list.size + if (size == 0) return null + + if (size == 1) { + val single = list.single() + return if (type.isPrimitiveNumber()) (single as Number).toDouble() else single + } + + return when (method) { + is QuantileEstimationMethod.Selecting -> + method.quantile(p, list as List) + + is QuantileEstimationMethod.Interpolating -> { + require(type.isPrimitiveNumber()) { + "Cannot calculate the $name for non-primitive numbers with estimation method $method." + } + @Suppress("UNCHECKED_CAST") + val convertedList = + if (type == typeOf()) { + list as List + } else { + list.map { (it as Number).toDouble() } + } + + method.quantile(p, convertedList) + } + } +} + +/** + * Returns the index `i` of the [p]-quantile: the k'th q-quantile, where p = k/q. + * + * The returned index `i` is either exactly or approaching the index of the quantile in the sequence [this] + * (when it's sorted and NaN's removed). + * Returns -1.0 if the sequence [this] is empty. + * Returns [Double.NaN] if `!`[skipNaN] and a NaN is encountered. 
+ */ +internal fun > Sequence.quantileIndexEstimation( + p: Double, + type: KType, + skipNaN: Boolean, + method: QuantileEstimationMethod, + name: String = "quantile", +): Double { + val nonNullType = type.withNullability(false) + + when { + p !in 0.0..1.0 -> error("Quantile must be in range [0, 1]") + + type.isMarkedNullable -> + error("Encountered nullable type ${renderType(type)} in $name function. This should not occur.") + + // this means the sequence is empty + type == nothingType -> return -1.0 + + !nonNullType.isIntraComparable() -> + error( + "Unable to compute the $name for ${ + renderType(type) + }. Only primitive numbers or self-comparables are supported.", + ) + + method is QuantileEstimationMethod.Interpolating && !nonNullType.isPrimitiveNumber() -> + error( + "Cannot calculate the $name for type ${renderType(type)} with estimation method $method. " + + "For piecewise linear methods, only primitive numbers are supported", + ) + } + + // propagate NaN to return if they are not to be skipped + if (nonNullType.canBeNaN && !skipNaN) { + if (any { it.isNaN }) return Double.NaN + } + val list = when { + nonNullType.canBeNaN -> this.filterNot { it.isNaN } + else -> this + }.toList() + + val size = list.size + if (size == 0) return -1.0 + if (size == 1) return 0.0 + + return method.indexOfQuantile(p, size).toDouble() +} + +/** + * Inspired by Hyndman and Fan (1996) Sample Quantiles in Statistical Packages. The American Statistician, 50, 361-365. + * DOI:10.1080/00031305.1996.10473566 + * + * and https://commons.apache.org/proper/commons-statistics/commons-statistics-descriptive/javadocs/api-1.1/org/apache/commons/statistics/descriptive/Quantile.EstimationMethod.html + * + * They are split in [Selecting] (where [oneBasedIndexOfQuantile] gives an exact index of the quantile in a sorted list of type [Int]) + * and [Interpolating] (where [oneBasedIndexOfQuantile] gives an approximation for that index of type [Double]). 
+ * For the [Selecting], the [Value] type can thus be any self-comparable type, but for [Interpolating], + * [Value] can only be of type [Double], because it needs to perform calculations on the values. + * + * TODO https://github.com/Kotlin/dataframe/issues/1121 + * - add R2, R4, R5, R6, R9 + * - make public if configurable for percentile function + */ +internal sealed interface QuantileEstimationMethod, Index : Number> { + + /** + * Gives the (1-based) index of the [p]-quantile for a distribution of size [count]. + * If the result `h` is a whole number, the `h`'th smallest of the [count] values is the quantile estimate. + * If not, `h` is an estimation of the index of the p-quantile. Rounding or interpolation needs to occur to get + * the actual quantile estimate. + */ + fun oneBasedIndexOfQuantile(p: Double, count: Int): Index + + fun quantile(p: Double, values: List): Value + + sealed interface Selecting : QuantileEstimationMethod, Int> { + + /** Inverse of the empirical distribution function. 
*/ + data object R1 : Selecting { + override fun oneBasedIndexOfQuantile(p: Double, count: Int): Int = + ceil(p * count).toInt() + .coerceIn(1..count) + + @Suppress("UNCHECKED_CAST") + override fun quantile(p: Double, values: List>): Comparable { + val h = indexOfQuantile(p, values.size).toInt() + return values.quickSelect(h) + } + } + + /** The observation closest to `count * p` */ + data object R3 : Selecting { + // following apache commons + paper instead of wikipedia + override fun oneBasedIndexOfQuantile(p: Double, count: Int): Int = + round(count * p).toInt() + .coerceIn(1..count) + + @Suppress("UNCHECKED_CAST") + override fun quantile(p: Double, values: List>): Comparable { + val h = indexOfQuantile(p, values.size).toInt() + return values.quickSelect(h) + } + } + } + + // TODO add R2, R4, R5, R6, R9 https://github.com/Kotlin/dataframe/issues/1121 + sealed interface Interpolating : QuantileEstimationMethod { + + /** Linear interpolation of the modes for the order statistics for the uniform distribution on [0, 1]. */ + data object R7 : Interpolating, PieceWiseLinear { + override fun oneBasedIndexOfQuantile(p: Double, count: Int): Double = + ((count - 1.0) * p + 1.0) + .coerceIn(1.0..count.toDouble()) + } + + /** Linear interpolation of the approximate medians for order statistics. Recommended by H & F. 
*/ + data object R8 : Interpolating, PieceWiseLinear { + override fun oneBasedIndexOfQuantile(p: Double, count: Int): Double = + ((count + 1.0 / 3.0) * p + 1.0 / 3.0) + .coerceIn(1.0..count.toDouble()) + } + + private interface PieceWiseLinear : Interpolating { + override fun quantile(p: Double, values: List): Double { + val h = oneBasedIndexOfQuantile(p, values.size) + return values.quickSelect(floor(h).toInt() - 1) + (h - floor(h)) * ( + values.quickSelect(ceil(h).toInt() - 1) - + values.quickSelect(floor(h).toInt() - 1) + ) + } + } + } + + // shortcuts to the various estimation methods + // TODO add R2, R4, R5, R6, R9 https://github.com/Kotlin/dataframe/issues/1121 + companion object { + val R1 = Selecting.R1 + val R3 = Selecting.R3 + val R7 = Interpolating.R7 + val R8 = Interpolating.R8 + } +} + +// overload to get the right comparable type +@Suppress("UNCHECKED_CAST") +internal fun > QuantileEstimationMethod.Selecting.quantile(p: Double, values: List): T = + quantile(p, values as List>) as T + +@Suppress("UNCHECKED_CAST") +internal fun > QuantileEstimationMethod.Selecting.cast(): QuantileEstimationMethod = + this as QuantileEstimationMethod + +// corrects oneBasedIndexOfQuantile to zero-based index +@Suppress("UNCHECKED_CAST") +internal fun QuantileEstimationMethod<*, IndexType>.indexOfQuantile( + p: Double, + count: Int, +): IndexType { + val oneBased = oneBasedIndexOfQuantile(p = p, count = count) + return when (this) { + is QuantileEstimationMethod.Interpolating -> oneBased as Double - 1.0 + is QuantileEstimationMethod.Selecting -> oneBased as Int - 1 + } as IndexType +} + +/** + * Selects the [k]'th (zero-based) "smallest" element from list [this] + */ +@PublishedApi +internal fun > List.quickSelect(k: Int): T { + if (k < 0 || k >= size) throw IndexOutOfBoundsException("k = $k, size = $size") + + var list = this + var temp = mutableListOf() + var less = mutableListOf() + var k = k + var greater = mutableListOf() + while (list.size > 1) { + var equal = 0 + val x = 
list.random() + greater.clear() + less.clear() + for (v in list) { + val comp = v.compareTo(x) + when { + comp < 0 -> less.add(v) + comp > 0 -> greater.add(v) + else -> equal++ + } + } + when { + k < less.size -> { + list = less + less = temp + temp = list + } + + k < less.size + equal -> { + return x + } + + else -> { + list = greater + greater = temp + temp = list + k -= less.size + equal + } + } + } + return list[0] +} + +internal data class IndexedComparable>(val index: Int, val value: T) : + Comparable> { + override fun compareTo(other: IndexedComparable): Int = value.compareTo(other.value) +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index 94ea105d5d..60e5e34bbb 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -83,6 +83,10 @@ internal const val ROW_MAX_OR_NULL = "`rowMaxOrNull` is deprecated in favor of ` internal const val ROW_MEDIAN = "`rowMedian` is deprecated in favor of `rowMedianOf`. $MESSAGE_0_16" internal const val ROW_MEDIAN_OR_NULL = "`rowMedianOrNull` is deprecated in favor of `rowMedianOfOrNull`. $MESSAGE_0_16" +internal const val ROW_PERCENTILE = "`rowPercentile` is deprecated in favor of `rowPercentileOf`. $MESSAGE_0_16" +internal const val ROW_PERCENTILE_OR_NULL = + "`rowPercentileOrNull` is deprecated in favor of `rowPercentileOfOrNull`. $MESSAGE_0_16" + internal const val SUM_NO_SKIPNAN = "This function is just here for binary compatibility. $MESSAGE_0_16" internal const val MAX_NO_SKIPNAN = "This function is just here for binary compatibility. $MESSAGE_0_16" internal const val MIN_NO_SKIPNAN = "This function is just here for binary compatibility. 
$MESSAGE_0_16" diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt index 206f00d0f4..09889459a9 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt @@ -65,7 +65,7 @@ class DescribeTests { mean.shouldBeNaN() std.shouldBeNaN() min.isNaN shouldBe true - p25 shouldBe 1.75 + p25.isNaN shouldBe true median.isNaN shouldBe true p75.isNaN shouldBe true max.isNaN shouldBe true diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/QuantileTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/QuantileTests.kt new file mode 100644 index 0000000000..eccdcf82c6 --- /dev/null +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/QuantileTests.kt @@ -0,0 +1,848 @@ +package org.jetbrains.kotlinx.dataframe.statistics + +import io.kotest.matchers.doubles.plusOrMinus +import io.kotest.matchers.shouldBe +import org.apache.commons.statistics.descriptive.Quantile +import org.jetbrains.kotlinx.dataframe.math.QuantileEstimationMethod +import org.jetbrains.kotlinx.dataframe.math.quantileOrNull +import org.junit.Test +import kotlin.reflect.typeOf + +class QuantileTests { + + @Test + fun `linear estimation`() { + // Test R8 with Double - p = 0.1 (10th percentile) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.1, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.1) + .plusOrMinus(1e-10) + + // Test R8 with Double - p = 0.5 (median) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 
2.0), 0.5) + .plusOrMinus(1e-10) + + // Test R8 with Double - p = 0.25 (first quartile) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.25) + .plusOrMinus(1e-10) + + // Test R8 with Double - p = 0.75 (third quartile) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.75) + .plusOrMinus(1e-10) + + // Test R8 with Double - p = 0.9 (90th percentile) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.9, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.9) + .plusOrMinus(1e-10) + + // Test R7 with Double - p = 0.1 (10th percentile) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.1, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.1) + .plusOrMinus(1e-10) + + // Test R7 with Double - p = 0.5 (median) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.5) + .plusOrMinus(1e-10) + + // Test R7 with Double - p = 0.25 (first quartile) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + 
.evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.25) + .plusOrMinus(1e-10) + + // Test R7 with Double - p = 0.75 (third quartile) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.75) + .plusOrMinus(1e-10) + + // Test R7 with Double - p = 0.9 (90th percentile) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.9, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.9) + .plusOrMinus(1e-10) + + // Test R8 with Int - p = 0.5 (median) + sequenceOf(1, 4, 3, 2).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.5) + .plusOrMinus(1e-10) + + // Test R8 with Int - p = 0.25 (first quartile) + sequenceOf(1, 4, 3, 2).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.25) + .plusOrMinus(1e-10) + + // Test R8 with Int - p = 0.75 (third quartile) + sequenceOf(1, 4, 3, 2).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.75) + .plusOrMinus(1e-10) + + // Test R7 with Int - p = 0.5 (median) + sequenceOf(1, 4, 3, 2).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + 
.evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.5) + .plusOrMinus(1e-10) + + // Test R7 with Int - p = 0.25 (first quartile) + sequenceOf(1, 4, 3, 2).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.25) + .plusOrMinus(1e-10) + + // Test R7 with Int - p = 0.75 (third quartile) + sequenceOf(1, 4, 3, 2).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.75) + .plusOrMinus(1e-10) + + // Test R8 with Float - p = 0.5 (median) + sequenceOf(1f, 4f, 3f, 2f).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.5) + .plusOrMinus(1e-10) + + // Test R8 with Float - p = 0.25 (first quartile) + sequenceOf(1f, 4f, 3f, 2f).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.25) + .plusOrMinus(1e-10) + + // Test R8 with Float - p = 0.75 (third quartile) + sequenceOf(1f, 4f, 3f, 2f).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.75) + .plusOrMinus(1e-10) + + // Test R7 with Float - p = 0.5 (median) + sequenceOf(1f, 4f, 3f, 2f).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + 
.evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.5) + .plusOrMinus(1e-10) + + // Test R7 with Float - p = 0.25 (first quartile) + sequenceOf(1f, 4f, 3f, 2f).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.25) + .plusOrMinus(1e-10) + + // Test R7 with Float - p = 0.75 (third quartile) + sequenceOf(1f, 4f, 3f, 2f).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.75) + .plusOrMinus(1e-10) + + // Test R8 with Long - p = 0.5 (median) + sequenceOf(1L, 4L, 3L, 2L).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.5) + .plusOrMinus(1e-10) + + // Test R8 with Long - p = 0.25 (first quartile) + sequenceOf(1L, 4L, 3L, 2L).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.25) + .plusOrMinus(1e-10) + + // Test R8 with Long - p = 0.75 (third quartile) + sequenceOf(1L, 4L, 3L, 2L).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.75) + .plusOrMinus(1e-10) + + // Test R7 with Long - p = 0.5 (median) + sequenceOf(1L, 4L, 3L, 2L).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + 
.evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.5) + .plusOrMinus(1e-10) + + // Test R7 with Long - p = 0.25 (first quartile) + sequenceOf(1L, 4L, 3L, 2L).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.25) + .plusOrMinus(1e-10) + + // Test R7 with Long - p = 0.75 (third quartile) + sequenceOf(1L, 4L, 3L, 2L).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R7, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF7) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.75) + .plusOrMinus(1e-10) + + // Test with NaN values and skipNaN = false - p = 0.5 (median) + val nanResult = sequenceOf(1.0, Double.NaN, 3.0, 2.0).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = false, + method = QuantileEstimationMethod.R8, + ) + (nanResult as Double).isNaN() shouldBe true + + // Test with NaN values and skipNaN = false - p = 0.25 (first quartile) + val nanResult25 = sequenceOf(1.0, Double.NaN, 3.0, 2.0).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = false, + method = QuantileEstimationMethod.R8, + ) + (nanResult25 as Double).isNaN() shouldBe true + + // Test with NaN values and skipNaN = false - p = 0.75 (third quartile) + val nanResult75 = sequenceOf(1.0, Double.NaN, 3.0, 2.0).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = false, + method = QuantileEstimationMethod.R8, + ) + (nanResult75 as Double).isNaN() shouldBe true + + // Test with NaN values and skipNaN = true - p = 0.5 (median) + sequenceOf(1.0, Double.NaN, 3.0, 2.0).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 3.0, 2.0), 0.5) + .plusOrMinus(1e-10) + + // Test with NaN values and skipNaN = 
true - p = 0.25 (first quartile) + sequenceOf(1.0, Double.NaN, 3.0, 2.0).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 3.0, 2.0), 0.25) + .plusOrMinus(1e-10) + + // Test with NaN values and skipNaN = true - p = 0.75 (third quartile) + sequenceOf(1.0, Double.NaN, 3.0, 2.0).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF8) + .evaluate(doubleArrayOf(1.0, 3.0, 2.0), 0.75) + .plusOrMinus(1e-10) + } + + @Suppress("UNCHECKED_CAST") + @Test + fun `constant estimation`() { + // Test R3 with Char - p = 0.1 (10th percentile) + sequenceOf('a', 'c', 'b').quantileOrNull( + p = 0.1, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe 'a' + + // Test R3 with Char - p = 0.5 (median) + sequenceOf('a', 'c', 'b').quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe 'b' + + // Test R3 with Char - p = 0.25 (first quartile) + sequenceOf('a', 'c', 'b').quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe 'a' + + // Test R3 with Char - p = 0.75 (third quartile) + sequenceOf('a', 'c', 'b').quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe 'b' + + // Test R3 with Char - p = 0.9 (90th percentile) + sequenceOf('a', 'c', 'b').quantileOrNull( + p = 0.9, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe 'c' + + // Test R1 with Char - p = 0.1 (10th percentile) + sequenceOf('a', 'c', 'b').quantileOrNull( + 
p = 0.1, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe 'a' + + // Test R1 with Char - p = 0.5 (median) + sequenceOf('a', 'c', 'b').quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe 'b' + + // Test R1 with Char - p = 0.25 (first quartile) + sequenceOf('a', 'c', 'b').quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe 'a' + + // Test R1 with Char - p = 0.75 (third quartile) + sequenceOf('a', 'c', 'b').quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe 'c' + + // Test R1 with Char - p = 0.9 (90th percentile) + sequenceOf('a', 'c', 'b').quantileOrNull( + p = 0.9, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe 'c' + + // Test R3 with String - p = 0.5 (median) + sequenceOf("apple", "cherry", "banana").quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe "banana" + + // Test R3 with String - p = 0.25 (first quartile) + sequenceOf("apple", "cherry", "banana").quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe "apple" + + // Test R3 with String - p = 0.75 (third quartile) + sequenceOf("apple", "cherry", "banana").quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe "banana" + + // Test R1 with String - p = 0.5 (median) + sequenceOf("apple", "cherry", "banana").quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as 
QuantileEstimationMethod, + ) shouldBe "banana" + + // Test R1 with String - p = 0.25 (first quartile) + sequenceOf("apple", "cherry", "banana").quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe "apple" + + // Test R1 with String - p = 0.75 (third quartile) + sequenceOf("apple", "cherry", "banana").quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe "cherry" + + // Test R3 with Int (primitive number) - p = 0.5 (median) + sequenceOf(1, 4, 3, 2).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF3) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.5) + .toInt() + + // Test R3 with Int (primitive number) - p = 0.25 (first quartile) + sequenceOf(1, 4, 3, 2).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF3) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.25) + .toInt() + + // Test R3 with Int (primitive number) - p = 0.75 (third quartile) + sequenceOf(1, 4, 3, 2).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF3) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.75) + .toInt() + + // Test R1 with Int (primitive number) - p = 0.5 (median) + sequenceOf(1, 4, 3, 2).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF1) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.5) + .toInt() + + // 
Test R1 with Int (primitive number) - p = 0.25 (first quartile) + sequenceOf(1, 4, 3, 2).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF1) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.25) + .toInt() + + // Test R1 with Int (primitive number) - p = 0.75 (third quartile) + sequenceOf(1, 4, 3, 2).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF1) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.75) + .toInt() + + // Test R3 with Double (primitive number) - p = 0.5 (median) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF3) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.5) + + // Test R3 with Double (primitive number) - p = 0.25 (first quartile) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF3) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.25) + + // Test R3 with Double (primitive number) - p = 0.75 (third quartile) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF3) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.75) + + // Test R1 with Double (primitive number) - p = 0.5 (median) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = 
QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF1) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.5) + + // Test R1 with Double (primitive number) - p = 0.25 (first quartile) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF1) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.25) + + // Test R1 with Double (primitive number) - p = 0.75 (third quartile) + sequenceOf(1.0, 4.0, 3.0, 2.0).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R1 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF1) + .evaluate(doubleArrayOf(1.0, 4.0, 3.0, 2.0), 0.75) + + // Test with NaN values and skipNaN = false - p = 0.5 (median) + val nanResult = sequenceOf(1.0, Double.NaN, 3.0, 2.0).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = false, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) + (nanResult as Double).isNaN() shouldBe true + + // Test with NaN values and skipNaN = false - p = 0.25 (first quartile) + val nanResult25 = sequenceOf(1.0, Double.NaN, 3.0, 2.0).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = false, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) + (nanResult25 as Double).isNaN() shouldBe true + + // Test with NaN values and skipNaN = false - p = 0.75 (third quartile) + val nanResult75 = sequenceOf(1.0, Double.NaN, 3.0, 2.0).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = false, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) + (nanResult75 as Double).isNaN() shouldBe true + + // Test with NaN values and skipNaN = true - p = 0.5 (median) + sequenceOf(1.0, Double.NaN, 3.0, 2.0).quantileOrNull( + p = 0.5, + type 
= typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF3) + .evaluate(doubleArrayOf(1.0, 3.0, 2.0), 0.5) + + // Test with NaN values and skipNaN = true - p = 0.25 (first quartile) + sequenceOf(1.0, Double.NaN, 3.0, 2.0).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF3) + .evaluate(doubleArrayOf(1.0, 3.0, 2.0), 0.25) + + // Test with NaN values and skipNaN = true - p = 0.75 (third quartile) + sequenceOf(1.0, Double.NaN, 3.0, 2.0).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe Quantile.withDefaults().with(Quantile.EstimationMethod.HF3) + .evaluate(doubleArrayOf(1.0, 3.0, 2.0), 0.75) + } + + @Test + fun `edge cases`() { + // Empty sequence - p = 0.1 (10th percentile) + sequenceOf().quantileOrNull( + p = 0.1, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe null + + // Empty sequence - p = 0.5 (median) + sequenceOf().quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe null + + // Empty sequence - p = 0.25 (first quartile) + sequenceOf().quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe null + + // Empty sequence - p = 0.75 (third quartile) + sequenceOf().quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe null + + // Empty sequence - p = 0.9 (90th percentile) + sequenceOf().quantileOrNull( + p = 0.9, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe null + + // Single element sequence - linear estimation - p = 0.1 (10th percentile) + 
sequenceOf(5.0).quantileOrNull( + p = 0.1, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe 5.0 + + // Single element sequence - linear estimation - p = 0.5 (median) + sequenceOf(5.0).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe 5.0 + + // Single element sequence - linear estimation - p = 0.25 (first quartile) + sequenceOf(5.0).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe 5.0 + + // Single element sequence - linear estimation - p = 0.75 (third quartile) + sequenceOf(5.0).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe 5.0 + + // Single element sequence - linear estimation - p = 0.9 (90th percentile) + sequenceOf(5.0).quantileOrNull( + p = 0.9, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe 5.0 + + // Single element sequence - constant estimation - p = 0.5 (median) + sequenceOf("test").quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe "test" + + // Single element sequence - constant estimation - p = 0.25 (first quartile) + sequenceOf("test").quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe "test" + + // Single element sequence - constant estimation - p = 0.75 (third quartile) + sequenceOf("test").quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R3 as QuantileEstimationMethod, + ) shouldBe "test" + + // We don't test extreme low quantile values (p close to 0.0) as they can cause index calculation issues + + // We don't test extreme high quantile values (p close to 1.0) as they can cause index calculation issues + + // All NaN values 
with skipNaN = true - p = 0.1 (10th percentile) + sequenceOf(Double.NaN, Double.NaN).quantileOrNull( + p = 0.1, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe null + + // All NaN values with skipNaN = true - p = 0.5 (median) + sequenceOf(Double.NaN, Double.NaN).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe null + + // All NaN values with skipNaN = true - p = 0.25 (first quartile) + sequenceOf(Double.NaN, Double.NaN).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe null + + // All NaN values with skipNaN = true - p = 0.75 (third quartile) + sequenceOf(Double.NaN, Double.NaN).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe null + + // All NaN values with skipNaN = true - p = 0.9 (90th percentile) + sequenceOf(Double.NaN, Double.NaN).quantileOrNull( + p = 0.9, + type = typeOf(), + skipNaN = true, + method = QuantileEstimationMethod.R8, + ) shouldBe null + + // All NaN values with skipNaN = false - p = 0.1 (10th percentile) + val result01 = sequenceOf(Double.NaN, Double.NaN).quantileOrNull( + p = 0.1, + type = typeOf(), + skipNaN = false, + method = QuantileEstimationMethod.R8, + ) + (result01 as Double).isNaN() shouldBe true + + // All NaN values with skipNaN = false - p = 0.5 (median) + val result = sequenceOf(Double.NaN, Double.NaN).quantileOrNull( + p = 0.5, + type = typeOf(), + skipNaN = false, + method = QuantileEstimationMethod.R8, + ) + (result as Double).isNaN() shouldBe true + + // All NaN values with skipNaN = false - p = 0.25 (first quartile) + val result25 = sequenceOf(Double.NaN, Double.NaN).quantileOrNull( + p = 0.25, + type = typeOf(), + skipNaN = false, + method = QuantileEstimationMethod.R8, + ) + (result25 as Double).isNaN() shouldBe true + + // All NaN values with skipNaN = false - p = 0.75 (third 
quartile) + val result75 = sequenceOf(Double.NaN, Double.NaN).quantileOrNull( + p = 0.75, + type = typeOf(), + skipNaN = false, + method = QuantileEstimationMethod.R8, + ) + (result75 as Double).isNaN() shouldBe true + + // All NaN values with skipNaN = false - p = 0.9 (90th percentile) + val result09 = sequenceOf(Double.NaN, Double.NaN).quantileOrNull( + p = 0.9, + type = typeOf(), + skipNaN = false, + method = QuantileEstimationMethod.R8, + ) + (result09 as Double).isNaN() shouldBe true + } +} diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/percentile.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/percentile.kt index 34d4f6c6fe..ceee06e325 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/percentile.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/percentile.kt @@ -1,25 +1,64 @@ +@file:OptIn(ExperimentalTypeInference::class) + package org.jetbrains.kotlinx.dataframe.statistics +import io.kotest.matchers.doubles.shouldBeNaN +import io.kotest.matchers.nulls.shouldBeNull import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.api.Infer import org.jetbrains.kotlinx.dataframe.api.columnOf import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.groupBy import org.jetbrains.kotlinx.dataframe.api.mapToColumn import org.jetbrains.kotlinx.dataframe.api.percentile -import org.jetbrains.kotlinx.dataframe.api.rowPercentile +import org.jetbrains.kotlinx.dataframe.api.percentileBy +import org.jetbrains.kotlinx.dataframe.api.percentileByOrNull +import org.jetbrains.kotlinx.dataframe.api.percentileFor +import org.jetbrains.kotlinx.dataframe.api.percentileOf +import org.jetbrains.kotlinx.dataframe.api.percentileOrNull +import org.jetbrains.kotlinx.dataframe.api.rowPercentileOf +import org.jetbrains.kotlinx.dataframe.impl.nothingType import org.junit.Test +import 
kotlin.experimental.ExperimentalTypeInference +import kotlin.reflect.typeOf @Suppress("ktlint:standard:argument-list-wrapping") class PercentileTests { + val personsDf = dataFrameOf("name", "age", "city", "weight", "height", "yearsToRetirement")( + "Alice", 15, "London", 99.5, "1.85", 50, + "Bob", 20, "Paris", 140.0, "1.35", 45, + "Charlie", 100, "Dubai", 75.0, "1.95", 0, + "Rose", 1, "Moscow", 45.33, "0.79", 64, + "Dylan", 35, "London", 23.4, "1.83", 30, + "Eve", 40, "Paris", 56.72, "1.85", 25, + "Frank", 55, "Dubai", 78.9, "1.35", 10, + "Grace", 29, "Moscow", 67.8, "1.65", 36, + "Hank", 60, "Paris", 80.22, "1.75", 5, + "Isla", 22, "London", 75.1, "1.85", 43, + ) + + @Test + fun `percentileOf test`() { + val d = personsDf.groupBy("city").percentileOf(75.0, "newAge") { "age"() * 10 } + d["newAge"].type() shouldBe typeOf() + } + @Test fun `percentile of two columns`() { - val df = dataFrameOf("a", "b")( - 1, 4, - 2, 6, - 7, 7, + val df = dataFrameOf("a", "b", "c")( + 1, 4, "a", + 2, 6, "b", + 7, 7, "c", ) - df.percentile(60.0, "a", "b") shouldBe 6 + df.percentile(60.0, "a", "b") shouldBe 6.133333333333333 + df.percentile(60.0) { "a"() and "b"() } shouldBe 6.133333333333333 + df.percentileOrNull(60.0) { "a"() and "b"() } shouldBe 6.133333333333333 + df.percentile(50.0, "c") shouldBe "b" + + df.percentile<_, String>(50.0) { "c"() } shouldBe "b" + df.percentileOrNull<_, String>(50.0) { "c"() } shouldBe "b" } @Test @@ -29,6 +68,236 @@ class PercentileTests { 2, 4, 10, 7, 7, 1, ) - df.mapToColumn("", Infer.Type) { it.rowPercentile(25.0) } shouldBe columnOf(1, 2, 1) + df.mapToColumn("", Infer.Type) { it.rowPercentileOf(25.0) } shouldBe columnOf(1, 2, 2) + df.mapToColumn("", Infer.Type) { it.rowPercentileOf(50.0) } shouldBe columnOf(3, 4, 7) + df.mapToColumn("", Infer.Type) { it.rowPercentileOf(75.0) } shouldBe columnOf(3, 9, 7) + } + + @Test + fun `percentile with regular values`() { + val col = columnOf(5, 2, 8, 1, 9) + col.percentile(25.0) shouldBe 1.6666666666666665 + 
col.percentile(50.0) shouldBe 5 + col.percentile(75.0) shouldBe 8.333333333333332 + col.percentile(90.0) shouldBe 9 + } + + @Test + fun `percentile with null`() { + val colWithNull = columnOf(5, 2, null, 1, 9) + colWithNull.percentile(25.0) shouldBe 1.4166666666666665 + colWithNull.percentile(50.0) shouldBe 3.5 + colWithNull.percentile(75.0) shouldBe 7.333333333333334 + } + + @Test + fun `percentile with different numeric types`() { + // Integer types + columnOf(5, 2, 8, 1, 9).percentile(50.0) shouldBe 5.0 + columnOf(5L, 2L, 8L, 1L, 9L).percentile(50.0) shouldBe 5.0 + + // Floating point types + columnOf(5.0, 2.0, 8.0, 1.0, 9.0).percentile(50.0) shouldBe 5.0 + columnOf(5.0f, 2.0f, 8.0f, 1.0f, 9.0f).percentile(50.0) shouldBe 5.0 + + // Test with different percentile values + columnOf(5, 2, 8, 1, 9).percentile(25.0) shouldBe 1.6666666666666665 + columnOf(5, 2, 8, 1, 9).percentile(75.0) shouldBe 8.333333333333332 + } + + @Test + fun `percentile with empty column`() { + DataColumn.createValueColumn("", emptyList(), nothingType(false)).percentileOrNull(50.0).shouldBeNull() + DataColumn.createValueColumn("", emptyList(), nothingType(false)).percentileOrNull(25.0).shouldBeNull() + DataColumn.createValueColumn("", emptyList(), nothingType(false)).percentileOrNull(75.0).shouldBeNull() + } + + @Test + fun `percentile with just nulls`() { + val column = DataColumn.createValueColumn("", listOf(null, null), nothingType(true)) + column.percentileOrNull(50.0).shouldBeNull() + column.percentileOrNull(25.0).shouldBeNull() + column.percentileOrNull(75.0).shouldBeNull() + } + + @Test + fun `percentile with just NaNs`() { + columnOf(Double.NaN, Double.NaN).percentile(50.0).shouldBeNaN() + columnOf(Double.NaN, Double.NaN).percentileOrNull(50.0)!!.shouldBeNaN() + + // With skipNaN=true and only NaN values, result should be null + columnOf(Double.NaN, Double.NaN).percentileOrNull(50.0, skipNaN = true).shouldBeNull() + } + + @Test + fun `percentile with nans and nulls`() { + // Percentile 
functions should return NaN if any value is NaN + columnOf(5.0, 2.0, Double.NaN, 1.0, null).percentile(50.0).shouldBeNaN() + + // With skipNaN=true, NaN values should be ignored + columnOf(5.0, 2.0, Double.NaN, 1.0, null).percentile(50.0, skipNaN = true) shouldBe 2.0 + columnOf(5.0, 2.0, Double.NaN, 1.0, null).percentile(25.0, skipNaN = true) shouldBe 1.1666666666666667 + columnOf(5.0, 2.0, Double.NaN, 1.0, null).percentile(75.0, skipNaN = true) shouldBe 4.5 + } + + @Test + fun `percentileBy with selector function`() { + // Test with a data class + data class Person(val name: String, val age: Int) + + val people = columnOf( + Person("Charlie", 35), + Person("Bob", 25), + Person("Alice", 30), + ) + + // Find person with percentile age + people.percentileBy(0.0) { it.age } shouldBe Person("Bob", 25) + people.percentileBy(25.0) { it.age } shouldBe Person("Bob", 25) + people.percentileBy(50.0) { it.age } shouldBe Person("Alice", 30) + people.percentileBy(75.0) { it.age } shouldBe Person("Alice", 30) + people.percentileBy(100.0) { it.age } shouldBe Person("Charlie", 35) + + // With null values + val peopleWithNull = columnOf( + Person("Alice", 30), + Person("Bob", 25), + null, + Person("Charlie", 35), + ) + + peopleWithNull.percentileBy(50.0) { it?.age ?: Int.MAX_VALUE } shouldBe Person("Alice", 30) + peopleWithNull.percentileByOrNull(50.0) { it?.age ?: Int.MAX_VALUE } shouldBe Person("Alice", 30) + } + + @Test + fun `percentileOf with transformer function`() { + // Test with strings that can be converted to numbers + val strings = columnOf("5", "2", "8", "1", "9") + strings.percentileOf(50.0) { it.toInt() } shouldBe 5 + strings.percentileOf(25.0) { it.toInt() } shouldBe 1.6666666666666665 + strings.percentileOf(75.0) { it.toInt() } shouldBe 8.333333333333332 + } + + @Test + fun `percentileOf with transformer function with nulls`() { + val stringsWithNull = columnOf("5", "2", null, "1", "9") + stringsWithNull.percentileOf(50.0) { it?.toInt() } shouldBe 3.5 + 
stringsWithNull.percentileOf(25.0) { it?.toInt() } shouldBe 1.4166666666666665 + stringsWithNull.percentileOf(75.0) { it?.toInt() } shouldBe 7.333333333333334 + } + + @Test + fun `percentileOf with transformer function with NaNs`() { + // Percentile functions should return NaN if any value is NaN + val mixedValues = columnOf("5.0", "2.0", "NaN", "1.0", "9.0") + mixedValues.percentileOf(50.0) { + val num = it.toDoubleOrNull() + if (num == null || num.isNaN()) Double.NaN else num + }.shouldBeNaN() + + // With skipNaN=true, NaN values should be ignored + mixedValues.percentileOf(50.0, skipNaN = true) { + val num = it.toDoubleOrNull() + if (num == null || num.isNaN()) Double.NaN else num + } shouldBe 3.5 + } + + @[Test Suppress("ktlint:standard:argument-list-wrapping")] + fun `rowPercentileOf with dataframe`() { + val df = dataFrameOf( + "a", "b", "c", + )( + 1f, 2, 3, + 4f, 5, 6, + 7f, 8, 9, + ) + + // Find percentile value in each row + df[0].rowPercentileOf(25.0) shouldBe 2.0 + df[0].rowPercentileOf(50.0) shouldBe 2.5 + df[0].rowPercentileOf(75.0) shouldBe 3.0 + + df[1].rowPercentileOf(50.0) shouldBe 4.0 + df[2].rowPercentileOf(50.0) shouldBe 8.5 + } + + @[Test Suppress("ktlint:standard:argument-list-wrapping")] + fun `dataframe percentile`() { + val df = dataFrameOf( + "a", "b", "c", + )( + 1, 2f, 3.0, + 4, 5f, 6.0, + 7, 8f, 9.0, + ) + + // Get row with percentile values for each column + val percentiles50 = df.percentile(50.0) + percentiles50["a"] shouldBe 4 + percentiles50["b"] shouldBe 5f + percentiles50["c"] shouldBe 6.0 + + val percentiles25 = df.percentile(25.0) + percentiles25["a"] shouldBe 1.5000000000000002 + percentiles25["b"] shouldBe 2.5f + percentiles25["c"] shouldBe 3.5 + + val percentiles75 = df.percentile(75.0) + percentiles75["a"] shouldBe 6.5 + percentiles75["b"] shouldBe 7.5f + percentiles75["c"] shouldBe 8.5 + + // Test percentile for specific columns + val percentileFor50 = df.percentileFor(50.0, "a", "c") + percentileFor50["a"] shouldBe 4 + 
percentileFor50["c"] shouldBe 6.0 + } + + @[Test Suppress("ktlint:standard:argument-list-wrapping")] + fun `dataframe percentileBy and percentileOf`() { + val df = dataFrameOf( + "a", "b", "c", + )( + 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + ) + + // Find row with percentile value of column "a" + val percentileByA50 = df.percentileBy(50.0, "a") + percentileByA50["a"] shouldBe 4 + percentileByA50["b"] shouldBe 5 + percentileByA50["c"] shouldBe 6 + + val percentileByA25 = df.percentileBy(25.0, "a") + percentileByA25["a"] shouldBe 1 + percentileByA25["b"] shouldBe 2 + percentileByA25["c"] shouldBe 3 + + val percentileByA75 = df.percentileBy(75.0, "a") + percentileByA75["a"] shouldBe 4 + percentileByA75["b"] shouldBe 5 + percentileByA75["c"] shouldBe 6 + + // Find percentile value of a + c for each row, [1+3, 4+6, 7+9] => [4, 10, 16] + df.percentileOf(50.0) { "a"() + "c"() } shouldBe 10.0 + df.percentileOf(25.0) { "a"() + "c"() } shouldBe 5.0 + df.percentileOf(75.0) { "a"() + "c"() } shouldBe 15.0 + } + + @[Test Suppress("ktlint:standard:argument-list-wrapping")] + fun `percentile with NaN values for floating point numbers`() { + // Test with Float.NaN values + val floatWithNaN = columnOf(5.0f, 2.0f, Float.NaN, 1.0f, 9.0f) + floatWithNaN.percentile(50.0).shouldBeNaN() // Percentile functions should return NaN if any value is NaN + floatWithNaN.percentile(50.0, skipNaN = true) shouldBe 3.5 // With skipNaN=true, NaN values should be ignored + floatWithNaN.percentile(25.0, skipNaN = true) shouldBe 1.4166666666666665 + floatWithNaN.percentile(75.0, skipNaN = true) shouldBe 7.333333333333334 + + // Test with Double.NaN values + val doubleWithNaN = columnOf(5.0, 2.0, Double.NaN, 1.0, 9.0) + doubleWithNaN.percentile(50.0).shouldBeNaN() // Percentile functions should return NaN if any value is NaN + doubleWithNaN.percentile(50.0, skipNaN = true) shouldBe 3.5 // With skipNaN=true, NaN values should be ignored } } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 
7b439ff968..2918c18b55 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -23,6 +23,7 @@ fastDoubleParser = "2.0.1" commonsCsv = "1.12.0" commonsCompress = "1.27.1" commonsIo = "2.18.0" +commonsStatistics = "1.1" serialization = "1.7.1" poi = "5.3.0" mariadb = "3.5.1" @@ -74,6 +75,8 @@ fastDoubleParser = { group = "ch.randelshofer", name = "fastdoubleparser", versi commonsCsv = { group = "org.apache.commons", name = "commons-csv", version.ref = "commonsCsv" } commonsCompress = { group = "org.apache.commons", name = "commons-compress", version.ref = "commonsCompress" } commonsIo = { group = "commons-io", name = "commons-io", version.ref = "commonsIo" } +commonsStatisticsDescriptive = { group = "org.apache.commons", name = "commons-statistics-descriptive", version.ref = "commonsStatistics" } + # Serialization serialization-core = { group = "org.jetbrains.kotlinx", name = "kotlinx-serialization-core", version.ref = "serialization" } serialization-json = { group = "org.jetbrains.kotlinx", name = "kotlinx-serialization-json", version.ref = "serialization" }