Skip to content

Commit dd095d4

Browse files
committed
separated parts of min/max tests
1 parent 60319f4 commit dd095d4

File tree

3 files changed

+166
-48
lines changed

3 files changed

+166
-48
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/multipleColumnsHandlers/TwoStepMultipleColumnsHandler.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,14 @@ import kotlin.reflect.KType
2222
* the handlers of this [aggregator] will be cast and reused.
2323
* In all cases [NoMultipleColumnsHandler] will be used as [AggregatorMultipleColumnsHandler].
2424
*
25+
* This is useful for aggregators that do not depend on the distribution of values across multiple columns.
26+
* It may be more memory efficient than [FlatteningMultipleColumnsHandler] and could be parallelized in the future.
27+
*
2528
* @param stepTwoAggregationHandler The [aggregation handler][AggregatorAggregationHandler] for the second step.
2629
* If not supplied, the handler of the first step is reused.
2730
* @param stepTwoInputHandler The [input handler][AggregatorInputHandler] for the second step.
2831
* If not supplied, the handler of the first step is reused.
32+
* @see [FlatteningMultipleColumnsHandler]
2933
*/
3034
internal class TwoStepMultipleColumnsHandler<in Value : Any, out Return : Any?>(
3135
stepTwoAggregationHandler: AggregatorAggregationHandler<Return & Any, Return>? = null,

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt

Lines changed: 82 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import org.jetbrains.kotlinx.dataframe.api.maxOfOrNull
1818
import org.jetbrains.kotlinx.dataframe.api.maxOrNull
1919
import org.jetbrains.kotlinx.dataframe.api.rowMaxOf
2020
import org.jetbrains.kotlinx.dataframe.impl.nothingType
21+
import org.junit.Ignore
2122
import org.junit.Test
2223

2324
class MaxTests {
@@ -26,8 +27,11 @@ class MaxTests {
2627
fun `max with regular values`() {
2728
val col = columnOf(5, 2, 8, 1, 9)
2829
col.max() shouldBe 9
30+
}
2931

30-
val colWithNull = columnOf<Int?>(5, 2, null, 1, 9)
32+
@Test
33+
fun `max with null`() {
34+
val colWithNull = columnOf(5, 2, null, 1, 9)
3135
colWithNull.max() shouldBe 9
3236
}
3337

@@ -42,29 +46,43 @@ class MaxTests {
4246
// Floating point types
4347
columnOf(5.0, 2.0, 8.0, 1.0, 9.0).max() shouldBe 9.0
4448
columnOf(5.0f, 2.0f, 8.0f, 1.0f, 9.0f).max() shouldBe 9.0f
49+
}
4550

46-
// Mixed number types todo
47-
// columnOf<Number>(5, 2L, 8.0f, 1.0, 9.toShort()).max() shouldBe 9.0
51+
@Ignore
52+
@Test
53+
fun `max with mixed numeric type`() {
54+
// Mixed number types todo https://github.com/Kotlin/dataframe/issues/1113
55+
// columnOf<Number>(5, 2L, 8.0f, 1.0, 9.toShort()).max() shouldBe 9.0
4856
}
4957

5058
@Test
51-
fun `max with nans and nulls`() {
52-
// Max functions should return NaN if any value is NaN
53-
columnOf(5.0, 2.0, Double.NaN, 1.0, null).max().shouldBeNaN()
59+
fun `max with empty column`() {
60+
DataColumn.createValueColumn("", emptyList<Nothing>(), nothingType(false)).maxOrNull().shouldBeNull()
61+
}
5462

55-
// With skipNaN=true, NaN values should be ignored
56-
columnOf(5.0, 2.0, Double.NaN, 1.0, null).max(skipNaN = true) shouldBe 5.0
63+
@Test
64+
fun `max with just nulls`() {
65+
DataColumn.createValueColumn("", listOf(null, null), nothingType(true)).maxOrNull().shouldBeNull()
66+
}
5767

58-
// Empty columns or columns with only nulls/NaNs
59-
DataColumn.createValueColumn("", emptyList<Nothing>(), nothingType(false)).maxOrNull().shouldBeNull()
60-
DataColumn.createValueColumn("", listOf(null), nothingType(true)).maxOrNull().shouldBeNull()
68+
@Test
69+
fun `max with just NaNs`() {
6170
columnOf(Double.NaN, Double.NaN).max().shouldBeNaN()
6271
columnOf(Double.NaN, Double.NaN).maxOrNull()!!.shouldBeNaN()
6372

6473
// With skipNaN=true and only NaN values, result should be null
6574
columnOf(Double.NaN, Double.NaN).maxOrNull(skipNaN = true).shouldBeNull()
6675
}
6776

77+
@Test
78+
fun `max with nans and nulls`() {
79+
// Max functions should return NaN if any value is NaN
80+
columnOf(5.0, 2.0, Double.NaN, 1.0, null).max().shouldBeNaN()
81+
82+
// With skipNaN=true, NaN values should be ignored
83+
columnOf(5.0, 2.0, Double.NaN, 1.0, null).max(skipNaN = true) shouldBe 5.0
84+
}
85+
6886
@Test
6987
fun `maxBy with selector function`() {
7088
// Test with a data class
@@ -87,8 +105,10 @@ class MaxTests {
87105
Person("Charlie", 35),
88106
)
89107

108+
peopleWithNull.maxBy { it?.age ?: Int.MIN_VALUE } shouldBe Person("Charlie", 35)
90109
peopleWithNull.maxByOrNull { it?.age ?: Int.MIN_VALUE } shouldBe Person("Charlie", 35)
91110
// the selector may return null, as nulls will be filtered out
111+
peopleWithNull.maxBy { it?.age } shouldBe Person("Charlie", 35)
92112
peopleWithNull.maxByOrNull { it?.age } shouldBe Person("Charlie", 35)
93113
}
94114

@@ -97,11 +117,18 @@ class MaxTests {
97117
// Test with strings that can be converted to numbers
98118
val strings = columnOf("5", "2", "8", "1", "9")
99119
strings.maxOf { it.toInt() } shouldBe 9
120+
strings.maxOfOrNull { it.toInt() } shouldBe 9
121+
}
100122

101-
// With null values
123+
@Test
124+
fun `maxOf with transformer function with nulls`() {
102125
val stringsWithNull = columnOf("5", "2", null, "1", "9")
126+
stringsWithNull.maxOf { it?.toInt() } shouldBe 9
103127
stringsWithNull.maxOfOrNull { it?.toInt() } shouldBe 9
128+
}
104129

130+
@Test
131+
fun `maxOf with transformer function with NaNs`() {
105132
// Max functions should return NaN if any value is NaN
106133
val mixedValues = columnOf("5.0", "2.0", "NaN", "1.0", "9.0")
107134
mixedValues.maxOf {
@@ -116,8 +143,7 @@ class MaxTests {
116143
} shouldBe 9.0
117144
}
118145

119-
@Suppress("ktlint:standard:argument-list-wrapping")
120-
@Test
146+
@[Test Suppress("ktlint:standard:argument-list-wrapping")]
121147
fun `rowMaxOf with dataframe`() {
122148
val df = dataFrameOf(
123149
"a", "b", "c",
@@ -131,7 +157,32 @@ class MaxTests {
131157
df[0].rowMaxOf<Int>() shouldBe 3
132158
df[1].rowMaxOf<Float>() shouldBe 4f
133159
df[2].rowMaxOf<Int>() shouldBe 9
160+
}
134161

162+
@[Test Suppress("ktlint:standard:argument-list-wrapping")]
163+
fun `rowMaxOf with dataframe and nulls`() {
164+
val df = dataFrameOf(
165+
"a", "b", "c",
166+
)(
167+
1f, 2, 3,
168+
4f, null, 6,
169+
7f, 8, 9,
170+
)
171+
172+
// Find maximum value in each row
173+
df[0].rowMaxOf<Int>() shouldBe 3
174+
df[0].rowMaxOf<Int?>() shouldBe 3 // TODO?
175+
176+
df[1].rowMaxOf<Float>() shouldBe 4f
177+
df[1].rowMaxOf<Int>() shouldBe 6
178+
df[1].rowMaxOf<Int?>() shouldBe 6
179+
180+
df[2].rowMaxOf<Int>() shouldBe 9
181+
df[2].rowMaxOf<Int?>() shouldBe 9 // TODO?
182+
}
183+
184+
@[Test Suppress("ktlint:standard:argument-list-wrapping")]
185+
fun `rowMaxOf with dataframe and NaNs`() {
135186
// Max functions should return NaN if any value is NaN
136187
val dfWithNaN = dataFrameOf(
137188
"a", "b", "c",
@@ -151,8 +202,7 @@ class MaxTests {
151202
dfWithNaN[2].rowMaxOf<Double>(skipNaN = true) shouldBe 8.0
152203
}
153204

154-
@Suppress("ktlint:standard:argument-list-wrapping")
155-
@Test
205+
@[Test Suppress("ktlint:standard:argument-list-wrapping")]
156206
fun `dataframe max`() {
157207
val df = dataFrameOf(
158208
"a", "b", "c",
@@ -172,14 +222,25 @@ class MaxTests {
172222
val maxFor = df.maxFor("a", "c")
173223
maxFor["a"] shouldBe 7
174224
maxFor["c"] shouldBe 9.0
225+
}
226+
227+
@Ignore
228+
@[Test Suppress("ktlint:standard:argument-list-wrapping")]
229+
fun `dataframe max mixed number types`() {
230+
val df = dataFrameOf(
231+
"a", "b", "c",
232+
)(
233+
1, 2f, 3.0,
234+
4, 5f, 6.0,
235+
7, 8f, 9.0,
236+
)
175237

176238
// Test max of all columns as a single value
177239
// TODO https://github.com/Kotlin/dataframe/issues/1113
178-
// df.max("a", "b", "c") shouldBe 1
240+
df.max("a", "b", "c") shouldBe 9
179241
}
180242

181-
@Suppress("ktlint:standard:argument-list-wrapping")
182-
@Test
243+
@[Test Suppress("ktlint:standard:argument-list-wrapping")]
183244
fun `dataframe maxBy and maxOf`() {
184245
val df = dataFrameOf(
185246
"a", "b", "c",
@@ -196,11 +257,10 @@ class MaxTests {
196257
maxByA["c"] shouldBe 9
197258

198259
// Find maximum value of a + c for each row
199-
df.maxOf { "a"<Int>() + "c"<Int>() } shouldBe 16 // 7 + 9 = 18
260+
df.maxOf { "a"<Int>() + "c"<Int>() } shouldBe 16 // 7 + 9 = 16
200261
}
201262

202-
@Suppress("ktlint:standard:argument-list-wrapping")
203-
@Test
263+
@[Test Suppress("ktlint:standard:argument-list-wrapping")]
204264
fun `max with NaN values for floating point numbers`() {
205265
// Test with Float.NaN values
206266
val floatWithNaN = columnOf(5.0f, 2.0f, Float.NaN, 1.0f, 9.0f)

0 commit comments

Comments
 (0)