Skip to content

Commit 751321e

Browse files
committed
added skipNaN option for mean, sum, and min
1 parent 6c76219 commit 751321e

File tree

19 files changed

+618
-367
lines changed

19 files changed

+618
-367
lines changed
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
package org.jetbrains.kotlinx.dataframe.api
22

33
@PublishedApi
4-
internal val skipNA_default: Boolean = false
4+
internal val skipNaN_default: Boolean = false
55

66
@PublishedApi
77
internal val ddof_default: Int = 1

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@ import org.jetbrains.kotlinx.dataframe.documentation.NaN
2222
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
2323
import org.jetbrains.kotlinx.dataframe.get
2424
import org.jetbrains.kotlinx.dataframe.typeClass
25+
import kotlin.contracts.ExperimentalContracts
26+
import kotlin.contracts.contract
2527
import kotlin.reflect.KProperty
2628

2729
// region fillNulls
@@ -100,6 +102,14 @@ public fun <T, C> DataFrame<T>.fillNulls(vararg columns: ColumnReference<C>): Up
100102

101103
internal inline val Any?.isNaN: Boolean get() = (this is Double && isNaN()) || (this is Float && isNaN())
102104

105+
@JvmName("isNaWithContract")
106+
@Suppress("NOTHING_TO_INLINE")
107+
@OptIn(ExperimentalContracts::class)
108+
internal inline fun <T : Any?> T.isNA(): Boolean {
109+
contract { returns(false) implies (this@isNA != null) }
110+
return isNA
111+
}
112+
103113
internal inline val Any?.isNA: Boolean
104114
get() = when (this) {
105115
null -> true

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/mean.kt

Lines changed: 84 additions & 82 deletions
Original file line number | Diff line number | Diff line change
@@ -34,243 +34,245 @@ import kotlin.reflect.typeOf
3434

3535
// region DataColumn
3636

37-
public fun DataColumn<Number?>.mean(skipNA: Boolean = skipNA_default): Double =
38-
Aggregators.mean(skipNA).aggregateSingleColumn(this)
37+
public fun DataColumn<Number?>.mean(skipNaN: Boolean = skipNaN_default): Double =
38+
Aggregators.mean(skipNaN).aggregateSingleColumn(this)
3939

4040
public inline fun <T, reified R : Number> DataColumn<T>.meanOf(
41-
skipNA: Boolean = skipNA_default,
41+
skipNaN: Boolean = skipNaN_default,
4242
crossinline expression: (T) -> R?,
43-
): Double = Aggregators.mean(skipNA).aggregateOf(this, expression)
43+
): Double = Aggregators.mean(skipNaN).aggregateOf(this, expression)
4444

4545
// endregion
4646

4747
// region DataRow
4848

49-
public fun AnyRow.rowMean(skipNA: Boolean = skipNA_default): Double =
50-
Aggregators.mean(skipNA).aggregateOfRow(this, primitiveOrMixedNumberColumns())
49+
public fun AnyRow.rowMean(skipNaN: Boolean = skipNaN_default): Double =
50+
Aggregators.mean(skipNaN).aggregateOfRow(this, primitiveOrMixedNumberColumns())
5151

52-
public inline fun <reified T : Number?> AnyRow.rowMeanOf(skipNA: Boolean = skipNA_default): Double {
52+
public inline fun <reified T : Number?> AnyRow.rowMeanOf(skipNaN: Boolean = skipNaN_default): Double {
5353
require(typeOf<T>().isPrimitiveOrMixedNumber()) {
5454
"Type ${T::class.simpleName} is not a primitive number type. Mean only supports primitive number types."
5555
}
56-
return Aggregators.mean(skipNA).aggregateOfRow(this) { colsOf<T>() }
56+
return Aggregators.mean(skipNaN).aggregateOfRow(this) { colsOf<T>() }
5757
}
5858

5959
// endregion
6060

6161
// region DataFrame
6262

63-
public fun <T> DataFrame<T>.mean(skipNA: Boolean = skipNA_default): DataRow<T> =
64-
meanFor(skipNA, primitiveOrMixedNumberColumns())
63+
public fun <T> DataFrame<T>.mean(skipNaN: Boolean = skipNaN_default): DataRow<T> =
64+
meanFor(skipNaN, primitiveOrMixedNumberColumns())
6565

6666
public fun <T, C : Number> DataFrame<T>.meanFor(
67-
skipNA: Boolean = skipNA_default,
67+
skipNaN: Boolean = skipNaN_default,
6868
columns: ColumnsForAggregateSelector<T, C?>,
69-
): DataRow<T> = Aggregators.mean(skipNA).aggregateFor(this, columns)
69+
): DataRow<T> = Aggregators.mean(skipNaN).aggregateFor(this, columns)
7070

71-
public fun <T> DataFrame<T>.meanFor(vararg columns: String, skipNA: Boolean = skipNA_default): DataRow<T> =
72-
meanFor(skipNA) { columns.toNumberColumns() }
71+
public fun <T> DataFrame<T>.meanFor(vararg columns: String, skipNaN: Boolean = skipNaN_default): DataRow<T> =
72+
meanFor(skipNaN) { columns.toNumberColumns() }
7373

7474
@AccessApiOverload
7575
public fun <T, C : Number> DataFrame<T>.meanFor(
7676
vararg columns: ColumnReference<C?>,
77-
skipNA: Boolean = skipNA_default,
78-
): DataRow<T> = meanFor(skipNA) { columns.toColumnSet() }
77+
skipNaN: Boolean = skipNaN_default,
78+
): DataRow<T> = meanFor(skipNaN) { columns.toColumnSet() }
7979

8080
@AccessApiOverload
8181
public fun <T, C : Number> DataFrame<T>.meanFor(
8282
vararg columns: KProperty<C?>,
83-
skipNA: Boolean = skipNA_default,
84-
): DataRow<T> = meanFor(skipNA) { columns.toColumnSet() }
83+
skipNaN: Boolean = skipNaN_default,
84+
): DataRow<T> = meanFor(skipNaN) { columns.toColumnSet() }
8585

8686
public fun <T, C : Number> DataFrame<T>.mean(
87-
skipNA: Boolean = skipNA_default,
87+
skipNaN: Boolean = skipNaN_default,
8888
columns: ColumnsSelector<T, C?>,
89-
): Double = Aggregators.mean(skipNA).aggregateAll(this, columns)
89+
): Double = Aggregators.mean(skipNaN).aggregateAll(this, columns)
9090

91-
public fun <T> DataFrame<T>.mean(vararg columns: String, skipNA: Boolean = skipNA_default): Double =
92-
mean(skipNA) { columns.toNumberColumns() }
91+
public fun <T> DataFrame<T>.mean(vararg columns: String, skipNaN: Boolean = skipNaN_default): Double =
92+
mean(skipNaN) { columns.toNumberColumns() }
9393

9494
@AccessApiOverload
9595
public fun <T, C : Number> DataFrame<T>.mean(
9696
vararg columns: ColumnReference<C?>,
97-
skipNA: Boolean = skipNA_default,
98-
): Double = mean(skipNA) { columns.toColumnSet() }
97+
skipNaN: Boolean = skipNaN_default,
98+
): Double = mean(skipNaN) { columns.toColumnSet() }
9999

100100
@AccessApiOverload
101-
public fun <T, C : Number> DataFrame<T>.mean(vararg columns: KProperty<C?>, skipNA: Boolean = skipNA_default): Double =
102-
mean(skipNA) { columns.toColumnSet() }
101+
public fun <T, C : Number> DataFrame<T>.mean(
102+
vararg columns: KProperty<C?>,
103+
skipNaN: Boolean = skipNaN_default,
104+
): Double = mean(skipNaN) { columns.toColumnSet() }
103105

104106
public inline fun <T, reified D : Number> DataFrame<T>.meanOf(
105-
skipNA: Boolean = skipNA_default,
107+
skipNaN: Boolean = skipNaN_default,
106108
crossinline expression: RowExpression<T, D?>,
107-
): Double = Aggregators.mean(skipNA).aggregateOf(this, expression)
109+
): Double = Aggregators.mean(skipNaN).aggregateOf(this, expression)
108110

109111
// endregion
110112

111113
// region GroupBy
112114
@Refine
113115
@Interpretable("GroupByMean1")
114-
public fun <T> Grouped<T>.mean(skipNA: Boolean = skipNA_default): DataFrame<T> =
115-
meanFor(skipNA, primitiveOrMixedNumberColumns())
116+
public fun <T> Grouped<T>.mean(skipNaN: Boolean = skipNaN_default): DataFrame<T> =
117+
meanFor(skipNaN, primitiveOrMixedNumberColumns())
116118

117119
@Refine
118120
@Interpretable("GroupByMean0")
119121
public fun <T, C : Number> Grouped<T>.meanFor(
120-
skipNA: Boolean = skipNA_default,
122+
skipNaN: Boolean = skipNaN_default,
121123
columns: ColumnsForAggregateSelector<T, C?>,
122-
): DataFrame<T> = Aggregators.mean(skipNA).aggregateFor(this, columns)
124+
): DataFrame<T> = Aggregators.mean(skipNaN).aggregateFor(this, columns)
123125

124-
public fun <T> Grouped<T>.meanFor(vararg columns: String, skipNA: Boolean = skipNA_default): DataFrame<T> =
125-
meanFor(skipNA) { columns.toNumberColumns() }
126+
public fun <T> Grouped<T>.meanFor(vararg columns: String, skipNaN: Boolean = skipNaN_default): DataFrame<T> =
127+
meanFor(skipNaN) { columns.toNumberColumns() }
126128

127129
@AccessApiOverload
128130
public fun <T, C : Number> Grouped<T>.meanFor(
129131
vararg columns: ColumnReference<C?>,
130-
skipNA: Boolean = skipNA_default,
131-
): DataFrame<T> = meanFor(skipNA) { columns.toColumnSet() }
132+
skipNaN: Boolean = skipNaN_default,
133+
): DataFrame<T> = meanFor(skipNaN) { columns.toColumnSet() }
132134

133135
@AccessApiOverload
134136
public fun <T, C : Number> Grouped<T>.meanFor(
135137
vararg columns: KProperty<C?>,
136-
skipNA: Boolean = skipNA_default,
137-
): DataFrame<T> = meanFor(skipNA) { columns.toColumnSet() }
138+
skipNaN: Boolean = skipNaN_default,
139+
): DataFrame<T> = meanFor(skipNaN) { columns.toColumnSet() }
138140

139141
@Refine
140142
@Interpretable("GroupByMean0")
141143
public fun <T, C : Number> Grouped<T>.mean(
142144
name: String? = null,
143-
skipNA: Boolean = skipNA_default,
145+
skipNaN: Boolean = skipNaN_default,
144146
columns: ColumnsSelector<T, C?>,
145-
): DataFrame<T> = Aggregators.mean(skipNA).aggregateAll(this, name, columns)
147+
): DataFrame<T> = Aggregators.mean(skipNaN).aggregateAll(this, name, columns)
146148

147149
public fun <T> Grouped<T>.mean(
148150
vararg columns: String,
149151
name: String? = null,
150-
skipNA: Boolean = skipNA_default,
151-
): DataFrame<T> = mean(name, skipNA) { columns.toNumberColumns() }
152+
skipNaN: Boolean = skipNaN_default,
153+
): DataFrame<T> = mean(name, skipNaN) { columns.toNumberColumns() }
152154

153155
@AccessApiOverload
154156
public fun <T, C : Number> Grouped<T>.mean(
155157
vararg columns: ColumnReference<C?>,
156158
name: String? = null,
157-
skipNA: Boolean = skipNA_default,
158-
): DataFrame<T> = mean(name, skipNA) { columns.toColumnSet() }
159+
skipNaN: Boolean = skipNaN_default,
160+
): DataFrame<T> = mean(name, skipNaN) { columns.toColumnSet() }
159161

160162
@AccessApiOverload
161163
public fun <T, C : Number> Grouped<T>.mean(
162164
vararg columns: KProperty<C?>,
163165
name: String? = null,
164-
skipNA: Boolean = skipNA_default,
165-
): DataFrame<T> = mean(name, skipNA) { columns.toColumnSet() }
166+
skipNaN: Boolean = skipNaN_default,
167+
): DataFrame<T> = mean(name, skipNaN) { columns.toColumnSet() }
166168

167169
@Refine
168170
@Interpretable("GroupByMeanOf")
169171
public inline fun <T, reified R : Number> Grouped<T>.meanOf(
170172
name: String? = null,
171-
skipNA: Boolean = skipNA_default,
173+
skipNaN: Boolean = skipNaN_default,
172174
crossinline expression: RowExpression<T, R?>,
173-
): DataFrame<T> = Aggregators.mean(skipNA).aggregateOf(this, name, expression)
175+
): DataFrame<T> = Aggregators.mean(skipNaN).aggregateOf(this, name, expression)
174176

175177
// endregion
176178

177179
// region Pivot
178180

179-
public fun <T> Pivot<T>.mean(skipNA: Boolean = skipNA_default, separate: Boolean = false): DataRow<T> =
180-
meanFor(skipNA, separate, primitiveOrMixedNumberColumns())
181+
public fun <T> Pivot<T>.mean(skipNaN: Boolean = skipNaN_default, separate: Boolean = false): DataRow<T> =
182+
meanFor(skipNaN, separate, primitiveOrMixedNumberColumns())
181183

182184
public fun <T, C : Number> Pivot<T>.meanFor(
183-
skipNA: Boolean = skipNA_default,
185+
skipNaN: Boolean = skipNaN_default,
184186
separate: Boolean = false,
185187
columns: ColumnsForAggregateSelector<T, C?>,
186-
): DataRow<T> = delegate { meanFor(skipNA, separate, columns) }
188+
): DataRow<T> = delegate { meanFor(skipNaN, separate, columns) }
187189

188190
public fun <T> Pivot<T>.meanFor(
189191
vararg columns: String,
190-
skipNA: Boolean = skipNA_default,
192+
skipNaN: Boolean = skipNaN_default,
191193
separate: Boolean = false,
192-
): DataRow<T> = meanFor(skipNA, separate) { columns.toNumberColumns() }
194+
): DataRow<T> = meanFor(skipNaN, separate) { columns.toNumberColumns() }
193195

194196
@AccessApiOverload
195197
public fun <T, C : Number> Pivot<T>.meanFor(
196198
vararg columns: ColumnReference<C?>,
197-
skipNA: Boolean = skipNA_default,
199+
skipNaN: Boolean = skipNaN_default,
198200
separate: Boolean = false,
199-
): DataRow<T> = meanFor(skipNA, separate) { columns.toColumnSet() }
201+
): DataRow<T> = meanFor(skipNaN, separate) { columns.toColumnSet() }
200202

201203
@AccessApiOverload
202204
public fun <T, C : Number> Pivot<T>.meanFor(
203205
vararg columns: KProperty<C?>,
204-
skipNA: Boolean = skipNA_default,
206+
skipNaN: Boolean = skipNaN_default,
205207
separate: Boolean = false,
206-
): DataRow<T> = meanFor(skipNA, separate) { columns.toColumnSet() }
208+
): DataRow<T> = meanFor(skipNaN, separate) { columns.toColumnSet() }
207209

208210
public fun <T, R : Number> Pivot<T>.mean(
209-
skipNA: Boolean = skipNA_default,
211+
skipNaN: Boolean = skipNaN_default,
210212
columns: ColumnsSelector<T, R?>,
211-
): DataRow<T> = delegate { mean(skipNA, columns) }
213+
): DataRow<T> = delegate { mean(skipNaN, columns) }
212214

213215
public inline fun <T, reified R : Number> Pivot<T>.meanOf(
214-
skipNA: Boolean = skipNA_default,
216+
skipNaN: Boolean = skipNaN_default,
215217
crossinline expression: RowExpression<T, R?>,
216-
): DataRow<T> = delegate { meanOf(skipNA, expression) }
218+
): DataRow<T> = delegate { meanOf(skipNaN, expression) }
217219

218220
// endregion
219221

220222
// region PivotGroupBy
221223

222-
public fun <T> PivotGroupBy<T>.mean(separate: Boolean = false, skipNA: Boolean = skipNA_default): DataFrame<T> =
223-
meanFor(skipNA, separate, primitiveOrMixedNumberColumns())
224+
public fun <T> PivotGroupBy<T>.mean(separate: Boolean = false, skipNaN: Boolean = skipNaN_default): DataFrame<T> =
225+
meanFor(skipNaN, separate, primitiveOrMixedNumberColumns())
224226

225227
public fun <T, C : Number> PivotGroupBy<T>.meanFor(
226-
skipNA: Boolean = skipNA_default,
228+
skipNaN: Boolean = skipNaN_default,
227229
separate: Boolean = false,
228230
columns: ColumnsForAggregateSelector<T, C?>,
229-
): DataFrame<T> = Aggregators.mean(skipNA).aggregateFor(this, separate, columns)
231+
): DataFrame<T> = Aggregators.mean(skipNaN).aggregateFor(this, separate, columns)
230232

231233
public fun <T> PivotGroupBy<T>.meanFor(
232234
vararg columns: String,
233235
separate: Boolean = false,
234-
skipNA: Boolean = skipNA_default,
235-
): DataFrame<T> = meanFor(skipNA, separate) { columns.toNumberColumns() }
236+
skipNaN: Boolean = skipNaN_default,
237+
): DataFrame<T> = meanFor(skipNaN, separate) { columns.toNumberColumns() }
236238

237239
@AccessApiOverload
238240
public fun <T, C : Number> PivotGroupBy<T>.meanFor(
239241
vararg columns: ColumnReference<C?>,
240242
separate: Boolean = false,
241-
skipNA: Boolean = skipNA_default,
242-
): DataFrame<T> = meanFor(skipNA, separate) { columns.toColumnSet() }
243+
skipNaN: Boolean = skipNaN_default,
244+
): DataFrame<T> = meanFor(skipNaN, separate) { columns.toColumnSet() }
243245

244246
@AccessApiOverload
245247
public fun <T, C : Number> PivotGroupBy<T>.meanFor(
246248
vararg columns: KProperty<C?>,
247249
separate: Boolean = false,
248-
skipNA: Boolean = skipNA_default,
249-
): DataFrame<T> = meanFor(skipNA, separate) { columns.toColumnSet() }
250+
skipNaN: Boolean = skipNaN_default,
251+
): DataFrame<T> = meanFor(skipNaN, separate) { columns.toColumnSet() }
250252

251253
public fun <T, R : Number> PivotGroupBy<T>.mean(
252-
skipNA: Boolean = skipNA_default,
254+
skipNaN: Boolean = skipNaN_default,
253255
columns: ColumnsSelector<T, R?>,
254-
): DataFrame<T> = Aggregators.mean(skipNA).aggregateAll(this, columns)
256+
): DataFrame<T> = Aggregators.mean(skipNaN).aggregateAll(this, columns)
255257

256-
public fun <T> PivotGroupBy<T>.mean(vararg columns: String, skipNA: Boolean = skipNA_default): DataFrame<T> =
257-
mean(skipNA) { columns.toColumnsSetOf() }
258+
public fun <T> PivotGroupBy<T>.mean(vararg columns: String, skipNaN: Boolean = skipNaN_default): DataFrame<T> =
259+
mean(skipNaN) { columns.toColumnsSetOf() }
258260

259261
@AccessApiOverload
260262
public fun <T, R : Number> PivotGroupBy<T>.mean(
261263
vararg columns: ColumnReference<R?>,
262-
skipNA: Boolean = skipNA_default,
263-
): DataFrame<T> = mean(skipNA) { columns.toColumnSet() }
264+
skipNaN: Boolean = skipNaN_default,
265+
): DataFrame<T> = mean(skipNaN) { columns.toColumnSet() }
264266

265267
@AccessApiOverload
266268
public fun <T, R : Number> PivotGroupBy<T>.mean(
267269
vararg columns: KProperty<R?>,
268-
skipNA: Boolean = skipNA_default,
269-
): DataFrame<T> = mean(skipNA) { columns.toColumnSet() }
270+
skipNaN: Boolean = skipNaN_default,
271+
): DataFrame<T> = mean(skipNaN) { columns.toColumnSet() }
270272

271273
public inline fun <T, reified R : Number> PivotGroupBy<T>.meanOf(
272-
skipNA: Boolean = skipNA_default,
274+
skipNaN: Boolean = skipNaN_default,
273275
crossinline expression: RowExpression<T, R?>,
274-
): DataFrame<T> = Aggregators.mean(skipNA).aggregateOf(this, expression)
276+
): DataFrame<T> = Aggregators.mean(skipNaN).aggregateOf(this, expression)
275277

276278
// endregion

0 commit comments

Comments
 (0)