Skip to content

Commit c1ff9cd

Browse files
authored
make sure flatten works correctly on a data frame with zero rows (#3198)
1 parent 8f726a6 commit c1ff9cd

File tree

4 files changed

+22
-3
lines changed

4 files changed

+22
-3
lines changed

NEWS.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# DataFrames.jl v1.4.2 Patch Release Notes
2+
3+
## Bug fixes
4+
5+
* Make sure `flatten` works correctly on a data frame with zero rows
6+
([#3198](https://github.com/JuliaData/DataFrames.jl/issues/3198))
7+
18
# DataFrames.jl v1.4.1 Patch Release Notes
29

310
## Bug fixes

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "DataFrames"
22
uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
3-
version = "1.4.1"
3+
version = "1.4.2"
44

55
[deps]
66
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"

src/abstractdataframe/abstractdataframe.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2568,10 +2568,11 @@ function flatten(df::AbstractDataFrame,
25682568
length(idxcols) > 1 && sort!(idxcols)
25692569
for col in idxcols
25702570
col_to_flatten = df[!, col]
2571-
flattened_col = col_to_flatten isa AbstractVector{<:AbstractVector} ?
2571+
fast_path = eltype(col_to_flatten) isa AbstractVector &&
2572+
!isempty(col_to_flatten)
2573+
flattened_col = fast_path ?
25722574
reduce(vcat, col_to_flatten) :
25732575
collect(Iterators.flatten(col_to_flatten))
2574-
25752576
insertcols!(new_df, col, _names(df)[col] => flattened_col)
25762577
end
25772578

test/reshape.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,10 +367,13 @@ end
367367
@testset "flatten single column" begin
368368
df_vec = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]])
369369
df_tup = DataFrame(a=[1, 2], b=[(1, 2), (3, 4)])
370+
@test flatten(empty(df_vec), :b) == DataFrame(a=[], b=[])
371+
@test flatten(empty(df_tup), :b) == DataFrame(a=[], b=[])
370372
ref = DataFrame(a=[1, 1, 2, 2], b=[1, 2, 3, 4])
371373
@test flatten(df_vec, :b) == flatten(df_tup, :b) == ref
372374
@test flatten(df_vec, "b") == flatten(df_tup, "b") == ref
373375
df_mixed_types = DataFrame(a=[1, 2], b=[[1, 2], ["x", "y"]])
376+
@test flatten(empty(df_mixed_types), :b) == DataFrame(a=[], b=[])
374377
ref_mixed_types = DataFrame(a=[1, 1, 2, 2], b=[1, 2, "x", "y"])
375378
@test flatten(df_mixed_types, :b) == ref_mixed_types
376379
df_three = DataFrame(a=[1, 2, 3], b=[[1, 2], [10, 20], [100, 200, 300]])
@@ -382,17 +385,20 @@ end
382385
@test flatten(df_gen, :b) == ref_gen
383386
@test flatten(df_gen, "b") == ref_gen
384387
df_miss = DataFrame(a=[1, 2], b=[Union{Missing, Int}[1, 2], Union{Missing, Int}[3, 4]])
388+
@test flatten(empty(df_miss), :b) == DataFrame(a=[], b=[])
385389
ref = DataFrame(a=[1, 1, 2, 2], b=[1, 2, 3, 4])
386390
@test flatten(df_miss, :b) == ref
387391
@test flatten(df_miss, "b") == ref
388392
v1 = [[1, 2], [3, 4]]
389393
v2 = [[5, 6], [7, 8]]
390394
v = [v1, v2]
391395
df_vec_vec = DataFrame(a=[1, 2], b=v)
396+
@test flatten(empty(df_vec_vec), :b) == DataFrame(a=[], b=[])
392397
ref_vec_vec = DataFrame(a=[1, 1, 2, 2], b=[v1 ; v2])
393398
@test flatten(df_vec_vec, :b) == ref_vec_vec
394399
@test flatten(df_vec_vec, "b") == ref_vec_vec
395400
df_cat = DataFrame(a=[1, 2], b=[CategoricalArray([1, 2]), CategoricalArray([1, 2])])
401+
@test flatten(empty(df_cat), :b) == DataFrame(a=[], b=[])
396402
df_flat_cat = flatten(df_cat, :b)
397403
ref_cat = DataFrame(a=[1, 1, 2, 2], b=[1, 2, 1, 2])
398404
@test df_flat_cat == ref_cat
@@ -401,6 +407,9 @@ end
401407

402408
@testset "flatten multiple columns" begin
403409
df = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])
410+
@test flatten(empty(df), []) == DataFrame(a=[], b=[], c=[])
411+
@test flatten(empty(df), [:b, :c]) == DataFrame(a=[], b=[], c=[])
412+
@test flatten(empty(df), All()) == DataFrame(a=[], b=[], c=[])
404413
@test flatten(df, []) == df
405414
ref = DataFrame(a=[1, 1, 2, 2], b=[1, 2, 3, 4], c=[5, 6, 7, 8])
406415
@test flatten(df, [:b, :c]) == ref
@@ -418,6 +427,8 @@ end
418427
@test flatten(df_allcols, :) == ref_allcols
419428
df_bad = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7]])
420429
@test_throws ArgumentError flatten(df_bad, [:b, :c])
430+
@test flatten(DataFrame(), []) == DataFrame()
431+
@test flatten(DataFrame(), All()) == DataFrame()
421432
end
422433

423434
@testset "stack categorical test" begin

0 commit comments

Comments
 (0)