Skip to content

Commit ac0fe7f

Browse files
implement load_function kwarg for collect_results! (#424)
The changes in this branch are a follow-up to a previous pull request based on commit 6e6ff07 in PR #421. In that PR there were issues with whitespace changes inadvertently coming from the autoformatter in VS Code. Reverting the whitespace-only changes proved to be more difficult than anticipated. So to resolve this, this branch was created and a new PR will be created from it. The whitespace issues are gone but all the feedback and changes from the original PR are retained. The commit makes the following changes. - add the `load_function` kwarg to `collect_results`. This allows customizing how data is loaded from file before being processed into a dataframe by `collect_results`. - add a test to `update_results_tests.jl` - update docstring of `collect_results` - increase package version to 2.16.0 - update `CHANGELOG.md` All tests passed, 589 of 589.
1 parent 09e1029 commit ac0fe7f

File tree

4 files changed

+30
-9
lines changed

4 files changed

+30
-9
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# 2.16.0
2+
3+
- Add `load_function` keyword argument to `collect_results` to customize how data is loaded from file before being converted to a dataframe by `collect_results`
4+
15
# 2.15.0
26

37
- Add `wload_kwargs` to `produce_or_load` to allow passing kwargs to `wload`

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "DrWatson"
22
uuid = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
33
repo = "https://github.com/JuliaDynamics/DrWatson.jl.git"
4-
version = "2.15.0"
4+
version = "2.16.0"
55

66
[deps]
77
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"

src/result_collection.jl

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ See also [`collect_results`](@ref).
5050
* `black_list = [:gitcommit, :gitpatch, :script]`: List of keys not to include from result-file.
5151
* `special_list = []`: List of additional (derived) key-value pairs
5252
to put in `df` as explained below.
53+
* `load_function = wload`: Load function. Defaults to `wload`. You may want to specify a custom load function for example if you store results as a struct and you want the fields of the struct to form the columns of the dataframe. The struct is saved to file as a one-element dictionary so the dataframe will only have a single column. To work around this you could convert it to a dictionary by specifying `load_function = (filename) -> struct2dict(wload(filename)["mykey"])`. This way `collect_results` will receive a `Dict` whose keys are the fields of the struct.
5354
5455
`special_list` is a `Vector` where each entry
5556
is a derived quantity to be included in `df`. There are two types of entries.
@@ -90,6 +91,7 @@ function collect_results!(filename, folder;
9091
newfile = false, # keyword only for defining collect_results without !
9192
rinclude = [r""],
9293
rexclude = [r"^\b$"],
94+
load_function = wload,
9395
kwargs...)
9496

9597
@assert all(eltype(r) <: Regex for r in (rinclude, rexclude)) "Elements of `rinclude` and `rexclude` must be Regex expressions."
@@ -100,7 +102,7 @@ function collect_results!(filename, folder;
100102
mtimes = Dict{String,Float64}()
101103
else
102104
verbose && @info "Loading existing result collection..."
103-
data = wload(filename)
105+
data = load_function(filename)
104106
df = data["df"]
105107
# Check if we have pre-recorded mtimes (if not this could be because of an old results database).
106108
if "mtime" ∈ keys(data)
@@ -170,7 +172,7 @@ function collect_results!(filename, folder;
170172
mtimes[file] = mtime_file
171173

172174
fpath = rpath === nothing ? file : joinpath(rpath, file)
173-
df_new = to_data_row(FileIO.query(fpath); kwargs...)
175+
df_new = to_data_row(FileIO.query(fpath); load_function=load_function, kwargs...)
174176
#add filename
175177
df_new[!, :path] .= file
176178
if replace_entry
@@ -231,18 +233,17 @@ is_valid_file(file, valid_filetypes) =
231233
any(endswith(file, v) for v in valid_filetypes)
232234

233235
# Use wload per default when nothing else is available
234-
function to_data_row(file::File; kwargs...)
236+
function to_data_row(file::File; load_function=wload, kwargs...)
235237
fpath = filename(file)
236238
@debug "Opening $(filename(file)) with fallback wload."
237-
return to_data_row(wload(fpath), fpath; kwargs...)
239+
return to_data_row(load_function(fpath), fpath; kwargs...)
238240
end
239241
# Specialize for JLD2 files, can do much faster mmapped access
240-
function to_data_row(file::File{format"JLD2"}; kwargs...)
242+
function to_data_row(file::File{format"JLD2"}; load_function=(filename) -> JLD2.jldopen(filename, "r"), kwargs...)
241243
fpath = filename(file)
242244
@debug "Opening $(filename(file)) with jldopen."
243-
JLD2.jldopen(filename(file), "r") do data
244-
return to_data_row(data, fpath; kwargs...)
245-
end
245+
data = load_function(fpath)
246+
return to_data_row(data, fpath; kwargs...)
246247
end
247248
function to_data_row(data, file;
248249
white_list = collect(keys(data)),

test/update_results_tests.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,22 @@ cres_relpath = collect_results!(relpathname, folder;
6464
rpath = projectdir())
6565
@info all(startswith.(cres[!,"path"], "data"))
6666

67+
struct dummy
68+
a::Float64
69+
b::Int64
70+
c::Matrix{Float64}
71+
end
72+
_dummy_matrix = rand(3,3)
73+
_dummy = dummy(1.0, 1, _dummy_matrix)
74+
wsave(datadir("dummy.jld2"), "dummy", _dummy)
75+
76+
actual_dataframe = collect_results(datadir(), rinclude=[r"dummy.jld2"], load_function=(filename) -> struct2dict(wload(filename)["dummy"]))
77+
_dataframe_vector = Vector{Union{Missing, Matrix{Float64}}}(undef, 1)
78+
_dataframe_vector[1] = _dummy_matrix
79+
expected_dataframe = DataFrame(a = 1.0, b = 1, c = _dataframe_vector, path = datadir("dummy.jld2"))
80+
81+
@test actual_dataframe == expected_dataframe
82+
6783
###############################################################################
6884
# Trailing slash in foldername #
6985
###############################################################################

0 commit comments

Comments
 (0)