Skip to content

Commit 7a51bf8

Browse files
committed
group_by + summarise #4
1 parent 4da15b7 commit 7a51bf8

13 files changed

+387
-250
lines changed

R/Math.R

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
2+
# Math.datacube <- function(x, ..., p = openeo::processes()){
3+
# if (!.Generic %in% names(p)){
4+
# stop(cli::format_error(paste(.Generic, "is not an available process")))
5+
# }
6+
# p[[.Generic]](x)
7+
# }
8+
#
9+
# get_fn <- function(op) {
10+
# tbl = data.frame(sym = c("==", "!=", "<", ">", "<=", ">="), fn = c("eq", "neq", "lt", "gt", "lte", "gte")) # etc...
11+
# fn = tbl[match(x, tbl$sym),]$fn
12+
# if (is.na(fn)){
13+
# stop(cli::format_error(paste(op, "not available as operator")))
14+
# }
15+
# fn
16+
# }
17+
#
18+
# Ops.datacube <- function(e1, e2) {
19+
# p = openeo::processes() # can't pass as parameter here
20+
# fn = get_fn(.Generic)
21+
# if (!fn %in% names(p)){
22+
# stop(cli::format_error(paste(.Generic, "is not available as a process")))
23+
# }
24+
# p[[fn]](e1, e1)
25+
# }
26+

R/filter.R

+4-15
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,6 @@
6767
#' st_as_sfc()
6868
#'
6969
#' dc = dc2 %>% filter(.geometries = pol)
70-
#'
71-
#' # array_filter
72-
#' # ToDO...
73-
#' # filter_labels
74-
#' # ToDO
7570
#' @export
7671
filter.datacube <- function(.data = NULL, ...,
7772
.condition = NULL, .dimension = NULL, .context = NULL,
@@ -80,12 +75,8 @@ filter.datacube <- function(.data = NULL, ...,
8075
.p = openeo::processes(.con), .con = NULL) {
8176

8277
#check dots ...
83-
dots = list(...)
84-
85-
for (i in dots){
86-
if (length(dots) != 0){
87-
inherits(dots)
88-
}
78+
if (length(list(...)) > 0) {
79+
cli::cli_alert_warning("Additional arguments were passed")
8980
}
9081

9182
# check mandatory argument
@@ -285,8 +276,7 @@ filter.datacube <- function(.data = NULL, ...,
285276
}
286277

287278
#array_filter
288-
if (all(is.null(.geometries), is.null(.extent), !is.null(.condition),
289-
is.null(.dimension))) {
279+
if (all(is.null(.geometries), is.null(.extent), !is.null(.condition), is.null(.dimension))) {
290280
dc = .p$array_filter(data = .data, condition = .condition, context = .context)
291281
cli::cli_alert_success("array_filter applied")
292282
}
@@ -300,8 +290,7 @@ filter.datacube <- function(.data = NULL, ...,
300290
if (inherits(.condition, "logical", "TRUE") == 0){
301291
cli::cli_alert_danger("dimension arg must be logical")}
302292

303-
dc = .p$filter_labels(.data, condition = .condition,
304-
dimension = .dimension, context = .context)
293+
dc = .p$filter_labels(.data, condition = .condition, dimension = .dimension, context = .context)
305294
cli::cli_alert_success("filter_labels applied")
306295
}
307296

R/group_by.R

+40-127
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,30 @@
11

22
#' @title Group by Datacube
3-
#' @description Group by datacube wraps the aggregate_temporal_period(https://processes.openeo.org/#aggregate_temporal_period),
4-
#' aggregate_spatial (https://processes.openeo.org/#aggregate_spatial),
5-
#' and aggregate_temporal(https://processes.openeo.org/#aggregate_temporal),
6-
#' functions into a simulated dplyr's \code{\link[dplyr]{group_by}}.
3+
#' @description Group by datacube works similarly to the dplyr's \code{\link[dplyr]{group_by}}.
4+
#' It does not truly modifiy the datacube, but it registers a grouping or aggregation
5+
#' strategy. One can aggregate a datacube by its spatial dimension, or maybe its
6+
#' temporal dimension, or even a geometry (sf object).
7+
#'
8+
#' The group_by function interacts directly with summarise and it basically will create
9+
#' a subclass called "grouped datacube", with its aggregation method in its environment.
10+
#' That will be searched by the summarise function, when summarising.
711
#' @name group_by
812
#' @rdname group_by
913
#' @param .data datacube object from tidyopeneo
1014
#' @param ... any parameter inherited from dplyr
11-
#' @param .period (optional) For **aggregate_temporal_period** : The time intervals to aggregate.
12-
#' The following pre-defined values are available:* `hour`: Hour of the day* `day`:
13-
#' Day of the year* `week`: Week of the year* `dekad`: Ten day periods,
14-
#' counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 -
15-
#' end of month). The third dekad of the month can range from 8 to 11 days.
16-
#' For example, the fourth dekad is Feb, 1 - Feb, 10 each year.
17-
#' * `month`: Month of the year* `season`: Three month periods of the calendar
18-
#' seasons (December - February, March - May, June - August, September - November).
19-
#' * `tropical-season`: Six month periods of the tropical seasons (November -
20-
#' April, May - October).* `year`: Proleptic years* `decade`: Ten year periods
21-
#' ([0-to-9 decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)), from a
22-
#' year ending in a 0 to the next year ending in a 9.* `decade-ad`: Ten year
23-
#' periods ([1-to-0 decade](https://en.wikipedia.org/wiki/Decade#1-to-0_decade))
24-
#' better aligned with the anno Domini (AD) calendar era, from a year ending in
25-
#' a 1 to the next year ending in a 0.
26-
#' @param .reducer A reducer to be applied for the values contained in each period.
27-
#' A reducer is a single process such as ``mean()`` or a set of processes, which
28-
#' computes a single value for a list of values, see the category 'reducer' for
29-
#' such processes. Periods may not contain any values, which for most reducers
30-
#' leads to no-data (`null`) values by default. It may also be a character referring to one
31-
#' of openeo reducing functions, such as, mean, sum, min, max, etc.
32-
#' @param .dimension (optional). For **aggregate_temporal_period** and **aggregate_temporal** (optional) :
33-
#' The name of the temporal dimension for aggregation. All
34-
#' data along the dimension is passed through the specified reducer. If the
35-
#' dimension is not set or set to `null`, the data cube is expected to only
36-
#' have one temporal dimension. Fails with a `TooManyDimensions` exception if
37-
#' it has more dimensions. Fails with a `DimensionNotAvailable` exception if the
38-
#' specified dimension does not exist.
39-
#' @param .context (optional) Additional data to be passed to the reducer.
40-
#' @param .geometries (optional). For **aggregate_spatial** : Geometries as GeoJSON on which
41-
#' the aggregation will be based.
42-
#' One value will be computed per GeoJSON `Feature`, `Geometry` or
43-
#' `GeometryCollection`. For a `FeatureCollection` multiple values will be computed,
44-
#' one value per contained `Feature`. For example, a single value will be computed
45-
#' for a `MultiPolygon`, but two values will be computed for a `FeatureCollection`
46-
#' containing two polygons.- For **polygons**, the process considers all
47-
#' pixels for which the point at the pixel centre intersects with the corresponding
48-
#' polygon (as defined in the Simple Features standard by the OGC).
49-
#' For **points**, the process considers the closest pixel centre.
50-
#' For **lines** (line strings), the process considers all the pixels whose centres
51-
#' are closest to at least one point on the line.Thus, pixels may be part of
52-
#' multiple geometries and be part of multiple aggregations.To maximize
53-
#' interoperability, a nested `GeometryCollection` should be avoided.
54-
#' Furthermore, a `GeometryCollection` composed of a single type of geometries
55-
#' should be avoided in favour of the corresponding multi-part type
56-
#' (e.g. `MultiPolygon`).
57-
#' @param .target_dimension (optional). For **aggregate-spatial** (optional) : The new dimension name
58-
#' to be used for storing the results. Defaults to `result`.
59-
#' @param .intervals (optional). For **aggregate_temporal** : Left-closed temporal intervals,
60-
#' which are allowed to overlap.
61-
#' Each temporal interval in the array has exactly two elements:1.
62-
#' The first element is the start of the temporal interval. The specified instance
63-
#' in time is **included** in the interval.2. The second element is the end of
64-
#' the temporal interval. The specified instance in time is **excluded** from the
65-
#' interval.The specified temporal strings follow
66-
#' RFC 3339(https://www.rfc-editor.org/rfc/rfc3339.html). Although RFC 3339 prohibits
67-
#' the hour to be '24'(https://www.rfc-editor.org/rfc/rfc3339.html#section-5.7),
68-
#' **this process allows the value '24' for the hour** of an end time in order
69-
#' to make it possible that left-closed time intervals can fully cover the day.
70-
#' @param .labels (optional). For **aggregate_temporal** (optional) : Distinct labels for the intervals, which can contain dates
71-
#' and/or times. Is only required to be specified if the values for the start of
72-
#' the temporal intervals are not distinct and thus the default labels would not
73-
#' be unique. The number of labels and the number of groups need to be equal.
74-
#' @param .con (optional) openeo connection. Default to NULL
75-
#' @param .p (optional) processes available at .con
76-
#' @return datacube
77-
#' @import dplyr openeo cli sf
78-
#' @details If .period is defined, aggregate_temporal_period is run. Else if
79-
#' .geometries is defined, aggregate_spatial runs. Otherwise, if .intervals is passed,
80-
#' aggregate_temporal runs.
15+
#' @param .by aggregation method, such as:
16+
#' 1. "hour", "day", "week", "dekad", "month", "season", "tropical-season", "year", "decade", "decade-ad" for
17+
#' aggregate temporal period.
18+
#'
19+
#' 2. sf object for aggregate spatial
20+
#'
21+
#' 3. list with 2 intervals for aggreggate temporal period
22+
#'
23+
#' 4. 'time', 'temporal', 't' for reduce temporal dimension
24+
#'
25+
#' 5. 'space', 'spatial', 's' for reduce spatial dimension
26+
#' @return grouped datacube
27+
#' @import dplyr openeo sf
8128
#' @seealso [openeo::list_processes()]
8229
#' @importFrom dplyr group_by
8330
#' @examples
@@ -119,26 +66,21 @@
11966
#' p = openeo::processes()
12067
#'
12168
#' # aggregate spatially
122-
#' dc_mean <- dc %>% group_by(.reducer = function(data, context) { p$mean(data) },
123-
#' .geometries = polygons)
69+
#' dc_mean <- dc %>%
70+
#' group_by(polygons) %>%
71+
#' summarise("mean")
72+
#'
73+
#' # reduce temporal dimension
74+
#' dc_sum <- dc %>%
75+
#' group_by("t") %>%
76+
#' summarise("sum")
12477
#'
125-
#' # the same result can be obtained with the simplified version ...
126-
#' dc_mean <- dc %>% group_by(.reducer = "mean",
127-
#' .geometries = polygons)
12878
#' @export
129-
group_by.datacube <- function(.data = NULL, ..., .period = NULL, .reducer = NULL,
130-
.dimension = NULL, .context = NULL,
131-
.geometries = NULL, .target_dimension = "result",
132-
.intervals = NULL, .labels = array(),
133-
.p = openeo::processes(.con), .con = NULL) {
79+
group_by.datacube <- function(.data = NULL, .by = NULL, ...) {
13480

13581
#check dots ...
136-
dots = list(...)
137-
138-
for (i in dots){
139-
if (length(dots) != 0){
140-
inherits(dots)
141-
}
82+
if (length(list(...)) > 0) {
83+
cli::cli_alert_warning("Additional arguments were passed")
14284
}
14385

14486
# check mandatory argument
@@ -148,42 +90,13 @@ group_by.datacube <- function(.data = NULL, ..., .period = NULL, .reducer = NULL
14890
tidyopeneo MUST be passed"
14991
))}
15092

151-
# if reducer is present, it can be either a function call or a function name as string
152-
if (!is.null(.reducer)){
153-
154-
if(inherits(.reducer, "character")){
155-
reducing_process = .reducer
156-
.reducer = function(data, context) {.p[[reducing_process]](data)}
157-
}
158-
159-
}else{
160-
stop(cli::format_error("ERROR : no reducer passed or not implemented"))
161-
}
162-
163-
# aggregate_temporal_period
164-
if (all(!is.null(.data), !is.null(.period), is.null(.geometries), is.null(.intervals))) {
165-
dc = .p$aggregate_temporal_period(data = .data, period = .period, reducer = .reducer,
166-
dimension = .dimension, context = .context)
167-
cli::cli_alert_success("aggregate_temporal_period applied")
168-
}
169-
170-
# aggregate_spatial
171-
if (all(!is.null(.data), !is.null(.geometries), is.null(.period), is.null(.intervals))) {
172-
173-
dc = .p$aggregate_spatial(data = .data, geometries = .geometries,
174-
reducer = .reducer, target_dimension = .target_dimension,
175-
context = .context)
176-
cli::cli_alert_success("aggregate_spatial applied")
177-
}
178-
179-
# aggregate_temporal
180-
if (all(!is.null(.data), is.null(.geometries), is.null(.period), !is.null(.intervals))) {
181-
dc = .p$aggregate_temporal(data = .data, intervals = .intervals,
182-
reducer = .reducer, dimension = .dimension,
183-
context = .context)
184-
cli::cli_alert_success("aggregate_temporal applied")
185-
}
186-
187-
structure(dc, class = c("datacube", class(dc)))
93+
# add a tag to grouped dc
94+
# .data$group = .by
95+
group_env <- environment(.data)
96+
group_env$group <- .by
18897

98+
# check if there's a by
99+
attr(.data, "group_env") <- group_env
100+
structure(.data, class = unique(c("grouped datacube", class(.data)))) # Class definition
101+
return(.data)
189102
}

R/mutate.R

+2-6
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,8 @@ mutate.datacube <- function(.data = NULL, ...,
117117
.p = openeo::processes(.con), .con = NULL) {
118118

119119
#check dots ...
120-
dots = list(...)
121-
122-
for (i in dots){
123-
if (length(dots) != 0){
124-
inherits(dots)
125-
}
120+
if (length(list(...)) > 0) {
121+
cli::cli_alert_warning("Additional arguments were passed")
126122
}
127123

128124
# check mandatory argument

R/rename.R

+2-6
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,8 @@ rename.datacube <- function(.data = NULL, ..., .source, .target,
3939
.p = openeo::processes(.con), .con = NULL) {
4040

4141
#check dots ...
42-
dots = list(...)
43-
44-
for (i in dots){
45-
if (length(dots) != 0){
46-
inherits(dots)
47-
}
42+
if (length(list(...)) > 0) {
43+
cli::cli_alert_warning("Additional arguments were passed")
4844
}
4945

5046
# check mandatory argument

R/select.R

+2-6
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,8 @@ select.datacube <- function(.data = NULL, ...,
3737

3838

3939
#check dots ...
40-
dots = list(...)
41-
42-
for (i in dots){
43-
if (length(dots) != 0){
44-
inherits(dots)
45-
}
40+
if (length(list(...)) > 0) {
41+
cli::cli_alert_warning("Additional arguments were passed")
4642
}
4743

4844
# check mandatory argument

R/slice.R

+3-6
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,11 @@ slice.datacube <- function(.data = NULL, ...,
4949
.p = openeo::processes(.con), .con = NULL){
5050

5151
#check dots ...
52-
dots = list(...)
53-
54-
for (i in dots){
55-
if (length(dots) != 0){
56-
inherits(dots)
57-
}
52+
if (length(list(...)) > 0) {
53+
cli::cli_alert_warning("Additional arguments were passed")
5854
}
5955

56+
6057
# check mandatory argument
6158
if (is.null(.data)) {
6259
stop(cli::format_error(

0 commit comments

Comments
 (0)