Skip to content

Commit 7bc7c88

Browse files
committed
Merge branch 'issue720-synchronize-docs'
2 parents 0fa49c3 + bae921a commit 7bc7c88

File tree

10 files changed

+423
-139
lines changed

10 files changed

+423
-139
lines changed

openeo/internal/documentation.py

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44

55
import collections
66
import inspect
7+
import re
78
import textwrap
89
from functools import partial
9-
from typing import Callable, Optional, Tuple, TypeVar
10+
from typing import Callable, Dict, List, Optional, Sequence, Tuple, TypeVar, Union
1011

1112
# TODO: give this a proper public API?
1213
_process_registry = collections.defaultdict(list)
@@ -58,3 +59,85 @@ def decorate(f: Callable) -> Callable:
5859
return f
5960

6061
return decorate
62+
63+
64+
def get_docstring(obj: Union[str, Callable]) -> str:
    """
    Get the (dedented) docstring of a method or function.

    :param obj: either a docstring given directly as a string,
        or an object (e.g. function or method) to read the ``__doc__`` attribute from.
    :return: the docstring with the common leading whitespace removed,
        or an empty string if the object has no docstring.
    """
    if isinstance(obj, str):
        doc = obj
    else:
        # `__doc__` is `None` for undocumented objects (and when running with `-OO`),
        # which `textwrap.dedent` can not handle: fall back on an empty docstring.
        doc = obj.__doc__ or ""
    return textwrap.dedent(doc)
73+
74+
75+
def extract_params(doc: Union[str, Callable]) -> Dict[str, str]:
    """
    Collect the parameter descriptions (``:param name:`` fields) of a docstring.

    :param doc: docstring, or object to take the docstring from.
    :return: mapping of parameter name to its (whitespace stripped) description.
    """
    # A description runs until the end of the line,
    # plus any directly following lines that start with at least one space.
    pattern = r"^:param\s+(?P<param>\w+)\s*:(?P<doc>.*(\n +.*)*)"
    params = {}
    for match in re.finditer(pattern, get_docstring(doc), flags=re.MULTILINE):
        params[match.group("param")] = match.group("doc").strip()
    return params
82+
83+
84+
def extract_return(doc: Union[str, Callable]) -> Union[str, None]:
    """
    Find the return value description (``:return:`` field) of a docstring.

    :param doc: docstring, or object to take the docstring from.
    :return: the (whitespace stripped) description,
        or ``None`` if there is no ``:return:`` field.
    """
    # Same continuation rule as for parameters:
    # following lines belong to the description as long as they start with a space.
    pattern = r"^:return\s*:(?P<doc>.*(\n +.*)*)"
    found = [
        match.group("doc").strip()
        for match in re.finditer(pattern, get_docstring(doc), flags=re.MULTILINE)
    ]
    # More than one `:return:` field is considered a mistake in the docstring.
    assert len(found) <= 1
    if not found:
        return None
    return found[0]
93+
94+
95+
def extract_main_description(doc: Union[str, Callable]) -> List[str]:
    """
    Get the main description of a docstring:
    all paragraphs that come before the first field (e.g. params/return) paragraph.

    :param doc: docstring, or object to take the docstring from.
    :return: list of paragraphs (leading/trailing newlines stripped).
    """
    # Paragraphs are separated by one or more blank (whitespace-only) lines.
    parts = re.split(r"\s*\n(?:\s*\n)+", get_docstring(doc))
    # Cut off at the first paragraph that starts with a colon (like `:param ...`).
    stop = next(
        (index for index, part in enumerate(parts) if re.match(r"\s*:", part)),
        len(parts),
    )
    paragraphs = [part.strip("\n") for part in parts[:stop]]
    assert len(paragraphs) > 0
    return paragraphs
107+
108+
109+
def assert_same_param_docs(doc_a: Union[str, Callable], doc_b: Union[str, Callable], only_intersection: bool = False):
    """
    Assert that two docstrings document their parameters (``:param name:`` fields) identically.

    :param doc_a: first docstring, or object to take the docstring from.
    :param doc_b: second docstring, or object to take the docstring from.
    :param only_intersection: whether to only compare the parameters
        that are documented in both docstrings.
    """
    # TODO: option to also check order?
    params_a = extract_params(doc_a)
    params_b = extract_params(doc_b)

    if only_intersection:
        shared = set(params_a).intersection(params_b)
        params_a = {name: text for name, text in params_a.items() if name in shared}
        params_b = {name: text for name, text in params_b.items() if name in shared}

    assert params_a == params_b
123+
124+
125+
def assert_same_return_docs(doc_a: Union[str, Callable], doc_b: Union[str, Callable]):
    """
    Assert that two docstrings have the same return value description (``:return:`` field).

    :param doc_a: first docstring, or object to take the docstring from.
    :param doc_b: second docstring, or object to take the docstring from.
    """
    return_a = extract_return(doc_a)
    return_b = extract_return(doc_b)
    assert return_a == return_b
130+
131+
132+
def assert_same_main_description(doc_a: Union[str, Callable], doc_b: Union[str, Callable], ignore: Sequence[str] = ()):
    """
    Assert that two docstrings have the same main description
    (the paragraphs before the params/return fields).

    :param doc_a: first docstring, or object to take the docstring from.
    :param doc_b: second docstring, or object to take the docstring from.
    :param ignore: substrings to mask (replace with a fixed placeholder)
        in both descriptions before comparing them.
    """
    descriptions = [extract_main_description(doc_a), extract_main_description(doc_b)]

    for snippet in ignore:
        # Mask the snippet on both sides, so that it can not cause a difference.
        descriptions = [
            [paragraph.replace(snippet, "<IGNORED>") for paragraph in paragraphs]
            for paragraphs in descriptions
        ]

    description_a, description_b = descriptions
    assert description_a == description_b

openeo/rest/connection.py

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1559,21 +1559,27 @@ def download(
15591559
job_options: Optional[dict] = None,
15601560
) -> Union[None, bytes]:
15611561
"""
1562-
Downloads the result of a process graph synchronously,
1563-
and save the result to the given file or return bytes object if no outputfile is specified.
1564-
This method is useful to export binary content such as images. For json content, the execute method is recommended.
1562+
Send the underlying process graph to the backend
1563+
for synchronous processing and directly download the result.
1564+
1565+
If ``outputfile`` is provided, the result is downloaded to that path.
1566+
Otherwise a :py:class:`bytes` object is returned with the raw data.
15651567
15661568
:param graph: (flat) dict representing a process graph, or process graph as raw JSON string,
15671569
or as local file path or URL
1568-
:param outputfile: output file
1570+
:param outputfile: (optional) output path to download to.
15691571
:param timeout: timeout to wait for response
1570-
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
1572+
:param validate: (optional) toggle to enable/prevent validation of the process graphs before execution
15711573
(overruling the connection's ``auto_validate`` setting).
15721574
:param chunk_size: chunk size for streaming response.
1573-
:param additional: additional (top-level) properties to set in the request body
1574-
:param job_options: dictionary of job options to pass to the backend
1575+
:param additional: (optional) additional (top-level) properties to set in the request body
1576+
:param job_options: (optional) dictionary of job options to pass to the backend
15751577
(under top-level property "job_options")
15761578
1579+
:return: if ``outputfile`` was not specified:
1580+
a :py:class:`bytes` object containing the raw data.
1581+
Otherwise, ``None`` is returned.
1582+
15771583
.. versionadded:: 0.36.0
15781584
Added arguments ``additional`` and ``job_options``.
15791585
"""
@@ -1595,6 +1601,7 @@ def download(
15951601
with target.open(mode="wb") as f:
15961602
for chunk in response.iter_content(chunk_size=chunk_size):
15971603
f.write(chunk)
1604+
# TODO: return target path instead of None? Or return a generic result wrapper?
15981605
else:
15991606
return response.content
16001607

@@ -1659,27 +1666,35 @@ def create_job(
16591666
log_level: Optional[str] = None,
16601667
) -> BatchJob:
16611668
"""
1662-
Create a new job from given process graph on the back-end.
1669+
Send the underlying process graph to the backend
1670+
to create an openEO batch job
1671+
and return a corresponding :py:class:`~openeo.rest.job.BatchJob` instance.
1672+
1673+
Note that this method only *creates* the openEO batch job at the backend,
1674+
but it does not *start* it.
1675+
Use :py:meth:`execute_batch` instead to let the openEO Python client
1676+
take care of the full job life cycle: create, start and track its progress until completion.
16631677
16641678
:param process_graph: openEO-style (flat) process graph representation,
16651679
or an object that can be converted to such a representation:
16661680
a dictionary, a :py:class:`~openeo.rest.datacube.DataCube` object,
16671681
a string with a JSON representation,
16681682
a local file path or URL to a JSON representation,
16691683
a :py:class:`~openeo.rest.multiresult.MultiResult` object, ...
1670-
:param title: job title
1671-
:param description: job description
1672-
:param plan: The billing plan to process and charge the job with
1673-
:param budget: Maximum budget to be spent on executing the job.
1684+
:param title: (optional) job title.
1685+
:param description: (optional) job description.
1686+
:param plan: (optional) the billing plan to process and charge the job with.
1687+
:param budget: (optional) maximum budget to be spent on executing the job.
16741688
Note that some backends do not honor this limit.
1675-
:param additional: additional (top-level) properties to set in the request body
1676-
:param job_options: dictionary of job options to pass to the backend
1689+
:param additional: (optional) additional (top-level) properties to set in the request body
1690+
:param job_options: (optional) dictionary of job options to pass to the backend
16771691
(under top-level property "job_options")
1678-
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
1692+
:param validate: (optional) toggle to enable/prevent validation of the process graphs before execution
16791693
(overruling the connection's ``auto_validate`` setting).
1680-
:param log_level: Optional minimum severity level for log entries that the back-end should keep track of.
1694+
:param log_level: (optional) minimum severity level for log entries that the back-end should keep track of.
16811695
One of "error" (highest severity), "warning", "info", and "debug" (lowest severity).
1682-
:return: Created job
1696+
1697+
:return: Handle to the job created at the backend.
16831698
16841699
.. versionchanged:: 0.35.0
16851700
Add :ref:`multi-result support <multi-result-process-graphs>`.

openeo/rest/datacube.py

Lines changed: 40 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2338,7 +2338,7 @@ def save_result(
23382338
Materialize the processed data to the given file format.
23392339
23402340
:param format: an output format supported by the backend.
2341-
:param options: file format options
2341+
:param options: (optional) file format options
23422342
23432343
.. versionchanged:: 0.39.0
23442344
returns a :py:class:`~openeo.rest.result.SaveResult` instance instead
@@ -2384,22 +2384,25 @@ def download(
23842384
job_options: Optional[dict] = None,
23852385
) -> Union[None, bytes]:
23862386
"""
2387-
Execute synchronously and download the raster data cube, e.g. as GeoTIFF.
2387+
Send the underlying process graph to the backend
2388+
for synchronous processing and directly download the result.
23882389
2389-
If outputfile is provided, the result is stored on disk locally, otherwise, a bytes object is returned.
2390-
The bytes object can be passed on to a suitable decoder for decoding.
2390+
If ``outputfile`` is provided, the result is downloaded to that path.
2391+
Otherwise a :py:class:`bytes` object is returned with the raw data.
23912392
2392-
:param outputfile: Optional, output path to download to.
2393-
:param format: Optional, an output format supported by the backend.
2394-
:param options: Optional, file format options
2395-
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
2393+
:param outputfile: (optional) output path to download to.
2394+
:param format: (optional) an output format supported by the backend.
2395+
:param options: (optional) file format options
2396+
:param validate: (optional) toggle to enable/prevent validation of the process graphs before execution
23962397
(overruling the connection's ``auto_validate`` setting).
2397-
:param auto_add_save_result: Automatically add a ``save_result`` node to the process graph.
2398-
:param additional: additional (top-level) properties to set in the request body
2399-
:param job_options: dictionary of job options to pass to the backend
2398+
:param auto_add_save_result: whether to automatically add a ``save_result`` node to the process graph.
2399+
:param additional: (optional) additional (top-level) properties to set in the request body
2400+
:param job_options: (optional) dictionary of job options to pass to the backend
24002401
(under top-level property "job_options")
24012402
2402-
:return: None if the result is stored to disk, or a bytes object returned by the backend.
2403+
:return: if ``outputfile`` was not specified:
2404+
a :py:class:`bytes` object containing the raw data.
2405+
Otherwise, ``None`` is returned.
24032406
24042407
.. versionchanged:: 0.32.0
24052408
Added ``auto_add_save_result`` option
@@ -2544,24 +2547,27 @@ def execute_batch(
25442547
for batch jobs that are expected to complete
25452548
in a time that is reasonable for your use case.
25462549
2547-
:param outputfile: Optional, output path to download to.
2548-
:param out_format: (optional) File format to use for the job result.
2549-
:param title: job title.
2550-
:param description: job description.
2551-
:param plan: The billing plan to process and charge the job with
2552-
:param budget: Maximum budget to be spent on executing the job.
2550+
:param outputfile: (optional) output path to download to.
2551+
:param out_format: (optional) file format to use for the job result.
2552+
:param title: (optional) job title.
2553+
:param description: (optional) job description.
2554+
:param plan: (optional) the billing plan to process and charge the job with.
2555+
:param budget: (optional) maximum budget to be spent on executing the job.
25532556
Note that some backends do not honor this limit.
2554-
:param additional: additional (top-level) properties to set in the request body
2555-
:param job_options: dictionary of job options to pass to the backend
2557+
:param additional: (optional) additional (top-level) properties to set in the request body
2558+
:param job_options: (optional) dictionary of job options to pass to the backend
25562559
(under top-level property "job_options")
2557-
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
2560+
:param validate: (optional) toggle to enable/prevent validation of the process graphs before execution
25582561
(overruling the connection's ``auto_validate`` setting).
2559-
:param auto_add_save_result: Automatically add a ``save_result`` node to the process graph.
2562+
:param auto_add_save_result: whether to automatically add a ``save_result`` node to the process graph.
25602563
:param show_error_logs: whether to automatically print error logs when the batch job failed.
2561-
:param log_level: Optional minimum severity level for log entries that the back-end should keep track of.
2564+
:param log_level: (optional) minimum severity level for log entries that the back-end should keep track of.
25622565
One of "error" (highest severity), "warning", "info", and "debug" (lowest severity).
25632566
:param max_poll_interval: maximum number of seconds to sleep between job status polls
25642567
:param connection_retry_interval: how long to wait when status poll failed due to connection issue
2568+
:param print: print/logging function to show progress/status
2569+
2570+
:return: Handle to the job created at the backend.
25652571
25662572
.. versionchanged:: 0.32.0
25672573
Added ``auto_add_save_result`` option
@@ -2632,22 +2638,22 @@ def create_job(
26322638
Use :py:meth:`execute_batch` instead to let the openEO Python client
26332639
take care of the full job life cycle: create, start and track its progress until completion.
26342640
2635-
:param out_format: output file format.
2636-
:param title: job title.
2637-
:param description: job description.
2638-
:param plan: The billing plan to process and charge the job with.
2639-
:param budget: Maximum budget to be spent on executing the job.
2641+
:param out_format: (optional) file format to use for the job result.
2642+
:param title: (optional) job title.
2643+
:param description: (optional) job description.
2644+
:param plan: (optional) the billing plan to process and charge the job with.
2645+
:param budget: (optional) maximum budget to be spent on executing the job.
26402646
Note that some backends do not honor this limit.
2641-
:param additional: additional (top-level) properties to set in the request body
2642-
:param job_options: dictionary of job options to pass to the backend
2647+
:param additional: (optional) additional (top-level) properties to set in the request body
2648+
:param job_options: (optional) dictionary of job options to pass to the backend
26432649
(under top-level property "job_options")
2644-
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
2650+
:param validate: (optional) toggle to enable/prevent validation of the process graphs before execution
26452651
(overruling the connection's ``auto_validate`` setting).
2646-
:param auto_add_save_result: Automatically add a ``save_result`` node to the process graph.
2647-
:param log_level: Optional minimum severity level for log entries that the back-end should keep track of.
2652+
:param auto_add_save_result: whether to automatically add a ``save_result`` node to the process graph.
2653+
:param log_level: (optional) minimum severity level for log entries that the back-end should keep track of.
26482654
One of "error" (highest severity), "warning", "info", and "debug" (lowest severity).
26492655
2650-
:return: Created job.
2656+
:return: Handle to the job created at the backend.
26512657
26522658
.. versionchanged:: 0.32.0
26532659
Added ``auto_add_save_result`` option

openeo/rest/job.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,12 +233,14 @@ def run_synchronous(
233233
"""
234234
Start the job, wait for it to finish and download result
235235
236-
:param outputfile: The path of a file to which a result can be written
236+
:param outputfile: (optional) output path to download to.
237237
:param print: print/logging function to show progress/status
238238
:param max_poll_interval: maximum number of seconds to sleep between job status polls
239239
:param connection_retry_interval: how long to wait when status poll failed due to connection issue
240240
:param show_error_logs: whether to automatically print error logs when the batch job failed.
241241
242+
:return: Handle to the job created at the backend.
243+
242244
.. versionchanged:: 0.37.0
243245
Added argument ``show_error_logs``.
244246
"""

0 commit comments

Comments
 (0)