File tree Expand file tree Collapse file tree 4 files changed +8
-11
lines changed
by_image/pyspark-notebook/units Expand file tree Collapse file tree 4 files changed +8
-11
lines changed Original file line number Diff line number Diff line change @@ -64,13 +64,12 @@ USER ${NB_UID}
64
64
# NOTE: It's important to ensure compatibility between Pandas versions.
65
65
# The pandas version in this Dockerfile should match the version
66
66
# on which the Pandas API for Spark is built.
67
- # To find the right version:
68
- # 1. Check out the Spark branch you are on: <https://github.com/apache/spark>
69
- # 2. Find the pandas version in the file `dev/infra/Dockerfile`.
67
+ # To find the right version, check the pandas version being installed here:
68
+ # https://github.com/apache/spark/blob/<SPARK_VERSION>/dev/infra/Dockerfile
70
69
RUN mamba install --yes \
71
70
'grpcio-status' \
72
71
'grpcio' \
73
- 'pandas=2.2.2' \
72
+ 'pandas=2.2.3' \
74
73
'pyarrow' && \
75
74
mamba clean --all -f -y && \
76
75
fix-permissions "${CONDA_DIR}" && \
Original file line number Diff line number Diff line change @@ -35,11 +35,8 @@ def get_latest_spark_version() -> str:
35
35
LOGGER.info("Downloading Spark versions information")
36
36
all_refs = get_all_refs("https://archive.apache.org/dist/spark/")
37
37
LOGGER.info(f"All refs: {all_refs}")
38
- versions = [
39
- ref.removeprefix("spark-").removesuffix("/")
40
- for ref in all_refs
41
- if re.match(r"^spark-\d", ref) is not None and "incubating" not in ref
42
- ]
38
+ pattern = re.compile(r"^spark-(\d+\.\d+\.\d+)/$")
39
+ versions = [match.group(1) for ref in all_refs if (match := pattern.match(ref))]
43
40
LOGGER.info(f"Available versions: {versions}")
44
41
45
42
# Compare versions semantically
@@ -74,6 +71,7 @@ def download_spark(
74
71
spark_dir_name += f"-scala{scala_version}"
75
72
LOGGER.info(f"Spark directory name: {spark_dir_name}")
76
73
spark_url = spark_download_url / f"spark-{spark_version}" / f"{spark_dir_name}.tgz"
74
+ LOGGER.info(f"Spark download URL: {spark_url}")
77
75
78
76
tmp_file = Path("/tmp/spark.tar.gz")
79
77
subprocess.check_call(
Original file line number Diff line number Diff line change 2
2
# Distributed under the terms of the Modified BSD License.
3
3
import pandas
4
4
5
- assert pandas.__version__ == "2.2.2"
5
+ assert pandas.__version__ == "2.2.3"
Original file line number Diff line number Diff line change @@ -17,7 +17,7 @@ def check_nbconvert(
17
17
no_warnings : bool = True ,
18
18
) -> str :
19
19
"""Check if nbconvert is able to convert a notebook file"""
20
- cont_data_file = "/home/jovyan/data/" + host_file.name
20
+ cont_data_file = "/home/jovyan/" + host_file.name
21
21
22
22
output_dir = "/tmp"
23
23
LOGGER .info (
You can’t perform that action at this time.
0 commit comments