Skip to content

Commit f9a75e5

Browse files
authored
Install Spark 4 release version (#2300)
1 parent 08cc9f7 commit f9a75e5

File tree

4 files changed

+8
-11
lines changed

4 files changed

+8
-11
lines changed

images/pyspark-notebook/Dockerfile

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,12 @@ USER ${NB_UID}
 # NOTE: It's important to ensure compatibility between Pandas versions.
 # The pandas version in this Dockerfile should match the version
 # on which the Pandas API for Spark is built.
-# To find the right version:
-# 1. Check out the Spark branch you are on: <https://github.com/apache/spark>
-# 2. Find the pandas version in the file `dev/infra/Dockerfile`.
+# To find the right version, check the pandas version being installed here:
+# https://github.com/apache/spark/blob/<SPARK_VERSION>/dev/infra/Dockerfile
 RUN mamba install --yes \
     'grpcio-status' \
     'grpcio' \
-    'pandas=2.2.2' \
+    'pandas=2.2.3' \
     'pyarrow' && \
     mamba clean --all -f -y && \
     fix-permissions "${CONDA_DIR}" && \

images/pyspark-notebook/setup_spark.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,8 @@ def get_latest_spark_version() -> str:
     LOGGER.info("Downloading Spark versions information")
     all_refs = get_all_refs("https://archive.apache.org/dist/spark/")
     LOGGER.info(f"All refs: {all_refs}")
-    versions = [
-        ref.removeprefix("spark-").removesuffix("/")
-        for ref in all_refs
-        if re.match(r"^spark-\d", ref) is not None and "incubating" not in ref
-    ]
+    pattern = re.compile(r"^spark-(\d+\.\d+\.\d+)/$")
+    versions = [match.group(1) for ref in all_refs if (match := pattern.match(ref))]
     LOGGER.info(f"Available versions: {versions}")

     # Compare versions semantically
@@ -74,6 +71,7 @@ def download_spark(
         spark_dir_name += f"-scala{scala_version}"
     LOGGER.info(f"Spark directory name: {spark_dir_name}")
     spark_url = spark_download_url / f"spark-{spark_version}" / f"{spark_dir_name}.tgz"
+    LOGGER.info(f"Spark download URL: {spark_url}")

     tmp_file = Path("/tmp/spark.tar.gz")
     subprocess.check_call(

tests/by_image/pyspark-notebook/units/unit_pandas_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
 # Distributed under the terms of the Modified BSD License.
 import pandas

-assert pandas.__version__ == "2.2.2"
+assert pandas.__version__ == "2.2.3"

tests/shared_checks/nbconvert_check.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def check_nbconvert(
     no_warnings: bool = True,
 ) -> str:
     """Check if nbconvert is able to convert a notebook file"""
-    cont_data_file = "/home/jovyan/data/" + host_file.name
+    cont_data_file = "/home/jovyan/" + host_file.name

     output_dir = "/tmp"
     LOGGER.info(

0 commit comments

Comments
 (0)