
Commit 469baa9

Merge branch 'apache:master' into windowFuncFix
2 parents e99ddda + 72d11ea commit 469baa9

File tree

30 files changed: +781 −241 lines changed

.github/workflows/build_and_test.yml

Lines changed: 1 addition & 1 deletion
@@ -1240,7 +1240,7 @@ jobs:
           sudo apt update
           sudo apt-get install r-base
       - name: Start Minikube
-        uses: medyagh/setup-minikube@v0.0.18
+        uses: medyagh/setup-minikube@v0.0.19
         with:
           kubernetes-version: "1.33.0"
           # Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic

.github/workflows/build_infra_images_cache.yml

Lines changed: 14 additions & 0 deletions
@@ -36,6 +36,7 @@ on:
       - 'dev/spark-test-image/python-309/Dockerfile'
       - 'dev/spark-test-image/python-310/Dockerfile'
       - 'dev/spark-test-image/python-311/Dockerfile'
+      - 'dev/spark-test-image/python-311-classic-only/Dockerfile'
       - 'dev/spark-test-image/python-312/Dockerfile'
       - 'dev/spark-test-image/python-313/Dockerfile'
       - 'dev/spark-test-image/python-313-nogil/Dockerfile'
@@ -191,6 +192,19 @@ jobs:
       - name: Image digest (PySpark with Python 3.11)
         if: hashFiles('dev/spark-test-image/python-311/Dockerfile') != ''
         run: echo ${{ steps.docker_build_pyspark_python_311.outputs.digest }}
+      - name: Build and push (PySpark Classic Only with Python 3.11)
+        if: hashFiles('dev/spark-test-image/python-311-classic-only/Dockerfile') != ''
+        id: docker_build_pyspark_python_311_classic_only
+        uses: docker/build-push-action@v6
+        with:
+          context: ./dev/spark-test-image/python-311-classic-only/
+          push: true
+          tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-311-classic-only-cache:${{ github.ref_name }}-static
+          cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-311-classic-only-cache:${{ github.ref_name }}
+          cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-311-classic-only-cache:${{ github.ref_name }},mode=max
+      - name: Image digest (PySpark Classic Only with Python 3.11)
+        if: hashFiles('dev/spark-test-image/python-311-classic-only/Dockerfile') != ''
+        run: echo ${{ steps.docker_build_pyspark_python_311_classic_only.outputs.digest }}
       - name: Build and push (PySpark with Python 3.12)
         if: hashFiles('dev/spark-test-image/python-312/Dockerfile') != ''
         id: docker_build_pyspark_python_312

Lines changed: 47 additions & 0 deletions

@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build / Python-only Classic-only (master, Python 3.11)"
+
+on:
+  schedule:
+    - cron: '0 0 */3 * *'
+  workflow_dispatch:
+
+jobs:
+  run-build:
+    permissions:
+      packages: write
+    name: Run
+    uses: ./.github/workflows/build_and_test.yml
+    if: github.repository == 'apache/spark'
+    with:
+      java: 17
+      branch: master
+      hadoop: hadoop3
+      envs: >-
+        {
+          "PYSPARK_IMAGE_TO_TEST": "python-311-classic-only",
+          "PYTHON_TO_TEST": "python3.11"
+        }
+      jobs: >-
+        {
+          "pyspark": "true",
+          "pyspark-pandas": "true"
+        }

common/utils/src/main/resources/error/error-conditions.json

Lines changed: 0 additions & 5 deletions
@@ -8641,11 +8641,6 @@
       "duration() called on unfinished task"
     ]
   },
-  "_LEGACY_ERROR_TEMP_3027" : {
-    "message" : [
-      "Unrecognized <schedulerModeProperty>: <schedulingModeConf>"
-    ]
-  },
   "_LEGACY_ERROR_TEMP_3028" : {
     "message" : [
       "<errorMsg>"

common/utils/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala

Lines changed: 14 additions & 0 deletions
@@ -57,6 +57,15 @@ private object ConfigHelpers {
     }
   }
 
+  def toEnum[E <: Enum[E]](s: String, enumClass: Class[E], key: String): E = {
+    enumClass.getEnumConstants.find(_.name().equalsIgnoreCase(s.trim)) match {
+      case Some(enum) => enum
+      case None =>
+        throw new IllegalArgumentException(
+          s"$key should be one of ${enumClass.getEnumConstants.mkString(", ")}, but was $s")
+    }
+  }
+
   def stringToSeq[T](str: String, converter: String => T): Seq[T] = {
     SparkStringUtils.stringToSeq(str).map(converter)
   }
@@ -287,6 +296,11 @@ private[spark] case class ConfigBuilder(key: String) {
     new TypedConfigBuilder(this, toEnum(_, e, key))
   }
 
+  def enumConf[E <: Enum[E]](e: Class[E]): TypedConfigBuilder[E] = {
+    checkPrependConfig
+    new TypedConfigBuilder(this, toEnum(_, e, key))
+  }
+
   def timeConf(unit: TimeUnit): TypedConfigBuilder[Long] = {
     checkPrependConfig
     new TypedConfigBuilder(this, timeFromString(_, unit), timeToString(_, unit))
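
The new enumConf(Class[E]) overload above lets a config entry be declared directly against a Java enum, with trimming, case-insensitive matching, and validation handled by ConfigHelpers.toEnum. A minimal usage sketch, modeled on the DBBackend migration later in this commit (the entry name below is illustrative only, not a real Spark config):

    // Hypothetical entry declared against the Java enum DBBackend (illustrative key name).
    private[spark] val ILLUSTRATIVE_DB_BACKEND =
      ConfigBuilder("spark.illustrative.db.backend")
        .enumConf(classOf[DBBackend])
        .createWithDefault(DBBackend.ROCKSDB)

    // Reading yields the enum constant itself; an unrecognized string fails inside toEnum
    // with an IllegalArgumentException that lists the valid constants.
    val backend: DBBackend = conf.get(ILLUSTRATIVE_DB_BACKEND)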

core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala

Lines changed: 3 additions & 4 deletions
@@ -31,7 +31,6 @@ import org.apache.spark.network.crypto.AuthServerBootstrap
 import org.apache.spark.network.netty.SparkTransportConf
 import org.apache.spark.network.server.{TransportServer, TransportServerBootstrap}
 import org.apache.spark.network.shuffle.ExternalBlockHandler
-import org.apache.spark.network.shuffledb.DBBackend
 import org.apache.spark.network.util.TransportConf
 import org.apache.spark.util.{ShutdownHookManager, Utils}
 
@@ -86,11 +85,11 @@ class ExternalShuffleService(sparkConf: SparkConf, securityManager: SecurityMana
   protected def newShuffleBlockHandler(conf: TransportConf): ExternalBlockHandler = {
     if (sparkConf.get(config.SHUFFLE_SERVICE_DB_ENABLED) && enabled) {
       val shuffleDBName = sparkConf.get(config.SHUFFLE_SERVICE_DB_BACKEND)
-      val dbBackend = DBBackend.byName(shuffleDBName)
-      logInfo(log"Use ${MDC(SHUFFLE_DB_BACKEND_NAME, dbBackend.name())} as the implementation of " +
+      logInfo(
+        log"Use ${MDC(SHUFFLE_DB_BACKEND_NAME, shuffleDBName.name())} as the implementation of " +
         log"${MDC(SHUFFLE_DB_BACKEND_KEY, config.SHUFFLE_SERVICE_DB_BACKEND.key)}")
       new ExternalBlockHandler(conf,
-        findRegisteredExecutorsDBFile(dbBackend.fileName(registeredExecutorsDB)))
+        findRegisteredExecutorsDBFile(shuffleDBName.fileName(registeredExecutorsDB)))
     } else {
       new ExternalBlockHandler(conf, null)
     }

core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala

Lines changed: 0 additions & 13 deletions
@@ -267,19 +267,6 @@ private[spark] object SparkCoreErrors {
     new SparkUnsupportedOperationException("_LEGACY_ERROR_TEMP_3026")
   }
 
-  def unrecognizedSchedulerModePropertyError(
-      schedulerModeProperty: String,
-      schedulingModeConf: String): Throwable = {
-    new SparkException(
-      errorClass = "_LEGACY_ERROR_TEMP_3027",
-      messageParameters = Map(
-        "schedulerModeProperty" -> schedulerModeProperty,
-        "schedulingModeConf" -> schedulingModeConf
-      ),
-      cause = null
-    )
-  }
-
   def sparkError(errorMsg: String): Throwable = {
     new SparkException(
       errorClass = "_LEGACY_ERROR_TEMP_3028",

core/src/main/scala/org/apache/spark/internal/config/package.scala

Lines changed: 4 additions & 7 deletions
@@ -807,10 +807,8 @@ package object config {
       .doc("Specifies a disk-based store used in shuffle service local db. " +
         "ROCKSDB or LEVELDB (deprecated).")
       .version("3.4.0")
-      .stringConf
-      .transform(_.toUpperCase(Locale.ROOT))
-      .checkValues(DBBackend.values.map(_.toString).toSet)
-      .createWithDefault(DBBackend.ROCKSDB.name)
+      .enumConf(classOf[DBBackend])
+      .createWithDefault(DBBackend.ROCKSDB)
 
   private[spark] val SHUFFLE_SERVICE_PORT =
     ConfigBuilder("spark.shuffle.service.port").version("1.2.0").intConf.createWithDefault(7337)
@@ -2295,9 +2293,8 @@ package object config {
   private[spark] val SCHEDULER_MODE =
     ConfigBuilder("spark.scheduler.mode")
       .version("0.8.0")
-      .stringConf
-      .transform(_.toUpperCase(Locale.ROOT))
-      .createWithDefault(SchedulingMode.FIFO.toString)
+      .enumConf(SchedulingMode)
+      .createWithDefault(SchedulingMode.FIFO)
 
   private[spark] val SCHEDULER_REVIVE_INTERVAL =
     ConfigBuilder("spark.scheduler.revive.interval")

core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala

Lines changed: 1 addition & 1 deletion
@@ -89,7 +89,7 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, sc: SparkContext
         log"${MDC(LogKeys.FILE_NAME, DEFAULT_SCHEDULER_FILE)}")
       Some((is, DEFAULT_SCHEDULER_FILE))
     } else {
-      val schedulingMode = SchedulingMode.withName(sc.conf.get(SCHEDULER_MODE))
+      val schedulingMode = sc.conf.get(SCHEDULER_MODE)
       rootPool.addSchedulable(new Pool(
         DEFAULT_POOL_NAME, schedulingMode, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT))
       logInfo(log"Fair scheduler configuration not found, created default pool: " +

core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala

Lines changed: 1 addition & 9 deletions
@@ -181,15 +181,7 @@ private[spark] class TaskSchedulerImpl(
 
   private var schedulableBuilder: SchedulableBuilder = null
   // default scheduler is FIFO
-  private val schedulingModeConf = conf.get(SCHEDULER_MODE)
-  val schedulingMode: SchedulingMode =
-    try {
-      SchedulingMode.withName(schedulingModeConf)
-    } catch {
-      case e: java.util.NoSuchElementException =>
-        throw SparkCoreErrors.unrecognizedSchedulerModePropertyError(SCHEDULER_MODE_PROPERTY,
-          schedulingModeConf)
-    }
+  val schedulingMode: SchedulingMode = conf.get(SCHEDULER_MODE)
 
   val rootPool: Pool = new Pool("", schedulingMode, 0, 0)

core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala

Lines changed: 1 addition & 3 deletions
@@ -50,9 +50,7 @@ private[spark] object UIWorkloadGenerator {
     val conf = new SparkConf().setMaster(args(0)).setAppName("Spark UI tester")
 
     val schedulingMode = SchedulingMode.withName(args(1))
-    if (schedulingMode == SchedulingMode.FAIR) {
-      conf.set(SCHEDULER_MODE, "FAIR")
-    }
+    conf.set(SCHEDULER_MODE, schedulingMode)
     val nJobSet = args(2).toInt
     val sc = new SparkContext(conf)
 
core/src/test/scala/org/apache/spark/JobCancellationSuite.scala

Lines changed: 4 additions & 4 deletions
@@ -59,7 +59,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft
   }
 
   test("local mode, FIFO scheduler") {
-    val conf = new SparkConf().set(SCHEDULER_MODE, "FIFO")
+    val conf = new SparkConf().set(SCHEDULER_MODE.key, "FIFO")
     sc = new SparkContext("local[2]", "test", conf)
     testCount()
     testTake()
@@ -68,7 +68,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft
   }
 
   test("local mode, fair scheduler") {
-    val conf = new SparkConf().set(SCHEDULER_MODE, "FAIR")
+    val conf = new SparkConf().set(SCHEDULER_MODE.key, "FAIR")
     val xmlPath = getClass.getClassLoader.getResource("fairscheduler.xml").getFile()
     conf.set(SCHEDULER_ALLOCATION_FILE, xmlPath)
     sc = new SparkContext("local[2]", "test", conf)
@@ -79,7 +79,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft
   }
 
   test("cluster mode, FIFO scheduler") {
-    val conf = new SparkConf().set(SCHEDULER_MODE, "FIFO")
+    val conf = new SparkConf().set(SCHEDULER_MODE.key, "FIFO")
     sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)
     testCount()
     testTake()
@@ -88,7 +88,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft
   }
 
   test("cluster mode, fair scheduler") {
-    val conf = new SparkConf().set(SCHEDULER_MODE, "FAIR")
+    val conf = new SparkConf().set(SCHEDULER_MODE.key, "FAIR")
     val xmlPath = getClass.getClassLoader.getResource("fairscheduler.xml").getFile()
     conf.set(SCHEDULER_ALLOCATION_FILE, xmlPath)
     sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)

core/src/test/scala/org/apache/spark/internal/config/ConfigEntrySuite.scala

Lines changed: 18 additions & 1 deletion
@@ -21,6 +21,7 @@ import java.util.Locale
 import java.util.concurrent.TimeUnit
 
 import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.network.shuffledb.DBBackend
 import org.apache.spark.network.util.ByteUnit
 import org.apache.spark.util.SparkConfWithEnv
 
@@ -389,7 +390,7 @@ class ConfigEntrySuite extends SparkFunSuite {
   }
 
 
-  test("SPARK-51874: Add Enum support to ConfigBuilder") {
+  test("SPARK-51874: Add Scala Enumeration support to ConfigBuilder") {
     object MyTestEnum extends Enumeration {
       val X, Y, Z = Value
     }
@@ -408,4 +409,20 @@ class ConfigEntrySuite extends SparkFunSuite {
     }
     assert(e.getMessage === s"${enumConf.key} should be one of X, Y, Z, but was A")
   }
+
+  test("SPARK-51896: Add Java enum support to ConfigBuilder") {
+    val conf = new SparkConf()
+    val enumConf = ConfigBuilder("spark.test.java.enum.key")
+      .enumConf(classOf[DBBackend])
+      .createWithDefault(DBBackend.LEVELDB)
+    assert(conf.get(enumConf) === DBBackend.LEVELDB)
+    conf.set(enumConf, DBBackend.ROCKSDB)
+    assert(conf.get(enumConf) === DBBackend.ROCKSDB)
+    val e = intercept[IllegalArgumentException] {
+      conf.set(enumConf.key, "ANYDB")
+      conf.get(enumConf)
+    }
+    assert(e.getMessage ===
+      s"${enumConf.key} should be one of ${DBBackend.values.mkString(", ")}, but was ANYDB")
+  }
 }

dev/spark-test-image/python-311-classic-only/Dockerfile

Lines changed: 79 additions & 0 deletions

@@ -0,0 +1,79 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Image for building and testing Spark branches. Based on Ubuntu 22.04.
+# See also in https://hub.docker.com/_/ubuntu
+FROM ubuntu:jammy-20240911.1
+LABEL org.opencontainers.image.authors="Apache Spark project <dev@spark.apache.org>"
+LABEL org.opencontainers.image.licenses="Apache-2.0"
+LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark Classic with Python 3.11"
+# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
+LABEL org.opencontainers.image.version=""
+
+ENV FULL_REFRESH_DATE=20250424
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    ca-certificates \
+    curl \
+    gfortran \
+    git \
+    gnupg \
+    libcurl4-openssl-dev \
+    libfontconfig1-dev \
+    libfreetype6-dev \
+    libfribidi-dev \
+    libgit2-dev \
+    libharfbuzz-dev \
+    libjpeg-dev \
+    liblapack-dev \
+    libopenblas-dev \
+    libpng-dev \
+    libpython3-dev \
+    libssl-dev \
+    libtiff5-dev \
+    libxml2-dev \
+    openjdk-17-jdk-headless \
+    pkg-config \
+    qpdf \
+    tzdata \
+    software-properties-common \
+    wget \
+    zlib1g-dev
+
+# Install Python 3.11
+RUN add-apt-repository ppa:deadsnakes/ppa
+RUN apt-get update && apt-get install -y \
+    python3.11 \
+    && apt-get autoremove --purge -y \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+
+ARG BASIC_PIP_PKGS="numpy pyarrow>=19.0.0 pandas==2.2.3 plotly<6.0.0 matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy scikit-learn>=1.3.2"
+ARG TEST_PIP_PKGS="coverage unittest-xml-reporting"
+
+# Install Python 3.11 packages
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
+RUN python3.11 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this
+RUN python3.11 -m pip install $BASIC_PIP_PKGS $TEST_PIP_PKGS && \
+    python3.11 -m pip install 'torch<2.6.0' torchvision --index-url https://download.pytorch.org/whl/cpu && \
+    python3.11 -m pip install deepspeed torcheval && \
+    python3.11 -m pip cache purge
