Skip to content

Commit 4517eb2

Browse files
Merge pull request #1316 from Open-EO/test_spark_history
Test spark history
2 parents: eccf8ee + 3efbef6 · commit 4517eb2

File tree

2 files changed: +35 −2 lines changed

2 files changed: +35 −2 lines changed

openeogeotrellis/deploy/local.py

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
"""
33
Script to start a local server. This script can serve as the entry-point for doing spark-submit.
44
"""
5-
5+
import datetime
66
import logging
77
import os
88
import socket
@@ -34,7 +34,8 @@ def setup_local_spark(log_dir: Path = Path.cwd(), verbosity=0):
3434
# TODO: make this more reusable (e.g. also see `_setup_local_spark` in tests/conftest.py)
3535
from pyspark import SparkContext, find_spark_home
3636

37-
spark_python = os.path.join(find_spark_home._find_spark_home(), "python")
37+
spark_home = find_spark_home._find_spark_home()
38+
spark_python = os.path.join(spark_home, "python")
3839
logging.info(f"spark_python: {spark_python}")
3940
py4j = glob(os.path.join(spark_python, "lib", "py4j-*.zip"))[0]
4041
sys.path[:0] = [spark_python, py4j]
@@ -86,6 +87,21 @@ def setup_local_spark(log_dir: Path = Path.cwd(), verbosity=0):
8687
conf.set(key="spark.executor.memory", value="2G")
8788
OPENEO_LOCAL_DEBUGGING = smart_bool(os.environ.get("OPENEO_LOCAL_DEBUGGING", "false"))
8889
conf.set("spark.ui.enabled", OPENEO_LOCAL_DEBUGGING)
90+
if OPENEO_LOCAL_DEBUGGING:
91+
events_dir = "/tmp/spark-events" # manually create this folder if you want to keep history
92+
if os.path.exists(events_dir):
93+
conf.set("spark.eventLog.enabled", "true")
94+
_log.info(
95+
f"Start spark history server with {spark_home}/sbin/start-history-server.sh and open http://localhost:18080/"
96+
)
97+
files = glob(os.path.join(events_dir, "*"))
98+
for f in files:
99+
# remove event logs older than 7 days:
100+
if os.path.getmtime(f) < datetime.datetime.now().timestamp() - 7 * 24 * 3600:
101+
try:
102+
os.remove(f)
103+
except Exception as e:
104+
_log.warning(f"Failed to remove old spark event log {f}: {e}")
89105

90106
jars = []
91107
more_jars = [] if "GEOPYSPARK_JARS_PATH" not in os.environ else os.environ["GEOPYSPARK_JARS_PATH"].split(":")

tests/conftest.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
import sys
77
import typing
88
from datetime import datetime
9+
from glob import glob
910
from pathlib import Path
1011
from typing import Optional
1112
from unittest import mock
@@ -170,6 +171,22 @@ def _setup_local_spark(out: TerminalReporter, verbosity=0):
170171
conf.set(key="spark.executor.memory", value="2G")
171172
OPENEO_LOCAL_DEBUGGING = smart_bool(os.environ.get("OPENEO_LOCAL_DEBUGGING", "false"))
172173
conf.set("spark.ui.enabled", OPENEO_LOCAL_DEBUGGING)
174+
# Test if this causes issues on CI. Should be disabled in next commit.
175+
if OPENEO_LOCAL_DEBUGGING:
176+
events_dir = "/tmp/spark-events" # manually create this folder if you want to keep history
177+
if os.path.exists(events_dir):
178+
conf.set("spark.eventLog.enabled", "true")
179+
out.write_line(
180+
f"Start spark history server with $SPARK_HOME/sbin/start-history-server.sh and open http://localhost:18080/"
181+
)
182+
files = glob(os.path.join(events_dir, "*"))
183+
for f in files:
184+
# remove event logs older than 7 days:
185+
if os.path.getmtime(f) < datetime.now().timestamp() - 7 * 24 * 3600:
186+
try:
187+
os.remove(f)
188+
except Exception as e:
189+
out.write_line(f"Failed to remove old spark event log {f}: {e}")
173190

174191
jars = []
175192
for jar_dir in additional_jar_dirs:

0 commit comments

Comments
 (0)