Skip to content

Commit e846a70

Browse files
Merge pull request #15 from mantidproject/memory_monitoring
Add memory monitoring to livereduce
2 parents 344666e + 518e7c3 commit e846a70

File tree

6 files changed

+74
-4
lines changed

6 files changed

+74
-4
lines changed

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ dependencies:
99
- pyinotify
1010
- pre-commit
1111
- python-build
12+
- psutil

scripts/livereduce.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,19 @@
33
import os
44
import signal
55
import sys
6+
import threading
67
import time
78
from hashlib import md5
89

910
import mantid # for clearer error message
11+
import psutil
1012
import pyinotify
11-
from mantid.simpleapi import StartLiveData
13+
from mantid.simpleapi import StartLiveData, mtd
1214
from mantid.utils.logging import log_to_python as mtd_log_to_python
1315
from packaging.version import parse as parse_version
1416

17+
CONVERSION_FACTOR_BYTES_TO_MB = 1.0 / (1024 * 1024)
18+
1519
# ##################
1620
# configure logging
1721
# ##################
@@ -79,6 +83,13 @@ def stop(cls):
7983
else:
8084
cls.logger.info("mantid not initialized - nothing to cleanup")
8185

86+
def restart_and_clear(self):
87+
self.logger.info("Restarting Live Data and clearing workspaces")
88+
self.stop()
89+
time.sleep(1.0)
90+
mtd.clear()
91+
self.start()
92+
8293

8394
# ##################
8495
# register a signal handler so we can exit gracefully if someone kills us
@@ -157,6 +168,10 @@ def __init__(self, filename):
157168
self.accumMethod = str(json_doc.get("accum_method", "Add"))
158169
self.periods = json_doc.get("periods", None)
159170
self.spectra = json_doc.get("spectra", None)
171+
self.system_mem_limit_perc = json_doc.get("system_mem_limit_perc", 25) # set to 0 to disable
172+
self.mem_check_interval_sec = json_doc.get("mem_check_interval_sec", 1)
173+
self.mem_limit = psutil.virtual_memory().total * self.system_mem_limit_perc / 100
174+
self.proc_pid = psutil.Process(os.getpid())
160175

161176
# location of the scripts
162177
self.script_dir = json_doc.get("script_dir")
@@ -321,9 +336,16 @@ def process_default(self, event):
321336
self.scriptfiles[event.pathname] = newmd5
322337
# restart the service
323338
self.logger.info(f'Processing script "{event.pathname}" changed - restarting ' '"StartLiveData"')
324-
self.livemanager.stop()
325-
time.sleep(1.0) # seconds
326-
self.livemanager.start()
339+
self.livemanager.restart_and_clear()
340+
341+
342+
def memory_checker(config, livemanager):
343+
while True:
344+
mem_used = config.proc_pid.memory_info().rss
345+
if mem_used > config.mem_limit:
346+
logger.error(f"Memory usage {mem_used * CONVERSION_FACTOR_BYTES_TO_MB:.2f} MB exceeds limit")
347+
livemanager.restart_and_clear()
348+
time.sleep(config.mem_check_interval_sec)
327349

328350

329351
# determine the configuration file
@@ -355,6 +377,11 @@ def process_default(self, event):
355377
# start up the live data
356378
liveDataMgr.start()
357379

380+
# start the memory checker
381+
if config.system_mem_limit_perc > 0:
382+
memory_thread = threading.Thread(target=memory_checker, args=(config, liveDataMgr), daemon=True)
383+
memory_thread.start()
384+
358385
# inotify will keep the program running
359386
notifier.loop()
360387

test/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,11 @@ Once the first chunk of live data is processed, `ctrl-C` will
2424
interrupt the process and it will close cleanly.
2525

2626
In testing mode, the logging will go to `${PWD}/livereduce.log` and can be watched with `tail -F livereduce.log`
27+
28+
29+
Example using event data, to test memory monitoring
30+
----------------------------------------------------
31+
32+
This test case will continuously accumulate events until it fails.
33+
34+
Start the server using `test/fake_event_server.py` and use the configuration `test/fake_event.conf`.

test/fake_event.conf

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"instrument": "ISIS_Event",
3+
"script_dir": "test",
4+
"update_every": 3,
5+
"CONDA_ENV": "livereduce",
6+
"accum_method":"Add",
7+
"system_mem_limit_perc": 25,
8+
"mem_check_interval_sec": 1
9+
}

test/fake_event_server.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from threading import Thread
2+
3+
from mantid import AlgorithmManager, ConfigService
4+
from mantid.simpleapi import FakeISISEventDAE
5+
6+
facility = ConfigService.getFacility().name()
7+
ConfigService.setFacility("TEST_LIVE")
8+
9+
10+
def startServer():
11+
FakeISISEventDAE(NEvents=1000000)
12+
13+
14+
try:
15+
thread = Thread(target=startServer)
16+
thread.start()
17+
thread.join()
18+
except Exception as e: # noqa: BLE001
19+
print(e)
20+
alg = AlgorithmManager.newestInstanceOf("FakeISISEventDAE")
21+
if alg.isRunning():
22+
alg.cancel()
23+
finally:
24+
ConfigService.setFacility(facility)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
print("Running proc script")

0 commit comments

Comments
 (0)