Skip to content

Commit a74b938

Browse files
MCOL-5806: added ability to start node in read-only mode
1 parent 3fd90d3 commit a74b938

File tree

11 files changed

+208
-44
lines changed

11 files changed

+208
-44
lines changed

cmapi/cmapi_server/constants.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""
55
import os
66
from typing import NamedTuple
7+
from enum import Enum
78

89

910
# default MARIADB ColumnStore config path
@@ -51,6 +52,16 @@
5152
CMAPI_INSTALL_PATH, 'cmapi_server/SingleNode.xml'
5253
)
5354

55+
class MCSProgs(Enum):
    """Closed set of MCS process names.

    The value of each member is the process name string that is used as
    a key in ALL_MCS_PROGS.
    """

    STORAGE_MANAGER = 'StorageManager'
    WORKER_NODE = 'workernode'
    CONTROLLER_NODE = 'controllernode'
    PRIM_PROC = 'PrimProc'
    EXE_MGR = 'ExeMgr'
    WRITE_ENGINE_SERVER = 'WriteEngineServer'
    DML_PROC = 'DMLProc'
    DDL_PROC = 'DDLProc'
64+
5465
# constants for dispatchers
5566
class ProgInfo(NamedTuple):
5667
"""NamedTuple for some additional info about handling mcs processes."""
@@ -64,17 +75,17 @@ class ProgInfo(NamedTuple):
6475
# on top level of process handling
6576
# mcs-storagemanager starts conditionally inside mcs-loadbrm, but should be
6677
# stopped using cmapi
67-
# Maps every MCS process name (the MCSProgs values) to its ProgInfo.
ALL_MCS_PROGS: dict[str, ProgInfo] = {
    # workernode starts on primary and non primary node with 1 or 2 added
    # to subcommand (DBRM_Worker1 - on primary, DBRM_Worker2 - non primary)
    MCSProgs.STORAGE_MANAGER.value: ProgInfo(
        15, 'mcs-storagemanager', '', False, 1
    ),
    MCSProgs.WORKER_NODE.value: ProgInfo(
        13, 'mcs-workernode', 'DBRM_Worker{}', False, 1
    ),
    MCSProgs.CONTROLLER_NODE.value: ProgInfo(
        11, 'mcs-controllernode', 'fg', True
    ),
    MCSProgs.PRIM_PROC.value: ProgInfo(
        5, 'mcs-primproc', '', False, 1
    ),
    MCSProgs.EXE_MGR.value: ProgInfo(
        9, 'mcs-exemgr', '', False, 1
    ),
    MCSProgs.WRITE_ENGINE_SERVER.value: ProgInfo(
        7, 'mcs-writeengineserver', '', False, 3
    ),
    MCSProgs.DML_PROC.value: ProgInfo(
        3, 'mcs-dmlproc', '', False
    ),
    MCSProgs.DDL_PROC.value: ProgInfo(
        1, 'mcs-ddlproc', '', False
    ),
}
7990

8091
# constants for docker container dispatcher

cmapi/cmapi_server/controllers/endpoints.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,8 @@ def put_config(self):
401401
MCSProcessManager.stop_node(
402402
is_primary=node_config.is_primary_node(),
403403
use_sudo=use_sudo,
404-
timeout=request_timeout
404+
timeout=request_timeout,
405+
is_read_only=node_config.is_read_only(),
405406
)
406407
except CMAPIBasicError as err:
407408
raise_422_error(
@@ -417,7 +418,7 @@ def put_config(self):
417418
)
418419
if in_maintenance_state():
419420
module_logger.info(
420-
'Maintaninance state is active in new config. '
421+
'Maintenance state is active in new config. '
421422
'MCS processes should not be started.'
422423
)
423424
cherrypy.engine.publish('failover', False)
@@ -430,6 +431,7 @@ def put_config(self):
430431
MCSProcessManager.start_node(
431432
is_primary=node_config.is_primary_node(),
432433
use_sudo=use_sudo,
434+
is_read_only=node_config.is_read_only(),
433435
)
434436
except CMAPIBasicError as err:
435437
raise_422_error(
@@ -633,7 +635,8 @@ def put_start(self):
633635
try:
634636
MCSProcessManager.start_node(
635637
is_primary=node_config.is_primary_node(),
636-
use_sudo=use_sudo
638+
use_sudo=use_sudo,
639+
is_read_only=node_config.is_read_only(),
637640
)
638641
except CMAPIBasicError as err:
639642
raise_422_error(
@@ -668,7 +671,8 @@ def put_shutdown(self):
668671
MCSProcessManager.stop_node(
669672
is_primary=node_config.is_primary_node(),
670673
use_sudo=use_sudo,
671-
timeout=timeout
674+
timeout=timeout,
675+
is_read_only=node_config.is_read_only(),
672676
)
673677
except CMAPIBasicError as err:
674678
raise_422_error(

cmapi/cmapi_server/failover_agent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def enterStandbyMode(self, test_mode = False):
9595
try:
9696
# TODO: remove test_mode condition and add mock for testing
9797
if not test_mode:
98-
MCSProcessManager.stop_node(is_primary=nc.is_primary_node())
98+
MCSProcessManager.stop_node(is_primary=nc.is_primary_node(), is_read_only=nc.is_read_only())
9999
logger.info(
100100
'FA.enterStandbyMode(): successfully stopped node.'
101101
)

cmapi/cmapi_server/handlers/cluster.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,8 @@ def process_shutdown():
196196
@staticmethod
197197
def add_node(
198198
node: str, config: str = DEFAULT_MCS_CONF_PATH,
199-
logger: logging.Logger = logging.getLogger('cmapi_server')
199+
logger: logging.Logger = logging.getLogger('cmapi_server'),
200+
read_only: bool = False,
200201
) -> dict:
201202
"""Method to add node to MCS Cluster.
202203
@@ -207,6 +208,8 @@ def add_node(
207208
:type config: str, optional
208209
:param logger: logger, defaults to logging.getLogger('cmapi_server')
209210
:type logger: logging.Logger, optional
211+
:param read_only: add node in read-only mode, defaults to False
212+
:type read_only: bool, optional
210213
:raises CMAPIBasicError: on exception while starting transaction
211214
:raises CMAPIBasicError: if transaction start isn't successful
212215
:raises CMAPIBasicError: on exception while adding node
@@ -216,7 +219,7 @@ def add_node(
216219
:return: result of adding node
217220
:rtype: dict
218221
"""
219-
logger.debug(f'Cluster add node command called. Adding node {node}.')
222+
logger.debug('Cluster add node command called. Adding node %s in %s mode.', node, 'read-only' if read_only else 'read-write')
220223

221224
response = {'timestamp': str(datetime.now())}
222225
transaction_id = get_id()
@@ -238,13 +241,18 @@ def add_node(
238241
try:
239242
add_node(
240243
node, input_config_filename=config,
241-
output_config_filename=config
244+
output_config_filename=config,
245+
read_only=read_only,
242246
)
243247
if not get_dbroots(node, config):
244-
add_dbroot(
245-
host=node, input_config_filename=config,
246-
output_config_filename=config
247-
)
248+
if not read_only: # Read-only nodes don't own dbroots
249+
add_dbroot(
250+
host=node, input_config_filename=config,
251+
output_config_filename=config
252+
)
253+
else:
254+
logger.debug("Node %s is read-only, skipping dbroot addition", node)
255+
248256
except Exception as err:
249257
rollback_transaction(transaction_id, cs_config_filename=config)
250258
raise CMAPIBasicError('Error while adding node.') from err

cmapi/cmapi_server/helpers.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,10 @@ def get_desired_nodes(config=DEFAULT_MCS_CONF_PATH):
513513
return [ node.text for node in nodes ]
514514

515515

516+
def get_read_only_nodes(root) -> list[str]:
    """Return the names of the nodes listed under ``ReadOnlyNodes``.

    :param root: root element of a parsed config; must support
                 ``findall`` with an ElementPath expression
    :return: text of every non-empty ``./ReadOnlyNodes/Node`` element in
             document order, empty list if there are none
    :rtype: list[str]
    """
    # An empty <Node/> has ``text is None``; skip it so that callers always
    # get a list of strings, as the annotation promises.
    return [
        node.text
        for node in root.findall("./ReadOnlyNodes/Node")
        if node.text
    ]
518+
519+
516520
def in_maintenance_state(config=DEFAULT_MCS_CONF_PATH):
517521
nc = NodeConfig()
518522
root = nc.get_current_config_root(config, upgrade=False)
@@ -549,6 +553,7 @@ def get_dbroots(node, config=DEFAULT_MCS_CONF_PATH):
549553
dbroots = []
550554
smc_node = root.find('./SystemModuleConfig')
551555
mod_count = int(smc_node.find('./ModuleCount3').text)
556+
552557
for i in range(1, mod_count+1):
553558
ip_addr = smc_node.find(f'./ModuleIPAddr{i}-1-3').text
554559
hostname = smc_node.find(f'./ModuleHostName{i}-1-3').text
@@ -568,6 +573,12 @@ def get_dbroots(node, config=DEFAULT_MCS_CONF_PATH):
568573
dbroots.append(
569574
smc_node.find(f"./ModuleDBRootID{i}-{j}-3").text
570575
)
576+
577+
if dbroots and nc.is_read_only():
578+
logger = logging.getLogger("dbroots")
579+
logger.warning("Config contains dbroots %s for this read-only node, ignoring", dbroots)
580+
return []
581+
571582
return dbroots
572583

573584

cmapi/cmapi_server/managers/process.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
import psutil
88

99
from cmapi_server.exceptions import CMAPIBasicError
10-
from cmapi_server.constants import MCS_INSTALL_BIN, ALL_MCS_PROGS
10+
from cmapi_server.constants import MCS_INSTALL_BIN, ALL_MCS_PROGS, MCSProgs
11+
from cmapi_server.process_dispatchers.base import BaseDispatcher
1112
from cmapi_server.process_dispatchers.systemd import SystemdDispatcher
1213
from cmapi_server.process_dispatchers.container import (
1314
ContainerDispatcher
@@ -18,7 +19,7 @@
1819
from mcs_node_control.models.process import Process
1920

2021

21-
PROCESS_DISPATCHERS = {
22+
PROCESS_DISPATCHERS: dict[str, type[BaseDispatcher]] = {
2223
'systemd': SystemdDispatcher,
2324
# could be used in docker containers and OSes w/o systemd
2425
'container': ContainerDispatcher,
@@ -404,37 +405,46 @@ def is_node_processes_ok(
404405
return set(node_progs) == set(p['name'] for p in running_procs)
405406

406407
@classmethod
def start_node(
    cls,
    is_primary: bool,
    use_sudo: bool = True,
    is_read_only: bool = False,
) -> None:
    """Start mcs node processes.

    Processes are started in the order given by
    ``cls._get_sorted_progs(is_primary)``.

    :param is_primary: is node primary or not
    :type is_primary: bool
    :param use_sudo: use sudo or not, defaults to True
    :type use_sudo: bool, optional
    :param is_read_only: if True, WriteEngineServer is not started,
                         defaults to False
    :type is_read_only: bool, optional
    :raises CMAPIBasicError: immediately if one mcs process not started
    """
    for prog_name in cls._get_sorted_progs(is_primary):
        # Skip guards first: processes that must not be started at all.
        if (
            cls.dispatcher_name == 'systemd'
            and prog_name == MCSProgs.STORAGE_MANAGER.value
        ):
            # TODO: MCOL-5458
            logging.info(
                f'Skip starting {prog_name} with systemd dispatcher.'
            )
            continue
        if is_read_only and prog_name == MCSProgs.WRITE_ENGINE_SERVER.value:
            logging.debug(
                'Node is in read-only mode, not starting WriteEngine'
            )
            continue
        # TODO: additional error handling
        # Dependency waits: controllernode is started only after the
        # workernodes wait, DMLProc/DDLProc only after the controllernode
        # wait.
        if prog_name == MCSProgs.CONTROLLER_NODE.value:
            cls._wait_for_workernodes()
        if prog_name in (MCSProgs.DML_PROC.value, MCSProgs.DDL_PROC.value):
            cls._wait_for_controllernode()
        if not cls.start(prog_name, is_primary, use_sudo):
            logging.error(f'Process "{prog_name}" not started properly.')
            raise CMAPIBasicError(f'Error while starting "{prog_name}".')
434440

435441
@classmethod
436442
def stop_node(
437-
cls, is_primary: bool, use_sudo: bool = True, timeout: int = 10
443+
cls,
444+
is_primary: bool,
445+
use_sudo: bool = True,
446+
timeout: int = 10,
447+
is_read_only: bool = False,
438448
):
439449
"""Stop mcs node processes.
440450
@@ -444,20 +454,25 @@ def stop_node(
444454
:type use_sudo: bool, optional
445455
:param timeout: timeout for DMLProc gracefully stop using DBRM, seconds
446456
:type timeout: int
457+
:param is_read_only: if true, doesn't stop WriteEngine
458+
:type is_read_only: bool, optional
447459
:raises CMAPIBasicError: immediately if one mcs process not stopped
448460
"""
449461
# Always try to stop all processes, no matter whether the node is primary or not,
450462
# so use full available list of processes. Otherwise, it could cause
451463
# undefined behaviour when primary gone and then recovers (failover
452464
# triggered 2 times).
453465
for prog_name in cls._get_sorted_progs(True, reverse=True):
466+
if is_read_only and prog_name == MCSProgs.WRITE_ENGINE_SERVER.value:
467+
logging.debug('Node is in read-only mode, not stopping WriteEngine')
468+
continue
454469
if not cls.stop(prog_name, is_primary, use_sudo):
455470
logging.error(f'Process "{prog_name}" not stopped properly.')
456471
raise CMAPIBasicError(f'Error while stopping "{prog_name}"')
457472

458473
@classmethod
def restart_node(
    cls, is_primary: bool, use_sudo: bool, is_read_only: bool = False
):
    """Stop the node processes if any are running, then start them.

    TODO: For next releases.

    :param is_primary: is node primary or not
    :type is_primary: bool
    :param use_sudo: use sudo or not
    :type use_sudo: bool
    :param is_read_only: forwarded to stop_node and start_node,
                         defaults to False
    :type is_read_only: bool, optional
    """
    if cls.get_running_mcs_procs():
        # stop_node's third positional parameter is ``timeout``, so
        # is_read_only has to be passed by keyword; passing it
        # positionally would silently override the stop timeout.
        cls.stop_node(is_primary, use_sudo, is_read_only=is_read_only)
    cls.start_node(is_primary, use_sudo, is_read_only=is_read_only)

cmapi/cmapi_server/node_manipulation.py

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ def switch_node_maintenance(
6161
def add_node(
6262
node: str, input_config_filename: str = DEFAULT_MCS_CONF_PATH,
6363
output_config_filename: Optional[str] = None,
64-
rebalance_dbroots: bool = True
64+
rebalance_dbroots: bool = True,
65+
read_only: bool = False,
6566
):
6667
"""Add node to a cluster.
6768
@@ -95,14 +96,23 @@ def add_node(
9596
try:
9697
if not _replace_localhost(c_root, node):
9798
pm_num = _add_node_to_PMS(c_root, node)
98-
_add_WES(c_root, pm_num, node)
99+
100+
if not read_only:
101+
_add_WES(c_root, pm_num, node)
102+
else:
103+
logging.info("Node is read-only, skipping WES addition")
104+
_add_read_only_node(c_root, node)
105+
99106
_add_DBRM_Worker(c_root, node)
100107
_add_Module_entries(c_root, node)
101108
_add_active_node(c_root, node)
102109
_add_node_to_ExeMgrs(c_root, node)
103110
if rebalance_dbroots:
104-
_rebalance_dbroots(c_root)
105-
_move_primary_node(c_root)
111+
if not read_only:
112+
_rebalance_dbroots(c_root)
113+
_move_primary_node(c_root)
114+
else:
115+
logging.debug("Node is read-only, skipping dbroots rebalancing")
106116
except Exception:
107117
logging.error(
108118
'Caught exception while adding node, config file is unchanged',
@@ -140,7 +150,11 @@ def remove_node(
140150

141151
if len(active_nodes) > 1:
142152
pm_num = _remove_node_from_PMS(c_root, node)
143-
_remove_WES(c_root, pm_num)
153+
154+
is_read_only = node in helpers.get_read_only_nodes(c_root)
155+
if not is_read_only:
156+
_remove_WES(c_root, pm_num)
157+
144158
_remove_DBRM_Worker(c_root, node)
145159
_remove_Module_entries(c_root, node)
146160
_remove_from_ExeMgrs(c_root, node)
@@ -151,7 +165,7 @@ def remove_node(
151165
# TODO: unspecific name, need to think of a better one
152166
_remove_node(c_root, node)
153167

154-
if rebalance_dbroots:
168+
if rebalance_dbroots and not is_read_only:
155169
_rebalance_dbroots(c_root)
156170
_move_primary_node(c_root)
157171
else:
@@ -359,12 +373,16 @@ def __remove_helper(parent_node, node):
359373

360374
def _remove_node(root, node):
    '''
    remove node from DesiredNodes, InactiveNodes, ActiveNodes and
    (if present) ReadOnlyNodes
    '''
    # These three sections are passed to the helper as found.
    for section_name in ("DesiredNodes", "InactiveNodes", "ActiveNodes"):
        __remove_helper(root.find(f"./{section_name}"), node)

    # ReadOnlyNodes is optional, so only touch it when it exists.
    ro_section = root.find("./ReadOnlyNodes")
    if ro_section is None:
        return
    __remove_helper(ro_section, node)
385+
368386

369387
# This moves a node from ActiveNodes to InactiveNodes
370388
def _deactivate_node(root, node):
@@ -972,6 +990,19 @@ def _add_WES(root, pm_num, node):
972990
etree.SubElement(wes_node, "Port").text = "8630"
973991

974992

993+
def _add_read_only_node(root, node) -> None:
    """Add node name to ReadOnlyNodes if it's not already there.

    The ``ReadOnlyNodes`` element is created under ``root`` when it does
    not exist yet. Calling this repeatedly with the same name adds a
    single ``Node`` entry.

    :param root: root element of the config tree, modified in place
    :param node: node name to store as the text of the new ``Node``
    """
    read_only_nodes = root.find("./ReadOnlyNodes")
    # Compare with None explicitly: an element without children is falsy.
    if read_only_nodes is None:
        read_only_nodes = etree.SubElement(root, "ReadOnlyNodes")
    elif any(n.text == node for n in read_only_nodes.findall("./Node")):
        return

    etree.SubElement(read_only_nodes, "Node").text = node
1004+
1005+
9751006
def _add_DBRM_Worker(root, node):
9761007
'''
9771008
find the highest numbered DBRM_Worker entry, or one that isn't used atm
@@ -1074,7 +1105,7 @@ def _add_node_to_PMS(root, node):
10741105

10751106
return new_pm_num
10761107

1077-
def _replace_localhost(root, node):
1108+
def _replace_localhost(root: etree.Element, node: str) -> bool:
10781109
# if DBRM_Controller/IPAddr is 127.0.0.1 or localhost,
10791110
# then replace all instances, else do nothing.
10801111
controller_host = root.find('./DBRM_Controller/IPAddr')

0 commit comments

Comments
 (0)