Skip to content
This repository was archived by the owner on Feb 8, 2024. It is now read-only.

Commit ed711e3

Browse files
author
Mandar Sawant
committed
CORTX-28971: update service fids along with process fids
A motr client restart is considered as a permanent failure and thus, if an existing motr client restarts its existing fid is updated by Hare. A motr process comprises of motr services associated to it. Interested motr modules subscribe to change in the service states, also, motr confc (configuration cache) maintains a hierarchy of processes and services, thus if a process fid changes without updating the service fids, it will not be possible to map a given process to its corresponding services. Thus, It is important to update the services fids along with its respective process fid. Solution: - Allocate service fids along with its corresponding process fid. - Make fid allocation routine generic to work for any motr configuration object. Signed-off-by: Mandar Sawant <[email protected]>
1 parent 96abb23 commit ed711e3

File tree

5 files changed

+64
-45
lines changed

5 files changed

+64
-45
lines changed

hax/hax/motr/__init__.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ def send_entrypoint_request_reply(self, message: EntrypointRequest):
199199
util = self.consul_util
200200
# Disabling dynamic fids allocation until dtm is ready to consume.
201201
# if util.is_proc_client(process_fid) and message.is_first_request:
202-
# util.alloc_next_process_fid(process_fid)
202+
# util.update_process_fid(process_fid)
203203

204204
# When stopping, there's a possibility that hax may receive
205205
# an entrypoint request from motr land. In order to unblock
@@ -327,13 +327,9 @@ def _proc_fids_to_eps(proc_skip_list: List[Fid]) -> List[str]:
327327
# fid.
328328
# if (st.fid.container == ObjT.PROCESS.value and
329329
# self.consul_util.is_proc_client(st.fid)):
330-
# proc_full_fid = self.consul_util.get_process_full_fid(st.fid)
330+
# proc_full_fid = self.consul_util.get_obj_full_fid(st.fid)
331331
# st.fid = proc_full_fid
332332
note = HaNoteStruct(st.fid.to_c(), st.status.to_ha_note_status())
333-
# if (st.fid.container == ObjT.PROCESS.value and
334-
# self.consul_util.is_proc_client(st.fid)):
335-
# proc_full_fid = self.consul_util.get_process_full_fid(st.fid)
336-
# st.fid = proc_full_fid
337333
notes.append(note)
338334

339335
# For process failure, we report failure for the corresponding
@@ -531,7 +527,8 @@ def _generate_sub_services(self,
531527
LOG.debug('Process fid=%s encloses %s services as follows: %s', fid,
532528
len(service_list), service_list)
533529
service_notes = [
534-
HaNoteStruct(no_id=x.fid.to_c(), no_state=new_state)
530+
HaNoteStruct(no_id=cns.get_obj_full_fid(x.fid).to_c(),
531+
no_state=new_state)
535532
for x in service_list
536533
]
537534
if notify_devices:

hax/hax/types.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ def for_json(self):
206206

207207
ObjTMaskMap = {
208208
# here for object key - higher 32 bits is masked for dynamic part.
209-
ObjT.PROCESS: Fid(QW_F, DW_F)
209+
ObjT.PROCESS: Fid(QW_F, DW_F),
210+
ObjT.SERVICE: Fid(QW_F, DW_F)
210211
}
211212

212213

hax/hax/util.py

Lines changed: 50 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@
3737
from hax.common import HaxGlobalState
3838
from hax.exception import HAConsistencyException, InterruptedException
3939
from hax.types import (ByteCountStats, ConfHaProcess, Fid, FsStatsWithTime,
40-
ObjT, ObjHealth, ObjTMaskMap, Profile, PverInfo,
41-
PverState, m0HaProcessEvent, m0HaProcessType,
42-
KeyDelete, HaNoteStruct, m0HaObjState)
40+
FidTypeToObjT, ObjT, ObjHealth, ObjTMaskMap,
41+
Profile, PverInfo, PverState, m0HaProcessEvent,
42+
m0HaProcessType, KeyDelete, HaNoteStruct, m0HaObjState)
4343

4444
from hax.consul.cache import (uses_consul_cache, invalidates_consul_cache,
4545
supports_consul_cache)
@@ -1728,7 +1728,7 @@ def get_process_status(self,
17281728
fid: Fid,
17291729
proc_node=None,
17301730
kv_cache=None) -> MotrConsulProcInfo:
1731-
proc_base_fid = self.get_process_base_fid(fid)
1731+
proc_base_fid = self.get_base_fid(fid)
17321732
key = f'processes/{proc_base_fid}'
17331733
status = self.kv.kv_get(key, kv_cache=kv_cache, allow_null=True)
17341734
if status:
@@ -1742,7 +1742,7 @@ def get_process_status_local(self,
17421742
fid: Fid,
17431743
proc_node=None,
17441744
kv_cache=None) -> MotrConsulProcInfo:
1745-
proc_base_fid = self.get_process_base_fid(fid)
1745+
proc_base_fid = self.get_base_fid(fid)
17461746
this_node = self.get_local_nodename()
17471747
key = f'{this_node}/processes/{proc_base_fid}'
17481748
status = self.kv.kv_get(key, kv_cache=kv_cache, allow_null=True)
@@ -1753,12 +1753,12 @@ def get_process_status_local(self,
17531753
return MotrConsulProcInfo('Unknown', 'Unknown')
17541754

17551755
@repeat_if_fails()
1756-
def get_process_full_fid(self, proc_base_fid: Fid) -> Optional[Fid]:
1757-
proc_fid = self.kv.kv_get(str(proc_base_fid), recurse=False)
1758-
if proc_fid is not None:
1759-
pfid: Fid = Fid.parse(json.loads(proc_fid['Value']))
1760-
return pfid
1761-
return proc_base_fid
1756+
def get_obj_full_fid(self, base_fid: Fid) -> Optional[Fid]:
1757+
full_fid = self.kv.kv_get(str(base_fid), recurse=False)
1758+
if full_fid is not None:
1759+
fid: Fid = Fid.parse(json.loads(full_fid['Value']))
1760+
return fid
1761+
return base_fid
17621762

17631763
def is_proc_local(self, pfid: Fid) -> bool:
17641764
local_node = self.get_local_nodename()
@@ -1924,7 +1924,7 @@ def get_service_health(self,
19241924
@uses_consul_cache
19251925
def get_process_node(self, proc_fid: Fid, kv_cache=None) -> str:
19261926
try:
1927-
proc_base_fid = self.get_process_base_fid(proc_fid)
1927+
proc_base_fid = self.get_base_fid(proc_fid)
19281928
fidk = proc_base_fid.key
19291929
# 'node/<node_name>/process/<process_fidk>/service/type'
19301930
# node_items = self.kv.kv_get('m0conf/nodes',
@@ -2171,7 +2171,7 @@ def set_process_state(self,
21712171
ObjHealth.STOPPED: 'stopped',
21722172
ObjHealth.RECOVERING: 'dtm_recovering'
21732173
}
2174-
proc_base_fid = self.get_process_base_fid(process_fid)
2174+
proc_base_fid = self.get_base_fid(process_fid)
21752175

21762176
# Example key is as follows
21772177
# m0conf/nodes/0x6e00000000000001:0x3/processes/0x7200000000000001:
@@ -2281,62 +2281,75 @@ def set_motr_processes_status(self, fid, status, bAdd=False):
22812281
motr_processes_status)
22822282

22832283
@repeat_if_fails()
2284-
def process_dynamic_fidk_lock(self) -> bool:
2284+
def obj_dynamic_fidk_lock(self, objt: ObjT) -> bool:
22852285
# Acquire lock to update last_updated_base_fidk.
22862286
# This will block until lock is acquired.
22872287
# Will break if any other exception than
22882288
# HAConsistencyException occurs.
22892289
try:
2290-
while not self.kv.kv_put('fidk_update_lock', 'true', cas=0):
2290+
while not self.kv.kv_put(f'{objt.name}/fidk_update_lock',
2291+
'true', cas=0):
22912292
sleep(1)
22922293
return True
22932294
except Exception:
22942295
return False
22952296

22962297
@repeat_if_fails()
2297-
def process_dynamic_fidk_unlock(self):
2298+
def obj_dynamic_fidk_unlock(self, objt: ObjT):
22982299
# Release fidk_update_lock.
22992300
# This will block until released.
23002301
try:
23012302
keys: List[KeyDelete] = [
2302-
KeyDelete(name='fidk_update_lock', recurse=True),
2303+
KeyDelete(name=f'{objt.name}/fidk_update_lock', recurse=True),
23032304
]
23042305
while not self.kv.kv_delete_in_transaction(keys):
23052306
sleep(1)
23062307
except Exception:
23072308
raise RuntimeError('Unreachable')
23082309

23092310
@repeat_if_fails()
2310-
def get_process_next_dynamic_fidk_lock(self) -> int:
2311-
if self.process_dynamic_fidk_lock():
2312-
fidk = self.kv.kv_get('last_dynamic_fid_key/process',
2311+
def get_obj_next_dynamic_fidk_lock(self, objt: ObjT) -> int:
2312+
if self.obj_dynamic_fidk_lock(objt):
2313+
fidk = self.kv.kv_get(f'last_dynamic_fid_key/{objt.name.lower()}',
23132314
recurse=False)
23142315
new_fidk = int(json.loads(fidk['Value'])) + 1
23152316
# Update dynamic fid key.
2316-
while not self.kv.kv_put('last_dynamic_fid_key/process',
2317-
json.dumps(str(new_fidk))):
2317+
while not self.kv.kv_put(
2318+
f'last_dynamic_fid_key/{objt.name.lower()}',
2319+
json.dumps(str(new_fidk))):
23182320
sleep(1)
2319-
self.process_dynamic_fidk_unlock()
2321+
self.obj_dynamic_fidk_unlock(objt)
23202322
return new_fidk
23212323

23222324
@repeat_if_fails()
2323-
def alloc_next_process_fid(self, process_fid: Fid) -> Fid:
2324-
next_fidk = self.get_process_next_dynamic_fidk_lock()
2325-
fid_mask: Fid = ObjTMaskMap[ObjT.PROCESS]
2326-
fid_cont = process_fid.container
2327-
fid_key = process_fid.key + ((fid_mask.key * next_fidk) + next_fidk)
2328-
new_proc_fid = Fid(fid_cont, fid_key)
2329-
base_fid = self.get_process_base_fid(new_proc_fid)
2325+
def alloc_next_obj_fid(self, obj_fid: Fid) -> Fid:
2326+
objt: ObjT = FidTypeToObjT[obj_fid.container]
2327+
next_fidk = self.get_obj_next_dynamic_fidk_lock(objt)
2328+
fid_mask: Fid = ObjTMaskMap[objt]
2329+
fid_cont = obj_fid.container
2330+
fid_key = obj_fid.key + ((fid_mask.key * next_fidk) + next_fidk)
2331+
new_obj_fid = Fid(fid_cont, fid_key)
2332+
base_fid = self.get_base_fid(new_obj_fid)
23302333
# Save base fid to actualy fid mapping in Consul.
23312334
while not self.kv.kv_put(f'{base_fid}',
2332-
json.dumps(str(new_proc_fid))):
2335+
json.dumps(str(new_obj_fid))):
23332336
sleep(1)
2334-
return new_proc_fid
2335-
2336-
def get_process_base_fid(self, proc_fid: Fid) -> Fid:
2337-
fid_mask: Fid = ObjTMaskMap[ObjT.PROCESS]
2338-
base_fid = Fid(proc_fid.container,
2339-
(proc_fid.key & fid_mask.key))
2337+
return new_obj_fid
2338+
2339+
def update_process_fid(self, proc_fid: Fid) -> List[Fid]:
2340+
new_fids: List[Fid] = []
2341+
# First allocate new process fid.
2342+
new_fids.append(self.alloc_next_obj_fid(proc_fid))
2343+
service_list = self.get_services_by_parent_process(proc_fid)
2344+
for svc in service_list:
2345+
new_fids.append(self.alloc_next_obj_fid(svc.fid))
2346+
return new_fids
2347+
2348+
def get_base_fid(self, obj_fid: Fid) -> Fid:
2349+
objt: ObjT = FidTypeToObjT[obj_fid.container]
2350+
fid_mask: Fid = ObjTMaskMap[objt]
2351+
base_fid = Fid(obj_fid.container,
2352+
(obj_fid.key & fid_mask.key))
23402353
return base_fid
23412354

23422355
@repeat_if_fails()

hax/test/integration/test_motr.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,13 @@ def my_get(key: str, recurse: bool = False, **kwds):
808808
"name": "localhost",
809809
"state": "M0_NC_UNKNOWN"
810810
}))
811+
elif key == '0x7300000000000001:0x10':
812+
return new_kv('0x7300000000000001:0x10',
813+
json.dumps('0x7300000000000001:0x10'))
814+
elif key == '0x7300000000000001:0x11':
815+
return new_kv('0x7300000000000001:0x11',
816+
json.dumps('0x7300000000000001:0x11'))
817+
811818
raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}')
812819

813820
return my_get

hax/test/test_failure.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def test_process_failure(self):
104104
consul_util.get_node_encl_fid = Mock(return_value=encl_fid)
105105
consul_util.get_node_ctrl_fids = Mock(return_value=[ctrl_fid])
106106
consul_util.get_ioservice_ctrl_fid = Mock(return_value=ctrl_fid)
107+
consul_util.get_obj_full_fid = Mock(return_value=service_fid)
107108

108109
# These failure indications are here to trigger specific code paths for
109110
# node failure. Additional tests can cover different scenarios (e.g.

0 commit comments

Comments
 (0)