From f7ff02189644f04933d8732caa3176101944e406 Mon Sep 17 00:00:00 2001 From: LeandroTreu Date: Mon, 19 May 2025 14:26:36 +0200 Subject: [PATCH 1/6] lease max free memory --- rfaas/include/rfaas/allocation.hpp | 2 +- server/resource_manager/executor.cpp | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/rfaas/include/rfaas/allocation.hpp b/rfaas/include/rfaas/allocation.hpp index af12eba..4488917 100644 --- a/rfaas/include/rfaas/allocation.hpp +++ b/rfaas/include/rfaas/allocation.hpp @@ -8,7 +8,7 @@ namespace rfaas { struct LeaseRequest { // > 0: Number of cores to be allocated - // < 0: client_id with negative sign, deallocation & disconnect request + // <= 0: client_id with negative sign, deallocation & disconnect request int16_t cores; int32_t memory; }; diff --git a/server/resource_manager/executor.cpp b/server/resource_manager/executor.cpp index 8a86005..711e2c6 100644 --- a/server/resource_manager/executor.cpp +++ b/server/resource_manager/executor.cpp @@ -48,11 +48,18 @@ namespace rfaas::resource_manager { bool Executor::lease(int cores, int memory) { + + // Lease entire free memory when receiving reserved value -1 + if (memory == -1) { + memory = _free_memory; + } + // Not enough memory? skip if(_free_memory < memory) { return false; } + // Note that values of cores <= 0 are reserved by the resource manager to disconnect clients if(_free_cores < cores) { return false; } From db2fd7d63893a334f1fce355b42459783a0caf51 Mon Sep 17 00:00:00 2001 From: LeandroTreu Date: Mon, 19 May 2025 19:46:19 +0200 Subject: [PATCH 2/6] propagate leased max memory to LeaseAllocation and LeaseResponse --- rfaas/include/rfaas/allocation.hpp | 2 ++ rfaas/include/rfaas/client.hpp | 12 +++++++----- server/resource_manager/db.cpp | 7 +++++++ server/resource_manager/executor.cpp | 7 ------- server/resource_manager/manager.cpp | 4 ++-- 5 files changed, 18 insertions(+), 14 deletions(-) diff --git a/rfaas/include/rfaas/allocation.hpp b/rfaas/include/rfaas/allocation.hpp index 4488917..13c7a21 100644 --- a/rfaas/include/rfaas/allocation.hpp +++ b/rfaas/include/rfaas/allocation.hpp @@ -25,6 +25,8 @@ namespace rfaas { int32_t port; char address[16]; //LeasedNode nodes[MAX_NODES_PER_LEASE]; + int16_t cores; + int32_t memory; }; struct AllocationRequest { diff --git a/rfaas/include/rfaas/client.hpp b/rfaas/include/rfaas/client.hpp index 3a92ded..b396826 100644 --- a/rfaas/include/rfaas/client.hpp +++ b/rfaas/include/rfaas/client.hpp @@ -54,12 +54,14 @@ namespace rfaas { } int response_id = responses[0].wr_id; + LeaseResponse& lease_response = _resource_mgr.response(response_id); + return rfaas::executor{ - std::string{_resource_mgr.response(response_id).address}, - _resource_mgr.response(response_id).port, - cores, - memory, - _resource_mgr.response(response_id).lease_id, + std::string{lease_response.address}, + lease_response.port, + lease_response.cores, + lease_response.memory, + lease_response.lease_id, dev }; } diff --git a/server/resource_manager/db.cpp b/server/resource_manager/db.cpp index a0e00c2..5a3210f 100644 --- a/server/resource_manager/db.cpp +++ b/server/resource_manager/db.cpp @@ -75,6 +75,11 @@ namespace rfaas { namespace resource_manager { continue; } + // Lease entire free memory when receiving reserved value -1 + if (memory == -1) { + memory = shared_ptr->_free_memory; + } + if(!shared_ptr->lease(numcores, memory)) { ++it; SPDLOG_DEBUG("Node {} cannot be used, not enough resources!", shared_ptr->node); @@ -84,6 +89,8 @@ namespace rfaas { namespace resource_manager { lease.lease_id = _lease_count++; lease.port = shared_ptr->port; strncpy(lease.address, shared_ptr->address.c_str(), Executor::ADDRESS_LENGTH); + lease.cores = numcores; + lease.memory = memory; bool is_total = shared_ptr->is_fully_leased(); if(is_total) { diff --git a/server/resource_manager/executor.cpp b/server/resource_manager/executor.cpp index 711e2c6..8a86005 100644 --- a/server/resource_manager/executor.cpp +++ b/server/resource_manager/executor.cpp @@ -48,18 +48,11 @@ namespace rfaas::resource_manager { bool Executor::lease(int cores, int memory) { - - // Lease entire free memory when receiving reserved value -1 - if (memory == -1) { - memory = _free_memory; - } - // Not enough memory? skip if(_free_memory < memory) { return false; } - // Note that values of cores <= 0 are reserved by the resource manager to disconnect clients if(_free_cores < cores) { return false; } diff --git a/server/resource_manager/manager.cpp b/server/resource_manager/manager.cpp index aade3cf..83fe0b4 100644 --- a/server/resource_manager/manager.cpp +++ b/server/resource_manager/manager.cpp @@ -322,8 +322,8 @@ void Manager::_handle_client_message(ibv_wc& wc, std::vector& poll_send } else { allocated->_send_buffer[0].lease_id = client.response()[0].lease_id; - allocated->_send_buffer[0].cores = cores; - allocated->_send_buffer[0].memory = memory; + allocated->_send_buffer[0].cores = client.response()[0].cores; + allocated->_send_buffer[0].memory = client.response()[0].memory; allocated->_connection->post_send( allocated->_send_buffer, From 32939259c419f0101625e58a46f3359b96b36f8b Mon Sep 17 00:00:00 2001 From: LeandroTreu Date: Mon, 19 May 2025 20:04:45 +0200 Subject: [PATCH 3/6] compilation fix --- rfaas/include/rfaas/client.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rfaas/include/rfaas/client.hpp b/rfaas/include/rfaas/client.hpp index b396826..4ddc357 100644 --- a/rfaas/include/rfaas/client.hpp +++ b/rfaas/include/rfaas/client.hpp @@ -54,7 +54,7 @@ namespace rfaas { } int response_id = responses[0].wr_id; - LeaseResponse& lease_response = _resource_mgr.response(response_id); + const LeaseResponse& lease_response = _resource_mgr.response(response_id); return rfaas::executor{ std::string{lease_response.address}, From 1072d7fd414da2fbd5adf61d6841843e530aae88 Mon Sep 17 00:00:00 2001 From: LeandroTreu Date: Wed, 21 May 2025 17:11:32 +0200 Subject: [PATCH 4/6] executor lease integer checks --- server/resource_manager/executor.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/resource_manager/executor.cpp b/server/resource_manager/executor.cpp index 8a86005..8745f97 100644 --- a/server/resource_manager/executor.cpp +++ b/server/resource_manager/executor.cpp @@ -48,7 +48,10 @@ namespace rfaas::resource_manager { bool Executor::lease(int cores, int memory) { - // Not enough memory? skip + if (cores <= 0 || memory <= 0 || _free_cores <= 0 || _free_memory <= 0) { + return false; + } + if(_free_memory < memory) { return false; } From 4705c5c0b5226c16bf8175d43f0ffcf4857a3a6f Mon Sep 17 00:00:00 2001 From: LeandroTreu Date: Thu, 5 Jun 2025 17:02:17 +0200 Subject: [PATCH 5/6] executor manager lease deallocation send fix --- server/executor_manager/client.cpp | 26 ++++++++++++-------- server/executor_manager/executor_process.cpp | 6 +++-- server/executor_manager/executor_process.hpp | 3 ++- server/executor_manager/manager.cpp | 14 +++++------ 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/server/executor_manager/client.cpp b/server/executor_manager/client.cpp index e2c83d7..d31d661 100644 --- a/server/executor_manager/client.cpp +++ b/server/executor_manager/client.cpp @@ -95,6 +95,22 @@ namespace rfaas::executor_manager { waitpid(executor->id(), &status, WUNTRACED); auto e = std::chrono::high_resolution_clock::now(); spdlog::info("Waited for child {} ms", std::chrono::duration_cast(e-b).count()); + + int32_t lease_id = executor->_lease_id; + if (res_mgr_connection) { + + spdlog::debug("Client {}: sending lease deallocation to resource manager for lease id {}", _id, lease_id); + res_mgr_connection->close_lease( + lease_id, + allocation_time, + accounting.data()[0].execution_time, + accounting.data()[0].hot_polling_time + ); + } + else { + spdlog::error("Client {}: could not send lease deallocation to resource manager for lease id {}", _id, lease_id); + } + executor.reset(); } spdlog::info( @@ -104,16 +120,6 @@ namespace rfaas::executor_manager { accounting.data()[0].execution_time / 1000.0 ); - if(res_mgr_connection) { - - res_mgr_connection->close_lease( - _id, - allocation_time, - accounting.data()[0].execution_time, - accounting.data()[0].hot_polling_time - ); - - } //acc.hot_polling_time = acc.execution_time = 0; // SEGFAULT? diff --git a/server/executor_manager/executor_process.cpp b/server/executor_manager/executor_process.cpp index 4b38ec8..aaabeef 100644 --- a/server/executor_manager/executor_process.cpp +++ b/server/executor_manager/executor_process.cpp @@ -30,13 +30,15 @@ namespace rfaas::executor_manager { connections[pos] = connection; } - ProcessExecutor::ProcessExecutor(int cores, ProcessExecutor::time_t alloc_begin, pid_t pid): + ProcessExecutor::ProcessExecutor(int cores, ProcessExecutor::time_t alloc_begin, pid_t pid, int32_t lease_id): ActiveExecutor(cores), _pid(pid) { _allocation_begin = alloc_begin; // FIXME: remove after connection _allocation_finished = _allocation_begin; + + _lease_id = lease_id; } std::tuple ProcessExecutor::check() const @@ -219,7 +221,7 @@ namespace rfaas::executor_manager { } if(counter == 36) counter = 0; - return new ProcessExecutor{lease.cores, begin, mypid}; + return new ProcessExecutor{lease.cores, begin, mypid, lease.id}; } } diff --git a/server/executor_manager/executor_process.hpp b/server/executor_manager/executor_process.hpp index f362374..14eef70 100644 --- a/server/executor_manager/executor_process.hpp +++ b/server/executor_manager/executor_process.hpp @@ -32,6 +32,7 @@ namespace rfaas::executor_manager { rdmalib::Connection** connections; int connections_len; int cores; + int32_t _lease_id; ActiveExecutor(int cores): connections(new rdmalib::Connection*[cores]), @@ -49,7 +50,7 @@ namespace rfaas::executor_manager { { pid_t _pid; - ProcessExecutor(int cores, time_t alloc_begin, pid_t pid); + ProcessExecutor(int cores, time_t alloc_begin, pid_t pid, int32_t lease_id); // FIXME: kill active executor //~ProcessExecutor(); diff --git a/server/executor_manager/manager.cpp b/server/executor_manager/manager.cpp index 48e30b9..638d090 100644 --- a/server/executor_manager/manager.cpp +++ b/server/executor_manager/manager.cpp @@ -311,8 +311,8 @@ namespace rfaas::executor_manager { ); auto end = std::chrono::high_resolution_clock::now(); spdlog::info( - "Client {} at {}:{} has executor with {} ID and {} cores, time {} us", - client.id(), client_address, client_port, client.executor->id(), lease->cores, + "Client {} at {}:{} has executor with pid {}, lease id {}, cores {}, time {} us", + client.id(), client_address, client_port, client.executor->id(), lease->id, lease->cores, std::chrono::duration_cast(end-now).count() ); @@ -327,7 +327,6 @@ namespace rfaas::executor_manager { } else { spdlog::info("Client {} disconnects", client.id()); - //client.disable(i, _accounting_data.data()[i]); client.disable(_res_mgr_connection.get()); return false; @@ -340,7 +339,7 @@ namespace rfaas::executor_manager { for(auto it = _clients.begin(); it != _clients.end(); ++it) { Client & client = it->second; - int i = it->first; + uint32_t client_id = it->first; if(!client.active()) { continue; } @@ -361,9 +360,9 @@ namespace rfaas::executor_manager { // send lease cancellation spdlog::info( "Executor at client {} exited, status {}, time allocated {} us, polling {} us, execution {} us", - i, std::get<1>(status), client.allocation_time, - client.accounting.data()[i].hot_polling_time / 1000.0, - client.accounting.data()[i].execution_time / 1000.0 + client_id, std::get<1>(status), client.allocation_time, + client.accounting.data()[client_id].hot_polling_time / 1000.0, + client.accounting.data()[client_id].execution_time / 1000.0 ); client.executor.reset(nullptr); spdlog::info("Finished cleanup"); @@ -426,7 +425,6 @@ namespace rfaas::executor_manager { spdlog::debug("[Manager] Disconnecting client"); Client& client = (*it).second; - //client.disable(i, _accounting_data.data()[i]); client.disable(_res_mgr_connection.get()); _clients.erase(it); From fd723481a353fb0d9d256643b22bc783008d231f Mon Sep 17 00:00:00 2001 From: LeandroTreu Date: Thu, 5 Jun 2025 17:47:04 +0200 Subject: [PATCH 6/6] remove no-op in resource manager --- server/resource_manager/manager.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/server/resource_manager/manager.cpp b/server/resource_manager/manager.cpp index 83fe0b4..fe458e1 100644 --- a/server/resource_manager/manager.cpp +++ b/server/resource_manager/manager.cpp @@ -359,7 +359,6 @@ void Manager::process_clients() rdmalib::Poller recv_poller{std::get<1>(*_state.shared_queue(2))}; int client_count = 0; std::vector poll_send; - std::vector removals; while (!_shutdown.load()) { @@ -392,17 +391,6 @@ void Manager::process_clients() } poll_send.clear(); } - - if (removals.size()) { - for (auto it : removals) { - spdlog::info("Remove client id {}", it->second.client_id); - _clients.erase(it); - } - - client_count -= removals.size(); - removals.clear(); - } - } spdlog::info("Background thread stops processing client events");