ipc: ipc_service: icbmsg backend: workaround endpoint binding deadlock

This change works around the issue with the semaphore timeout during
the Bluetooth HCI driver initialization when the bt_enable function
is called in the context of the System Workqueue thread. This issue
only affects platform that use the IPC service and its ICBMsg backend
(e.g. the nRF54H20 DK target).

The bt_enable function, when called in the System Workqueue context,
results in a deadlock, as the waiting semaphore of the Bluetooth HCI
driver times out:

bt_hci_driver: Endpoint binding failed with -11

During the Bluetooth HCI driver open operation in the context of the
bt_enable function, the driver code waits using the semaphore for the
endpoint binding process of the IPC service module to finalize. The
issue occurs when the  waiting occurs in the System Workqueue context.
The ICBMsg backend from the IPC service schedules a system work during
the endpoint registration, in which it finalizes the binding operation
- also in the System Workqueue context. As the Bluetooth HCI driver
with its wait operation keeps the System Workqueue context busy, the
endpoint binding cannot be completed by the ICBMsg backend before the
HCI driver semaphore timeout.

Signed-off-by: Kamil Piszczek <Kamil.Piszczek@nordicsemi.no>
This commit is contained in:
Kamil Piszczek 2024-04-29 15:01:06 +02:00 committed by Fabio Baltieri
parent 6f06b3c794
commit 1e6e6a228f

View file

@ -110,6 +110,12 @@ LOG_MODULE_REGISTER(ipc_icbmsg,
/** Registered endpoints count mask in flags. */ /** Registered endpoints count mask in flags. */
#define FLAG_EPT_COUNT_MASK 0xFFFF #define FLAG_EPT_COUNT_MASK 0xFFFF
/** Workqueue stack size for bounding processing (this configuration is not optimized). */
#define EP_BOUND_WORK_Q_STACK_SIZE (512U)
/** Workqueue priority for bounding processing. */
#define EP_BOUND_WORK_Q_PRIORITY (CONFIG_SYSTEM_WORKQUEUE_PRIORITY)
enum msg_type { enum msg_type {
MSG_DATA = 0, /* Data message. */ MSG_DATA = 0, /* Data message. */
MSG_RELEASE_DATA, /* Release data buffer message. */ MSG_RELEASE_DATA, /* Release data buffer message. */
@ -194,6 +200,9 @@ struct control_message {
BUILD_ASSERT(NUM_EPT <= EPT_ADDR_INVALID, "Too many endpoints"); BUILD_ASSERT(NUM_EPT <= EPT_ADDR_INVALID, "Too many endpoints");
/* Work queue for bounding processing. */
static struct k_work_q ep_bound_work_q;
/** /**
* Calculate pointer to block from its index and channel configuration (RX or TX). * Calculate pointer to block from its index and channel configuration (RX or TX).
* No validation is performed. * No validation is performed.
@ -672,7 +681,7 @@ static int send_bound_message(struct backend_data *dev_data, struct ept_data *ep
*/ */
static void schedule_ept_bound_process(struct backend_data *dev_data) static void schedule_ept_bound_process(struct backend_data *dev_data)
{ {
k_work_submit(&dev_data->ep_bound_work); k_work_submit_to_queue(&ep_bound_work_q, &dev_data->ep_bound_work);
} }
/** /**
@ -1117,6 +1126,17 @@ static int backend_init(const struct device *instance)
{ {
const struct icbmsg_config *conf = instance->config; const struct icbmsg_config *conf = instance->config;
struct backend_data *dev_data = instance->data; struct backend_data *dev_data = instance->data;
static K_THREAD_STACK_DEFINE(ep_bound_work_q_stack, EP_BOUND_WORK_Q_STACK_SIZE);
static bool is_work_q_started;
if (!is_work_q_started) {
k_work_queue_init(&ep_bound_work_q);
k_work_queue_start(&ep_bound_work_q, ep_bound_work_q_stack,
K_THREAD_STACK_SIZEOF(ep_bound_work_q_stack),
EP_BOUND_WORK_Q_PRIORITY, NULL);
is_work_q_started = true;
}
dev_data->conf = conf; dev_data->conf = conf;
dev_data->is_initiator = (conf->rx.blocks_ptr < conf->tx.blocks_ptr); dev_data->is_initiator = (conf->rx.blocks_ptr < conf->tx.blocks_ptr);