|
/* Copyright (c) 2026 The F9 Microkernel Project. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
| 5 | + |
| 6 | +#ifndef PLATFORM_IPC_FASTPATH_H_ |
| 7 | +#define PLATFORM_IPC_FASTPATH_H_ |
| 8 | + |
| 9 | +#include <debug.h> |
| 10 | +#include <ipc.h> |
| 11 | +#include <platform/armv7m.h> |
| 12 | +#include <platform/irq.h> |
| 13 | +#include <sched.h> |
| 14 | +#include <syscall.h> |
| 15 | +#include <thread.h> |
| 16 | +#include <types.h> |
| 17 | + |
/**
 * Fastpath IPC optimization for ARM Cortex-M.
 *
 * Bypasses softirq scheduling for simple short-message IPC by performing
 * a direct register transfer in the SVC handler context.
 *
 * CRITICAL: Message registers (MR0-MR7) live in R4-R11, NOT R0-R7.
 * - R0-R3: Syscall parameters (to_tid, from_tid, timeout, unused)
 * - R4-R11: Message registers (MR0-MR7), accessed via ctx.regs[0-7]
 *
 * A NAKED wrapper captures R4-R11 immediately, before any
 * compiler-generated prologue can clobber them.
 *
 * The implementation lives in this header as static inline functions to
 * avoid any call overhead on the fast path.
 */
| 33 | + |
| 34 | +/** |
| 35 | + * ipc_fastpath_copy_mrs() - Copy message registers to receiver |
| 36 | + * @saved_mrs: Saved message registers R4-R11 (MR0-MR7) |
| 37 | + * @sender: Source thread (for msg_buffer access) |
| 38 | + * @receiver: Destination thread |
| 39 | + * @n_untyped: Number of untyped words to copy (0-39) |
| 40 | + * |
| 41 | + * Copies MR0-MR{n_untyped} from sender to receiver: |
| 42 | + * - MR0-MR7: From saved_mrs to receiver->ctx.regs[0-7] |
| 43 | + * - MR8-MR39: From sender->msg_buffer to receiver->msg_buffer (NEW) |
| 44 | + * |
| 45 | + * WCET: ~20 cycles (MR0-MR7) + ~100 cycles (MR8-MR39, if used) |
| 46 | + */ |
| 47 | +static inline void ipc_fastpath_copy_mrs(volatile uint32_t *saved_mrs, |
| 48 | + struct tcb *sender, |
| 49 | + struct tcb *receiver, |
| 50 | + int n_untyped) |
| 51 | +{ |
| 52 | + int count = n_untyped + 1; /* +1 for tag in MR0 */ |
| 53 | + int i; |
| 54 | + |
| 55 | + /* Phase 1: Copy MR0-MR7 from saved registers (R4-R11) */ |
| 56 | + for (i = 0; i < count && i < 8; i++) |
| 57 | + receiver->ctx.regs[i] = saved_mrs[i]; |
| 58 | + |
| 59 | + /* Phase 2: Copy MR8-MR39 from sender's msg_buffer (if needed) */ |
| 60 | + if (count > 8) { |
| 61 | + int buf_count = count - 8; /* Number of words in buffer */ |
| 62 | + if (buf_count > 32) |
| 63 | + buf_count = 32; /* Clamp to buffer size */ |
| 64 | + |
| 65 | + for (i = 0; i < buf_count; i++) |
| 66 | + receiver->msg_buffer[i] = sender->msg_buffer[i]; |
| 67 | + } |
| 68 | +} |
| 69 | + |
/**
 * ipc_fastpath_helper() - Fastpath IPC implementation (C helper)
 * @caller: Current thread attempting IPC
 * @svc_param: SVC hardware stack frame (R0-R3, R12, LR, PC, xPSR)
 * @saved_mrs: R4-R11 (MR0-MR7) snapshot, captured before any C prologue ran
 *
 * Called by the naked wrapper after the message registers have been
 * captured. Runs seven eligibility checks; the first failing check
 * returns 0 with NO kernel state modified, so falling back to the
 * slowpath is always safe. Scheduler/thread state is only mutated after
 * all checks pass.
 *
 * Return: 1 if the fastpath delivered the message, 0 to fall back to the
 * slowpath.
 */
static __attribute__((used)) inline int ipc_fastpath_helper(struct tcb *caller,
                                                            uint32_t *svc_param,
                                                            volatile uint32_t *saved_mrs)
{
    struct tcb *to_thr;
    l4_thread_t to_tid, from_tid;
    ipc_msg_tag_t tag;

    /* Extract IPC parameters from the hardware-stacked frame (R0-R3) */
    to_tid = svc_param[REG_R0];
    from_tid = svc_param[REG_R1];

    /* The message tag is MR0, i.e. saved R4 — NOT the stacked R0! */
    tag.raw = saved_mrs[0];

    /* Fastpath Eligibility Check */

    /* Criterion 1: Simple send (to_tid valid, from_tid = NILTHREAD) */
    if (to_tid == L4_NILTHREAD || from_tid != L4_NILTHREAD)
        return 0; /* Slowpath: receive-only or send+receive */

    /* Criterion 2: No typed items (no MapItems/GrantItems) */
    if (tag.s.n_typed != 0)
        return 0; /* Slowpath: requires map_area() processing */

    /* Criterion 3: Short message (fits in MR0-MR39: registers + buffer)
     * MR0-MR7: 8 words × 4 bytes = 32 bytes (registers)
     * MR8-MR39: 32 words × 4 bytes = 128 bytes (buffer)
     * Total capacity: 160 bytes (40 words)
     */
    if (tag.s.n_untyped > 39)
        return 0; /* Slowpath: requires UTCB access */

    /* Criterion 4: Receiver exists and is blocked waiting */
    to_thr = thread_by_globalid(to_tid);
    if (!to_thr || to_thr->state != T_RECV_BLOCKED)
        return 0; /* Slowpath: receiver not ready */

    /* Criterion 5: Receiver is waiting for us (open wait or exact match) */
    if (to_thr->ipc_from != L4_ANYTHREAD &&
        to_thr->ipc_from != caller->t_globalid)
        return 0; /* Slowpath: receiver waiting for someone else */

    /* Criterion 6: Special thread handling */
    if (to_tid == TID_TO_GLOBALID(THREAD_LOG) ||
        to_tid == TID_TO_GLOBALID(THREAD_IRQ_REQUEST))
        return 0; /* Slowpath: special kernel threads */

    /* Criterion 7: Thread start protocol
     * NOTE(review): 0x00000005 is presumably the thread-start message tag
     * used by the slowpath's initialization protocol — confirm against the
     * slowpath and replace the magic number with a named constant.
     */
    if (tag.raw == 0x00000005)
        return 0; /* Slowpath: thread initialization */

    /* All criteria met - Execute Fastpath */

    /* Phase 0: Dequeue caller (will re-enqueue later)
     * NOTE(review): local forward declaration suggests sched.h does not
     * export sched_dequeue() — consider moving this prototype there.
     */
    extern void sched_dequeue(struct tcb *);
    sched_dequeue(caller);

    /* Phase 1: Copy message registers from sender to receiver
     * - MR0-MR7: From saved registers (R4-R11)
     * - MR8-MR39: From sender's msg_buffer to receiver's msg_buffer (if
     *   needed)
     */
    ipc_fastpath_copy_mrs(saved_mrs, caller, to_thr, tag.s.n_untyped);

    /* Phase 2: Update receiver context
     * The receiver's stacked R0 becomes the sender's global ID, which is
     * what the IPC syscall returns to the receiver.
     */
    ((uint32_t *) to_thr->ctx.sp)[REG_R0] = caller->t_globalid;
    to_thr->utcb->sender = caller->t_globalid;

    /* Phase 3: Update thread states */

    /* Clear timeout events (no timeout handling on the fastpath) */
    caller->timeout_event = 0;
    to_thr->timeout_event = 0;

    /* Receiver becomes runnable with IPC priority boost */
    to_thr->state = T_RUNNABLE;
    to_thr->ipc_from = L4_NILTHREAD;
    sched_set_priority(to_thr, SCHED_PRIO_IPC);
    sched_enqueue(to_thr);

    /* Caller continues (send-only, no reply expected)
     * Fastpath only handles from_tid==NILTHREAD (simple send).
     * For L4_Call (send+receive), slowpath handles blocking.
     *
     * Re-enqueue caller (was dequeued at SVC entry).
     * It's safe to enqueue current thread - sched has double-enqueue
     * protection.
     */
    caller->state = T_RUNNABLE;
    sched_enqueue(caller);

    /* Phase 4: Request context switch via PendSV
     * DON'T do an immediate switch - let PendSV handle it normally.
     */
    request_schedule();

    return 1; /* Fastpath succeeded */
}
| 178 | + |
/**
 * ipc_try_fastpath() - Attempt fastpath IPC using pre-saved R4-R11
 * @caller: Current thread attempting IPC
 * @svc_param: SVC stack frame (R0-R3, R12, LR, PC, xPSR)
 *
 * The message registers are read from __irq_saved_regs, which the
 * SVC_HANDLER populated before any C code ran, guaranteeing MR0-MR7
 * are still intact.
 *
 * Eligibility (enforced by the helper):
 *  - simple send (to_tid valid, from_tid == NILTHREAD)
 *  - short message (n_untyped <= 39, n_typed == 0)
 *  - receiver ready (T_RECV_BLOCKED, waiting for caller or ANYTHREAD)
 *
 * Return: 1 when the fastpath succeeded (skip the slowpath),
 *         0 when the caller must take the slowpath.
 */
static inline int ipc_try_fastpath(struct tcb *caller, uint32_t *svc_param)
{
    extern volatile uint32_t __irq_saved_regs[8];
    volatile uint32_t *mrs = __irq_saved_regs;

    /* Hand the SVC_HANDLER's R4-R11 snapshot to the eligibility helper */
    return ipc_fastpath_helper(caller, svc_param, mrs);
}
| 203 | + |
| 204 | +#endif /* PLATFORM_IPC_FASTPATH_H_ */ |