Skip to content

Commit 1cb34a8

Browse files
Merge pull request #7411 from douzzer/20240410-lock-free-wc_linuxkm_fpu_state_assoc
20240410-lock-free-wc_linuxkm_fpu_state_assoc
2 parents 3742c4d + 954005a commit 1cb34a8

1 file changed

Lines changed: 158 additions & 17 deletions

File tree

linuxkm/linuxkm_memory.c

Lines changed: 158 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_states = NULL;
8383

8484
#ifdef WOLFSSL_COMMERCIAL_LICENSE
8585

86+
#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
87+
#error WOLFSSL_COMMERCIAL_LICENSE requires LINUXKM_FPU_STATES_FOLLOW_THREADS
88+
#endif
89+
8690
#pragma GCC diagnostic push
8791
#pragma GCC diagnostic ignored "-Wunused-parameter"
8892
#pragma GCC diagnostic ignored "-Wnested-externs"
@@ -114,10 +118,14 @@ WARN_UNUSED_RESULT int allocate_wolfcrypt_linuxkm_fpu_states(void)
114118
return BAD_STATE_E;
115119
}
116120

121+
#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
117122
if (nr_cpu_ids >= 16)
118123
wc_linuxkm_fpu_states_n_tracked = nr_cpu_ids * 2;
119124
else
120125
wc_linuxkm_fpu_states_n_tracked = 32;
126+
#else
127+
wc_linuxkm_fpu_states_n_tracked = nr_cpu_ids;
128+
#endif
121129

122130
wc_linuxkm_fpu_states =
123131
(struct wc_thread_fpu_count_ent *)malloc(
@@ -198,7 +206,8 @@ void free_wolfcrypt_linuxkm_fpu_states(void) {
198206
wc_linuxkm_fpu_states = NULL;
199207
}
200208

201-
/* lock-(mostly)-free thread-local storage facility for tracking recursive fpu
209+
#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
210+
/* legacy thread-local storage facility for tracking recursive fpu
202211
* pushing/popping
203212
*/
204213
static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int create_p) {
@@ -249,6 +258,84 @@ static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int create_p)
249258
}
250259
}
251260

261+
#else /* !LINUXKM_FPU_STATES_FOLLOW_THREADS */
262+
263+
/* lock-free O(1)-lookup CPU-local storage facility for tracking recursive fpu
 * pushing/popping
 */
/* Slow-path slot lookup/claim for the per-CPU fpu-state table.
 *
 * create_p: nonzero to claim this CPU's slot for the current task if it is
 *           free; zero for lookup-only.
 * Returns the slot owned by the current task on this CPU, or NULL if the
 * table is unallocated, the slot is owned by another pid, or (with
 * create_p == 0) the slot is not already ours.
 *
 * NOTE(review): assumes the caller is nonpreemptible when create_p is set
 * (see the "must have already called kernel_fpu_begin()" comment below), so
 * raw_smp_processor_id() is stable for the duration -- confirm against
 * callers.
 */
static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc_unlikely(int create_p) {
    int my_cpu = raw_smp_processor_id();
    pid_t my_pid = task_pid_nr(current), slot_pid;
    struct wc_thread_fpu_count_ent *slot;

    {
        /* one-shot diagnostic: the table must be allocated before use. */
        static int _warned_on_null = 0;
        if (wc_linuxkm_fpu_states == NULL)
        {
            if (_warned_on_null == 0) {
                pr_err("wc_linuxkm_fpu_state_assoc called by pid %d"
                       " before allocate_wolfcrypt_linuxkm_fpu_states.\n", my_pid);
                _warned_on_null = 1;
            }
            return NULL;
        }
    }

    slot = &wc_linuxkm_fpu_states[my_cpu];
    /* consume-ordered load pairs with the release store that claims the
     * slot, so a matching pid implies the rest of the slot is visible.
     */
    slot_pid = __atomic_load_n(&slot->pid, __ATOMIC_CONSUME);
    if (slot_pid == my_pid)
        return slot;
    if (create_p) {
        /* caller must have already called kernel_fpu_begin() if create_p. */
        if (slot_pid == 0) {
            /* free slot -- claim it with a release store so prior writes
             * are ordered before the ownership becomes visible.
             */
            __atomic_store_n(&slot->pid, my_pid, __ATOMIC_RELEASE);
            return slot;
        } else {
            /* slot already held by another pid -- should not happen if
             * callers are nonpreemptible; warn (rate-limited to 10).
             */
            static int _warned_on_mismatched_pid = 0;
            if (_warned_on_mismatched_pid < 10) {
                pr_err("wc_linuxkm_fpu_state_assoc called by pid %d on cpu %d"
                       " but cpu slot already reserved by pid %d.\n", my_pid, my_cpu, slot_pid);
                ++_warned_on_mismatched_pid;
            }
            return NULL;
        }
    } else {
        return NULL;
    }
}
306+
307+
static inline struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int create_p) {
308+
int my_cpu = raw_smp_processor_id(); /* my_cpu is only trustworthy if we're
309+
* already nonpreemptible -- we'll
310+
* determine that soon enough by
311+
* checking if the pid matches or,
312+
* failing that, if create_p.
313+
*/
314+
pid_t my_pid = task_pid_nr(current), slot_pid;
315+
struct wc_thread_fpu_count_ent *slot;
316+
317+
if (wc_linuxkm_fpu_states == NULL)
318+
return wc_linuxkm_fpu_state_assoc_unlikely(create_p);
319+
320+
slot = &wc_linuxkm_fpu_states[my_cpu];
321+
slot_pid = __atomic_load_n(&slot->pid, __ATOMIC_CONSUME);
322+
if (slot_pid == my_pid)
323+
return slot;
324+
if (create_p) {
325+
/* caller must have already called kernel_fpu_begin() if create_p. */
326+
if (slot_pid == 0) {
327+
__atomic_store_n(&slot->pid, my_pid, __ATOMIC_RELEASE);
328+
return slot;
329+
} else {
330+
return wc_linuxkm_fpu_state_assoc_unlikely(create_p);
331+
}
332+
} else {
333+
return NULL;
334+
}
335+
}
336+
337+
#endif /* !LINUXKM_FPU_STATES_FOLLOW_THREADS */
338+
252339
#ifdef WOLFSSL_COMMERCIAL_LICENSE
253340
static struct fpstate *wc_linuxkm_fpstate_buf_from_fpu_state(
254341
struct wc_thread_fpu_count_ent *state)
@@ -258,7 +345,7 @@ static struct fpstate *wc_linuxkm_fpstate_buf_from_fpu_state(
258345
}
259346
#endif
260347

261-
static void wc_linuxkm_fpu_state_release(struct wc_thread_fpu_count_ent *ent) {
348+
static void wc_linuxkm_fpu_state_release_unlikely(struct wc_thread_fpu_count_ent *ent) {
262349
if (ent->fpu_state != 0) {
263350
static int warned_nonzero_fpu_state = 0;
264351
if (! warned_nonzero_fpu_state) {
@@ -271,16 +358,33 @@ static void wc_linuxkm_fpu_state_release(struct wc_thread_fpu_count_ent *ent) {
271358
__atomic_store_n(&ent->pid, 0, __ATOMIC_RELEASE);
272359
}
273360

361+
/* Release ownership of a per-CPU fpu-state slot.
 *
 * ent: the slot previously returned by wc_linuxkm_fpu_state_assoc().
 * Normally ent->fpu_state is already 0; the release store on ent->pid
 * publishes the slot as free.  A nonzero fpu_state is unexpected and is
 * routed to the slow path, which warns before releasing.
 */
static inline void wc_linuxkm_fpu_state_release(struct wc_thread_fpu_count_ent *ent) {
    if (unlikely(ent->fpu_state != 0)) {
        /* slow path: leftover fpu_state -- warn, clear, and release. */
        wc_linuxkm_fpu_state_release_unlikely(ent);
        return;
    }
    /* common case: nothing to clean up; just free the slot. */
    __atomic_store_n(&ent->pid, 0, __ATOMIC_RELEASE);
}
366+
274367
WARN_UNUSED_RESULT int save_vector_registers_x86(void)
275368
{
369+
#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
276370
struct wc_thread_fpu_count_ent *pstate = wc_linuxkm_fpu_state_assoc(1);
277-
if (pstate == NULL)
278-
return MEMORY_E;
371+
#else
372+
struct wc_thread_fpu_count_ent *pstate = wc_linuxkm_fpu_state_assoc(0);
373+
#endif
279374

280375
/* allow for nested calls */
281-
if (pstate->fpu_state != 0U) {
282-
if ((pstate->fpu_state & WC_FPU_COUNT_MASK)
283-
== WC_FPU_COUNT_MASK)
376+
#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
377+
if (pstate == NULL)
378+
return MEMORY_E;
379+
#endif
380+
if (
381+
#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
382+
(pstate != NULL) &&
383+
#endif
384+
(pstate->fpu_state != 0U))
385+
{
386+
if (unlikely((pstate->fpu_state & WC_FPU_COUNT_MASK)
387+
== WC_FPU_COUNT_MASK))
284388
{
285389
pr_err("save_vector_registers_x86 recursion register overflow for "
286390
"pid %d.\n", pstate->pid);
@@ -298,31 +402,58 @@ WARN_UNUSED_RESULT int save_vector_registers_x86(void)
298402
fpstate->xfeatures = ~0UL;
299403
os_xsave(fpstate);
300404
#else /* !WOLFSSL_COMMERCIAL_LICENSE */
301-
#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \
302-
(LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0))
405+
#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \
406+
(LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0))
303407
/* inhibit migration, which gums up the algorithm in
304408
* kernel_fpu_{begin,end}().
305409
*/
306410
migrate_disable();
307-
#endif
411+
#endif
308412
kernel_fpu_begin();
413+
414+
#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
415+
pstate = wc_linuxkm_fpu_state_assoc(1);
416+
if (pstate == NULL) {
417+
kernel_fpu_end();
418+
#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \
419+
(LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \
420+
!defined(WOLFSSL_COMMERCIAL_LICENSE)
421+
migrate_enable();
422+
#endif
423+
return BAD_STATE_E;
424+
}
425+
#endif
426+
309427
#endif /* !WOLFSSL_COMMERCIAL_LICENSE */
310-
/* set msb 0 to trigger kernel_fpu_end() at cleanup. */
428+
/* set msb to 0 to trigger kernel_fpu_end() at cleanup. */
311429
pstate->fpu_state = 1U;
312430
} else if (in_nmi() || (hardirq_count() > 0) || (softirq_count() > 0)) {
313431
static int warned_fpu_forbidden = 0;
314432
if (! warned_fpu_forbidden)
315433
pr_err("save_vector_registers_x86 called from IRQ handler.\n");
434+
#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
316435
wc_linuxkm_fpu_state_release(pstate);
436+
#endif
317437
return BAD_STATE_E;
318438
} else {
439+
/* assume already safely in_kernel_fpu. */
319440
#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \
320441
(LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \
321442
!defined(WOLFSSL_COMMERCIAL_LICENSE)
322443
migrate_disable();
323444
#endif
324-
/* assume already safely in_kernel_fpu. */
325-
/* set msb 1 to inhibit kernel_fpu_end() at cleanup. */
445+
#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
446+
pstate = wc_linuxkm_fpu_state_assoc(1);
447+
if (pstate == NULL) {
448+
#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \
449+
(LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \
450+
!defined(WOLFSSL_COMMERCIAL_LICENSE)
451+
migrate_enable();
452+
#endif
453+
return BAD_STATE_E;
454+
}
455+
#endif
456+
/* set msb to 1 to inhibit kernel_fpu_end() at cleanup. */
326457
pstate->fpu_state =
327458
WC_FPU_SAVED_MASK + 1U;
328459
}
@@ -333,9 +464,10 @@ WARN_UNUSED_RESULT int save_vector_registers_x86(void)
333464
void restore_vector_registers_x86(void)
334465
{
335466
struct wc_thread_fpu_count_ent *pstate = wc_linuxkm_fpu_state_assoc(0);
336-
if (pstate == NULL) {
337-
pr_err("restore_vector_registers_x86 called by pid %d "
338-
"with no saved state.\n", task_pid_nr(current));
467+
if (unlikely(pstate == NULL)) {
468+
pr_err("restore_vector_registers_x86 called by pid %d on CPU %d "
469+
"with no saved state.\n", task_pid_nr(current),
470+
raw_smp_processor_id());
339471
return;
340472
}
341473

@@ -349,17 +481,26 @@ void restore_vector_registers_x86(void)
349481
os_xrstor(fpstate, fpstate->xfeatures);
350482
fpregs_unlock();
351483
#else
484+
#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
485+
wc_linuxkm_fpu_state_release(pstate);
486+
#endif
352487
kernel_fpu_end();
353488
#endif
354-
} else
489+
} else {
355490
pstate->fpu_state = 0U;
491+
#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
492+
wc_linuxkm_fpu_state_release(pstate);
493+
#endif
494+
}
356495
#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \
357496
(LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \
358497
!defined(WOLFSSL_COMMERCIAL_LICENSE)
359498
migrate_enable();
360499
#endif
361500

501+
#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
362502
wc_linuxkm_fpu_state_release(pstate);
503+
#endif
363504

364505
return;
365506
}

0 commit comments

Comments
 (0)