@@ -83,6 +83,10 @@ struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_states = NULL;
8383
8484#ifdef WOLFSSL_COMMERCIAL_LICENSE
8585
86+ #ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
87+ #error WOLFSSL_COMMERCIAL_LICENSE requires LINUXKM_FPU_STATES_FOLLOW_THREADS
88+ #endif
89+
8690#pragma GCC diagnostic push
8791#pragma GCC diagnostic ignored "-Wunused-parameter"
8892#pragma GCC diagnostic ignored "-Wnested-externs"
@@ -114,10 +118,14 @@ WARN_UNUSED_RESULT int allocate_wolfcrypt_linuxkm_fpu_states(void)
114118 return BAD_STATE_E ;
115119 }
116120
121+ #ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
117122 if (nr_cpu_ids >= 16 )
118123 wc_linuxkm_fpu_states_n_tracked = nr_cpu_ids * 2 ;
119124 else
120125 wc_linuxkm_fpu_states_n_tracked = 32 ;
126+ #else
127+ wc_linuxkm_fpu_states_n_tracked = nr_cpu_ids ;
128+ #endif
121129
122130 wc_linuxkm_fpu_states =
123131 (struct wc_thread_fpu_count_ent * )malloc (
@@ -198,7 +206,8 @@ void free_wolfcrypt_linuxkm_fpu_states(void) {
198206 wc_linuxkm_fpu_states = NULL ;
199207}
200208
201- /* lock-(mostly)-free thread-local storage facility for tracking recursive fpu
209+ #ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
210+ /* legacy thread-local storage facility for tracking recursive fpu
202211 * pushing/popping
203212 */
204213static struct wc_thread_fpu_count_ent * wc_linuxkm_fpu_state_assoc (int create_p ) {
@@ -249,6 +258,84 @@ static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int create_p)
249258 }
250259}
251260
261+ #else /* !LINUXKM_FPU_STATES_FOLLOW_THREADS */
262+
263+ /* lock-free O(1)-lookup CPU-local storage facility for tracking recursive fpu
264+ * pushing/popping
265+ */
/* Slow-path lookup/claim of the current CPU's FPU-state tracking slot.
 *
 * create_p nonzero: claim the slot for the current task (the caller must
 * already hold the CPU non-preemptibly, i.e. must have called
 * kernel_fpu_begin(), so that raw_smp_processor_id() stays valid).
 * create_p zero: lookup only.
 *
 * Returns the slot on success, or NULL if the table is unallocated, the
 * slot is held by another pid, or no association exists (lookup mode).
 * Rate-limited pr_err() diagnostics are emitted for misuse.
 */
static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc_unlikely(int create_p) {
    int my_cpu = raw_smp_processor_id();
    pid_t my_pid = task_pid_nr(current), slot_pid;
    struct wc_thread_fpu_count_ent *slot;

    {
        /* Warn only once if called before the table is allocated. */
        static int _warned_on_null = 0;
        if (wc_linuxkm_fpu_states == NULL)
        {
            if (_warned_on_null == 0) {
                pr_err("wc_linuxkm_fpu_state_assoc called by pid %d"
                       " before allocate_wolfcrypt_linuxkm_fpu_states.\n", my_pid);
                _warned_on_null = 1;
            }
            return NULL;
        }
    }

    slot = &wc_linuxkm_fpu_states[my_cpu];
    /* CONSUME load pairs with the RELEASE store below (and in the release
     * path) so the slot's contents are visible once its pid is observed. */
    slot_pid = __atomic_load_n(&slot->pid, __ATOMIC_CONSUME);
    if (slot_pid == my_pid)
        return slot;
    if (create_p) {
        /* caller must have already called kernel_fpu_begin() if create_p. */
        if (slot_pid == 0) {
            /* Slot free: claim it for this task. */
            __atomic_store_n(&slot->pid, my_pid, __ATOMIC_RELEASE);
            return slot;
        } else {
            /* Slot held by another pid -- should not happen if callers are
             * non-preemptible as required.  Warn at most 10 times. */
            static int _warned_on_mismatched_pid = 0;
            if (_warned_on_mismatched_pid < 10) {
                pr_err("wc_linuxkm_fpu_state_assoc called by pid %d on cpu %d"
                       " but cpu slot already reserved by pid %d.\n", my_pid, my_cpu, slot_pid);
                ++_warned_on_mismatched_pid;
            }
            return NULL;
        }
    } else {
        /* Lookup-only and the slot isn't ours. */
        return NULL;
    }
}
306+
/* Fast-path lookup/claim of the current CPU's FPU-state tracking slot.
 *
 * Handles the common cases inline (slot already ours, or free and claimable
 * with create_p); all diagnostic/warning paths are delegated to the
 * non-inline wc_linuxkm_fpu_state_assoc_unlikely() to keep this hot path
 * small.  Returns the slot, or NULL if none is (or can be) associated.
 */
static inline struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int create_p) {
    int my_cpu = raw_smp_processor_id(); /* my_cpu is only trustworthy if we're
                                          * already nonpreemptible -- we'll
                                          * determine that soon enough by
                                          * checking if the pid matches or,
                                          * failing that, if create_p.
                                          */
    pid_t my_pid = task_pid_nr(current), slot_pid;
    struct wc_thread_fpu_count_ent *slot;

    if (wc_linuxkm_fpu_states == NULL)
        /* Table not allocated -- slow path warns (once) and returns NULL. */
        return wc_linuxkm_fpu_state_assoc_unlikely(create_p);

    slot = &wc_linuxkm_fpu_states[my_cpu];
    /* CONSUME load pairs with the RELEASE stores that publish/clear pid. */
    slot_pid = __atomic_load_n(&slot->pid, __ATOMIC_CONSUME);
    if (slot_pid == my_pid)
        return slot;
    if (create_p) {
        /* caller must have already called kernel_fpu_begin() if create_p. */
        if (slot_pid == 0) {
            /* Slot free: claim it for this task. */
            __atomic_store_n(&slot->pid, my_pid, __ATOMIC_RELEASE);
            return slot;
        } else {
            /* Contended slot -- slow path emits the rate-limited warning. */
            return wc_linuxkm_fpu_state_assoc_unlikely(create_p);
        }
    } else {
        /* Lookup-only and the slot isn't ours. */
        return NULL;
    }
}
336+
337+ #endif /* !LINUXKM_FPU_STATES_FOLLOW_THREADS */
338+
252339#ifdef WOLFSSL_COMMERCIAL_LICENSE
253340static struct fpstate * wc_linuxkm_fpstate_buf_from_fpu_state (
254341 struct wc_thread_fpu_count_ent * state )
@@ -258,7 +345,7 @@ static struct fpstate *wc_linuxkm_fpstate_buf_from_fpu_state(
258345}
259346#endif
260347
261- static void wc_linuxkm_fpu_state_release (struct wc_thread_fpu_count_ent * ent ) {
348+ static void wc_linuxkm_fpu_state_release_unlikely (struct wc_thread_fpu_count_ent * ent ) {
262349 if (ent -> fpu_state != 0 ) {
263350 static int warned_nonzero_fpu_state = 0 ;
264351 if (! warned_nonzero_fpu_state ) {
@@ -271,16 +358,33 @@ static void wc_linuxkm_fpu_state_release(struct wc_thread_fpu_count_ent *ent) {
271358 __atomic_store_n (& ent -> pid , 0 , __ATOMIC_RELEASE );
272359}
273360
361+ static inline void wc_linuxkm_fpu_state_release (struct wc_thread_fpu_count_ent * ent ) {
362+ if (unlikely (ent -> fpu_state != 0 ))
363+ return wc_linuxkm_fpu_state_release_unlikely (ent );
364+ __atomic_store_n (& ent -> pid , 0 , __ATOMIC_RELEASE );
365+ }
366+
274367WARN_UNUSED_RESULT int save_vector_registers_x86 (void )
275368{
369+ #ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
276370 struct wc_thread_fpu_count_ent * pstate = wc_linuxkm_fpu_state_assoc (1 );
277- if (pstate == NULL )
278- return MEMORY_E ;
371+ #else
372+ struct wc_thread_fpu_count_ent * pstate = wc_linuxkm_fpu_state_assoc (0 );
373+ #endif
279374
280375 /* allow for nested calls */
281- if (pstate -> fpu_state != 0U ) {
282- if ((pstate -> fpu_state & WC_FPU_COUNT_MASK )
283- == WC_FPU_COUNT_MASK )
376+ #ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
377+ if (pstate == NULL )
378+ return MEMORY_E ;
379+ #endif
380+ if (
381+ #ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
382+ (pstate != NULL ) &&
383+ #endif
384+ (pstate -> fpu_state != 0U ))
385+ {
386+ if (unlikely ((pstate -> fpu_state & WC_FPU_COUNT_MASK )
387+ == WC_FPU_COUNT_MASK ))
284388 {
285389 pr_err ("save_vector_registers_x86 recursion register overflow for "
286390 "pid %d.\n" , pstate -> pid );
@@ -298,31 +402,58 @@ WARN_UNUSED_RESULT int save_vector_registers_x86(void)
298402 fpstate -> xfeatures = ~0UL ;
299403 os_xsave (fpstate );
300404#else /* !WOLFSSL_COMMERCIAL_LICENSE */
301- #if defined(CONFIG_SMP ) && !defined(CONFIG_PREEMPT_COUNT ) && \
302- (LINUX_VERSION_CODE >= KERNEL_VERSION (5 , 7 , 0 ))
405+ #if defined(CONFIG_SMP ) && !defined(CONFIG_PREEMPT_COUNT ) && \
406+ (LINUX_VERSION_CODE >= KERNEL_VERSION (5 , 7 , 0 ))
303407 /* inhibit migration, which gums up the algorithm in
304408 * kernel_fpu_{begin,end}().
305409 */
306410 migrate_disable ();
307- #endif
411+ #endif
308412 kernel_fpu_begin ();
413+
414+ #ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
415+ pstate = wc_linuxkm_fpu_state_assoc (1 );
416+ if (pstate == NULL ) {
417+ kernel_fpu_end ();
418+ #if defined(CONFIG_SMP ) && !defined(CONFIG_PREEMPT_COUNT ) && \
419+ (LINUX_VERSION_CODE >= KERNEL_VERSION (5 , 7 , 0 )) && \
420+ !defined(WOLFSSL_COMMERCIAL_LICENSE )
421+ migrate_enable ();
422+ #endif
423+ return BAD_STATE_E ;
424+ }
425+ #endif
426+
309427#endif /* !WOLFSSL_COMMERCIAL_LICENSE */
310- /* set msb 0 to trigger kernel_fpu_end() at cleanup. */
428+ /* set msb to 0 to trigger kernel_fpu_end() at cleanup. */
311429 pstate -> fpu_state = 1U ;
312430 } else if (in_nmi () || (hardirq_count () > 0 ) || (softirq_count () > 0 )) {
313431 static int warned_fpu_forbidden = 0 ;
314432 if (! warned_fpu_forbidden )
315433 pr_err ("save_vector_registers_x86 called from IRQ handler.\n" );
434+ #ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
316435 wc_linuxkm_fpu_state_release (pstate );
436+ #endif
317437 return BAD_STATE_E ;
318438 } else {
439+ /* assume already safely in_kernel_fpu. */
319440#if defined(CONFIG_SMP ) && !defined(CONFIG_PREEMPT_COUNT ) && \
320441 (LINUX_VERSION_CODE >= KERNEL_VERSION (5 , 7 , 0 )) && \
321442 !defined(WOLFSSL_COMMERCIAL_LICENSE )
322443 migrate_disable ();
323444#endif
324- /* assume already safely in_kernel_fpu. */
325- /* set msb 1 to inhibit kernel_fpu_end() at cleanup. */
445+ #ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
446+ pstate = wc_linuxkm_fpu_state_assoc (1 );
447+ if (pstate == NULL ) {
448+ #if defined(CONFIG_SMP ) && !defined(CONFIG_PREEMPT_COUNT ) && \
449+ (LINUX_VERSION_CODE >= KERNEL_VERSION (5 , 7 , 0 )) && \
450+ !defined(WOLFSSL_COMMERCIAL_LICENSE )
451+ migrate_enable ();
452+ #endif
453+ return BAD_STATE_E ;
454+ }
455+ #endif
456+ /* set msb to 1 to inhibit kernel_fpu_end() at cleanup. */
326457 pstate -> fpu_state =
327458 WC_FPU_SAVED_MASK + 1U ;
328459 }
@@ -333,9 +464,10 @@ WARN_UNUSED_RESULT int save_vector_registers_x86(void)
333464void restore_vector_registers_x86 (void )
334465{
335466 struct wc_thread_fpu_count_ent * pstate = wc_linuxkm_fpu_state_assoc (0 );
336- if (pstate == NULL ) {
337- pr_err ("restore_vector_registers_x86 called by pid %d "
338- "with no saved state.\n" , task_pid_nr (current ));
467+ if (unlikely (pstate == NULL )) {
468+ pr_err ("restore_vector_registers_x86 called by pid %d on CPU %d "
469+ "with no saved state.\n" , task_pid_nr (current ),
470+ raw_smp_processor_id ());
339471 return ;
340472 }
341473
@@ -349,17 +481,26 @@ void restore_vector_registers_x86(void)
349481 os_xrstor (fpstate , fpstate -> xfeatures );
350482 fpregs_unlock ();
351483#else
484+ #ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
485+ wc_linuxkm_fpu_state_release (pstate );
486+ #endif
352487 kernel_fpu_end ();
353488#endif
354- } else
489+ } else {
355490 pstate -> fpu_state = 0U ;
491+ #ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
492+ wc_linuxkm_fpu_state_release (pstate );
493+ #endif
494+ }
356495#if defined(CONFIG_SMP ) && !defined(CONFIG_PREEMPT_COUNT ) && \
357496 (LINUX_VERSION_CODE >= KERNEL_VERSION (5 , 7 , 0 )) && \
358497 !defined(WOLFSSL_COMMERCIAL_LICENSE )
359498 migrate_enable ();
360499#endif
361500
501+ #ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
362502 wc_linuxkm_fpu_state_release (pstate );
503+ #endif
363504
364505 return ;
365506}
0 commit comments