@@ -73,6 +73,13 @@ void *lkm_realloc(void *ptr, size_t newsize) {
7373
7474#if defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS ) && defined(CONFIG_X86 )
7575
76+ /* kernel 4.19 -- the most recent LTS before 5.4 -- lacks the necessary safety
77+ * checks in __kernel_fpu_begin(), and lacks TIF_NEED_FPU_LOAD.
78+ */
79+ #if (LINUX_VERSION_CODE < KERNEL_VERSION (5 , 4 , 0 ))
80+ #error WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS on x86 requires kernel 5.4.0 or higher.
81+ #endif
82+
7683static unsigned int wc_linuxkm_fpu_states_n_tracked = 0 ;
7784
7885struct wc_thread_fpu_count_ent {
@@ -261,7 +268,10 @@ static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int create_p)
261268#else /* !LINUXKM_FPU_STATES_FOLLOW_THREADS */
262269
263270/* lock-free O(1)-lookup CPU-local storage facility for tracking recursive fpu
264- * pushing/popping
271+ * pushing/popping.
272+ *
273+ * caller must have already called kernel_fpu_begin() or preempt_disable()
274+ * before entering this or the streamlined inline version of it below.
265275 */
266276static struct wc_thread_fpu_count_ent * wc_linuxkm_fpu_state_assoc_unlikely (int create_p ) {
267277 int my_cpu = raw_smp_processor_id ();
@@ -283,28 +293,66 @@ static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc_unlikely(int c
283293
284294 slot = & wc_linuxkm_fpu_states [my_cpu ];
285295 slot_pid = __atomic_load_n (& slot -> pid , __ATOMIC_CONSUME );
286- if (slot_pid == my_pid )
296+ if (slot_pid == my_pid ) {
297+ if (create_p ) {
298+ static int _warned_on_redundant_create_p = 0 ;
299+ if (_warned_on_redundant_create_p < 10 ) {
300+ pr_err ("wc_linuxkm_fpu_state_assoc called with create_p=1 by"
301+ " pid %d on cpu %d with cpu slot already reserved by"
302+ " said pid.\n" , my_pid , my_cpu );
303+ ++ _warned_on_redundant_create_p ;
304+ }
305+ }
287306 return slot ;
307+ }
288308 if (create_p ) {
289- /* caller must have already called kernel_fpu_begin() if create_p. */
290309 if (slot_pid == 0 ) {
291310 __atomic_store_n (& slot -> pid , my_pid , __ATOMIC_RELEASE );
292311 return slot ;
293312 } else {
313+ /* if the slot is already occupied, that can be benign due to a
314+ * migration, but it will require fixup by the thread that owns the
315+ * slot, which will happen when it releases its lock, or sooner (see
316+ * below).
317+ */
294318 static int _warned_on_mismatched_pid = 0 ;
295319 if (_warned_on_mismatched_pid < 10 ) {
296- pr_err ("wc_linuxkm_fpu_state_assoc called by pid %d on cpu %d"
297- " but cpu slot already reserved by pid %d.\n" , my_pid , my_cpu , slot_pid );
320+ pr_warn ("wc_linuxkm_fpu_state_assoc called by pid %d on cpu %d"
321+ " but cpu slot already reserved by pid %d.\n" ,
322+ my_pid , my_cpu , slot_pid );
298323 ++ _warned_on_mismatched_pid ;
299324 }
300325 return NULL ;
301326 }
302327 } else {
328+ /* check for migration. this can happen despite our best efforts if any
329+ * I/O occurred while locked, e.g. kernel messages like "uninitialized
330+ * urandom read". since we're locked now, we can safely migrate the
331+ * entry in wc_linuxkm_fpu_states[], freeing up the slot on the previous
332+ * cpu.
333+ */
334+ unsigned int cpu_i ;
335+ for (cpu_i = 0 ; cpu_i < wc_linuxkm_fpu_states_n_tracked ; ++ cpu_i ) {
336+ if (__atomic_load_n (
337+ & wc_linuxkm_fpu_states [cpu_i ].pid ,
338+ __ATOMIC_CONSUME )
339+ == my_pid )
340+ {
341+ wc_linuxkm_fpu_states [my_cpu ] = wc_linuxkm_fpu_states [cpu_i ];
342+ __atomic_store_n (& wc_linuxkm_fpu_states [cpu_i ].fpu_state , 0 ,
343+ __ATOMIC_RELEASE );
344+ __atomic_store_n (& wc_linuxkm_fpu_states [cpu_i ].pid , 0 ,
345+ __ATOMIC_RELEASE );
346+ return & wc_linuxkm_fpu_states [my_cpu ];
347+ }
348+ }
303349 return NULL ;
304350 }
305351}
306352
307- static inline struct wc_thread_fpu_count_ent * wc_linuxkm_fpu_state_assoc (int create_p ) {
353+ static inline struct wc_thread_fpu_count_ent * wc_linuxkm_fpu_state_assoc (
354+ int create_p )
355+ {
308356 int my_cpu = raw_smp_processor_id (); /* my_cpu is only trustworthy if we're
309357 * already nonpreemptible -- we'll
310358 * determine that soon enough by
@@ -314,23 +362,26 @@ static inline struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int cre
314362 pid_t my_pid = task_pid_nr (current ), slot_pid ;
315363 struct wc_thread_fpu_count_ent * slot ;
316364
317- if (wc_linuxkm_fpu_states == NULL )
365+ if (unlikely ( wc_linuxkm_fpu_states == NULL ) )
318366 return wc_linuxkm_fpu_state_assoc_unlikely (create_p );
319367
320368 slot = & wc_linuxkm_fpu_states [my_cpu ];
321369 slot_pid = __atomic_load_n (& slot -> pid , __ATOMIC_CONSUME );
322- if (slot_pid == my_pid )
323- return slot ;
324- if (create_p ) {
325- /* caller must have already called kernel_fpu_begin() if create_p. */
326- if (slot_pid == 0 ) {
370+ if (slot_pid == my_pid ) {
371+ if (unlikely (create_p ))
372+ return wc_linuxkm_fpu_state_assoc_unlikely (create_p );
373+ else
374+ return slot ;
375+ }
376+ if (likely (create_p )) {
377+ if (likely (slot_pid == 0 )) {
327378 __atomic_store_n (& slot -> pid , my_pid , __ATOMIC_RELEASE );
328379 return slot ;
329380 } else {
330381 return wc_linuxkm_fpu_state_assoc_unlikely (create_p );
331382 }
332383 } else {
333- return NULL ;
384+ return wc_linuxkm_fpu_state_assoc_unlikely ( create_p ) ;
334385 }
335386}
336387
@@ -345,7 +396,9 @@ static struct fpstate *wc_linuxkm_fpstate_buf_from_fpu_state(
345396}
346397#endif
347398
348- static void wc_linuxkm_fpu_state_release_unlikely (struct wc_thread_fpu_count_ent * ent ) {
399+ static void wc_linuxkm_fpu_state_release_unlikely (
400+ struct wc_thread_fpu_count_ent * ent )
401+ {
349402 if (ent -> fpu_state != 0 ) {
350403 static int warned_nonzero_fpu_state = 0 ;
351404 if (! warned_nonzero_fpu_state ) {
@@ -358,7 +411,9 @@ static void wc_linuxkm_fpu_state_release_unlikely(struct wc_thread_fpu_count_ent
358411 __atomic_store_n (& ent -> pid , 0 , __ATOMIC_RELEASE );
359412}
360413
361- static inline void wc_linuxkm_fpu_state_release (struct wc_thread_fpu_count_ent * ent ) {
414+ static inline void wc_linuxkm_fpu_state_release (
415+ struct wc_thread_fpu_count_ent * ent )
416+ {
362417 if (unlikely (ent -> fpu_state != 0 ))
363418 return wc_linuxkm_fpu_state_release_unlikely (ent );
364419 __atomic_store_n (& ent -> pid , 0 , __ATOMIC_RELEASE );
@@ -395,7 +450,16 @@ WARN_UNUSED_RESULT int save_vector_registers_x86(void)
395450 }
396451 }
397452
398- if (irq_fpu_usable ()) {
453+ if (irq_fpu_usable ()
454+ #if (LINUX_VERSION_CODE < KERNEL_VERSION (5 , 17 , 0 ))
455+ /* work around a kernel bug -- see linux commit 59f5ede3bc0f0.
456+ * what we really want here is this_cpu_read(in_kernel_fpu), but
457+ * in_kernel_fpu is an unexported static array.
458+ */
459+ && !test_thread_flag (TIF_NEED_FPU_LOAD )
460+ #endif
461+ )
462+ {
399463#ifdef WOLFSSL_COMMERCIAL_LICENSE
400464 struct fpstate * fpstate = wc_linuxkm_fpstate_buf_from_fpu_state (pstate );
401465 fpregs_lock ();
@@ -433,10 +497,22 @@ WARN_UNUSED_RESULT int save_vector_registers_x86(void)
433497 pr_err ("save_vector_registers_x86 called from IRQ handler.\n" );
434498#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
435499 wc_linuxkm_fpu_state_release (pstate );
500+ #endif
501+ return BAD_STATE_E ;
502+ } else if (!test_thread_flag (TIF_NEED_FPU_LOAD )) {
503+ static int warned_fpu_forbidden = 0 ;
504+ if (! warned_fpu_forbidden )
505+ pr_err ("save_vector_registers_x86 called with !irq_fpu_usable from"
506+ " thread without previous FPU save.\n" );
507+ #ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS
508+ wc_linuxkm_fpu_state_release (pstate );
436509#endif
437510 return BAD_STATE_E ;
438511 } else {
439- /* assume already safely in_kernel_fpu. */
512+ /* assume already safely in_kernel_fpu from caller, but recursively
513+ * preempt_disable() to be extra-safe.
514+ */
515+ preempt_disable ();
440516#if defined(CONFIG_SMP ) && !defined(CONFIG_PREEMPT_COUNT ) && \
441517 (LINUX_VERSION_CODE >= KERNEL_VERSION (5 , 7 , 0 )) && \
442518 !defined(WOLFSSL_COMMERCIAL_LICENSE )
@@ -450,6 +526,7 @@ WARN_UNUSED_RESULT int save_vector_registers_x86(void)
450526 !defined(WOLFSSL_COMMERCIAL_LICENSE )
451527 migrate_enable ();
452528 #endif
529+ preempt_enable ();
453530 return BAD_STATE_E ;
454531 }
455532#endif
@@ -491,6 +568,7 @@ void restore_vector_registers_x86(void)
491568 #ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS
492569 wc_linuxkm_fpu_state_release (pstate );
493570 #endif
571+ preempt_enable ();
494572 }
495573#if defined(CONFIG_SMP ) && !defined(CONFIG_PREEMPT_COUNT ) && \
496574 (LINUX_VERSION_CODE >= KERNEL_VERSION (5 , 7 , 0 )) && \
0 commit comments