@@ -371,25 +371,172 @@ static int InitSha256(wc_Sha256* sha256)
371371 } /* extern "C" */
372372#endif
373373
/* CPUID feature flags, captured once by Sha256_SetTransform() and consulted
 * by the transform dispatchers below. */
static word32 intel_flags;
/* Nonzero when the selected transform uses SIMD registers, so call sites
 * know vector-register state must be saved/restored (see the
 * WOLFSSL_LINUXKM paths below). */
static int Transform_Sha256_is_vectorized = 0;

#ifdef WC_NO_INTERNAL_FUNCTION_POINTERS

/* Which SHA-256 block-transform kernel to use, selected at run time from
 * CPU features.  SHA256_UNSET means Sha256_SetTransform() has not run yet.
 * NOTE(review): SHA256_AVX1 is used for both the SHA-NI-capable and the
 * plain-AVX1 selection — dispatchers must consult intel_flags as well. */
static enum { SHA256_UNSET, SHA256_AVX1, SHA256_AVX2, SHA256_AVX1_RORX,
              SHA256_AVX2_RORX, SHA256_SSE2, SHA256_C }
    sha_method = SHA256_UNSET;
382+
/* Probe CPU features once and record which SHA-256 block transform to use
 * in sha_method.  Idempotent: returns immediately after the first selection.
 * Also sets Transform_Sha256_is_vectorized so dispatchers know whether the
 * chosen kernel touches SIMD state.
 * NOTE(review): no locking around sha_method — assumes the first call is
 * serialized by the caller; confirm init paths. */
static void Sha256_SetTransform(void)
{

    if (sha_method != SHA256_UNSET)
        return;

    intel_flags = cpuid_get_flags();

    if (IS_INTEL_SHA(intel_flags)) {
        /* SHA-NI available: prefer the AVX1+SHA-NI kernel, else SSE2+SHA-NI. */
    #ifdef HAVE_INTEL_AVX1
        if (IS_INTEL_AVX1(intel_flags)) {
            /* NOTE(review): same tag as the plain-AVX1 fallback below; the
             * dispatchers must use IS_INTEL_SHA(intel_flags) to tell the
             * two apart. */
            sha_method = SHA256_AVX1;
            Transform_Sha256_is_vectorized = 1;
        }
        else
    #endif
        {
            sha_method = SHA256_SSE2;
            Transform_Sha256_is_vectorized = 1;
        }
    }
    else
    /* No SHA-NI: fall back to the fastest pure-SIMD kernel compiled in. */
#ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags)) {
    #ifdef HAVE_INTEL_RORX
        if (IS_INTEL_BMI2(intel_flags)) {
            /* AVX2 + BMI2 rorx variant */
            sha_method = SHA256_AVX2_RORX;
            Transform_Sha256_is_vectorized = 1;
        }
        else
    #endif
        {
            sha_method = SHA256_AVX2;
            Transform_Sha256_is_vectorized = 1;
        }
    }
    else
#endif
#ifdef HAVE_INTEL_AVX1
    if (IS_INTEL_AVX1(intel_flags)) {
    #ifdef HAVE_INTEL_RORX
        if (IS_INTEL_BMI2(intel_flags)) {
            /* AVX1 + BMI2 rorx variant */
            sha_method = SHA256_AVX1_RORX;
            Transform_Sha256_is_vectorized = 1;
        }
        else
    #endif
        {
            sha_method = SHA256_AVX1;
            Transform_Sha256_is_vectorized = 1;
        }
    }
    else
#endif
    {
        /* Portable C implementation; no SIMD state touched. */
        sha_method = SHA256_C;
        Transform_Sha256_is_vectorized = 0;
    }
}
442+
443+ static WC_INLINE int inline_XTRANSFORM (wc_Sha256 * S , const byte * D ) {
444+ int ret ;
445+ if (sha_method == SHA256_C )
446+ return Transform_Sha256 (S , D );
447+ SAVE_VECTOR_REGISTERS (return _svr_ret ;);
448+ switch (sha_method ) {
449+ case SHA256_AVX2 :
450+ ret = Transform_Sha256_AVX2 (S , D );
451+ break ;
452+ case SHA256_AVX2_RORX :
453+ ret = Transform_Sha256_AVX2_RORX (S , D );
454+ break ;
455+ case SHA256_AVX1 :
456+ ret = Transform_Sha256_AVX1_Sha (S , D );
457+ break ;
458+ case SHA256_AVX1_RORX :
459+ ret = Transform_Sha256_AVX1_RORX (S , D );
460+ break ;
461+ case SHA256_SSE2 :
462+ ret = Transform_Sha256_SSE2_Sha (S , D );
463+ break ;
464+ case SHA256_C :
465+ case SHA256_UNSET :
466+ default :
467+ ret = Transform_Sha256 (S , D );
468+ break ;
469+ }
470+ RESTORE_VECTOR_REGISTERS ();
471+ return ret ;
472+ }
473+ #define XTRANSFORM (...) inline_XTRANSFORM(__VA_ARGS__)
474+
475+ static WC_INLINE int inline_XTRANSFORM_LEN (wc_Sha256 * S , const byte * D , word32 L ) {
476+ int ret ;
477+ SAVE_VECTOR_REGISTERS (return _svr_ret ;);
478+ switch (sha_method ) {
479+ case SHA256_AVX2 :
480+ ret = Transform_Sha256_AVX2_Len (S , D , L );
481+ break ;
482+ case SHA256_AVX2_RORX :
483+ ret = Transform_Sha256_AVX2_RORX_Len (S , D , L );
484+ break ;
485+ case SHA256_AVX1 :
486+ ret = Transform_Sha256_AVX1_Sha_Len (S , D , L );
487+ break ;
488+ case SHA256_AVX1_RORX :
489+ ret = Transform_Sha256_AVX1_RORX_Len (S , D , L );
490+ break ;
491+ case SHA256_SSE2 :
492+ ret = Transform_Sha256_SSE2_Sha_Len (S , D , L );
493+ break ;
494+ case SHA256_C :
495+ case SHA256_UNSET :
496+ default :
497+ ret = 0 ;
498+ break ;
499+ }
500+ RESTORE_VECTOR_REGISTERS ();
501+ return ret ;
502+ }
503+ #define XTRANSFORM_LEN (...) inline_XTRANSFORM_LEN(__VA_ARGS__)
504+
505+ #else /* !WC_NO_INTERNAL_FUNCTION_POINTERS */
506+
/* Pointer to the single-block SHA-256 transform selected at run time. */
static int (*Transform_Sha256_p)(wc_Sha256* sha256, const byte* data);
                                                    /* = _Transform_Sha256 */
/* Pointer to the multi-block (length-based) transform; stays NULL when no
 * length-based kernel is available for this CPU. */
static int (*Transform_Sha256_Len_p)(wc_Sha256* sha256, const byte* data,
                                     word32 len);
                                                    /* = NULL */
/* Nonzero once the transform pointers above have been initialized. */
static int transform_check = 0;
380- static word32 intel_flags ;
381- static int Transform_Sha256_is_vectorized = 0 ;
382513
/* Invoke the selected single-block transform through Transform_Sha256_p.
 * Under WOLFSSL_LINUXKM the kernel does not preserve SIMD state for us, so
 * vector registers are explicitly saved/restored around vectorized
 * transforms. */
static WC_INLINE int inline_XTRANSFORM(wc_Sha256* S, const byte* D) {
    int ret;
#ifdef WOLFSSL_LINUXKM
    /* On failure SAVE_VECTOR_REGISTERS executes its argument, returning
     * _svr_ret from this function. */
    if (Transform_Sha256_is_vectorized)
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
    ret = (*Transform_Sha256_p)(S, D);
#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha256_is_vectorized)
        RESTORE_VECTOR_REGISTERS();
#endif
    return ret;
}
#define XTRANSFORM(...) inline_XTRANSFORM(__VA_ARGS__)
389528
/* Invoke the selected multi-block transform through Transform_Sha256_Len_p.
 * Callers must check Transform_Sha256_Len_p != NULL first.  Under
 * WOLFSSL_LINUXKM vector registers are explicitly saved/restored around
 * vectorized transforms. */
static WC_INLINE int inline_XTRANSFORM_LEN(wc_Sha256* S, const byte* D, word32 L) {
    int ret;
#ifdef WOLFSSL_LINUXKM
    /* On failure SAVE_VECTOR_REGISTERS executes its argument, returning
     * _svr_ret from this function. */
    if (Transform_Sha256_is_vectorized)
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
    ret = (*Transform_Sha256_Len_p)(S, D, L);
#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha256_is_vectorized)
        RESTORE_VECTOR_REGISTERS();
#endif
    return ret;
}
#define XTRANSFORM_LEN(...) inline_XTRANSFORM_LEN(__VA_ARGS__)
@@ -463,6 +610,8 @@ static int InitSha256(wc_Sha256* sha256)
463610 transform_check = 1 ;
464611 }
465612
613+ #endif /* !WC_NO_INTERNAL_FUNCTION_POINTERS */
614+
466615#if !defined(WOLFSSL_KCAPI_HASH )
467616 int wc_InitSha256_ex (wc_Sha256 * sha256 , void * heap , int devId )
468617 {
@@ -1162,7 +1311,13 @@ static int InitSha256(wc_Sha256* sha256)
11621311 #ifdef XTRANSFORM_LEN
11631312 #if defined(WOLFSSL_X86_64_BUILD ) && defined(USE_INTEL_SPEEDUP ) && \
11641313 (defined(HAVE_INTEL_AVX1 ) || defined(HAVE_INTEL_AVX2 ))
1314+
1315+ #ifdef WC_NO_INTERNAL_FUNCTION_POINTERS
1316+ if (sha_method != SHA256_C )
1317+ #else
11651318 if (Transform_Sha256_Len_p != NULL )
1319+ #endif
1320+
11661321 #endif
11671322 {
11681323 if (len >= WC_SHA256_BLOCK_SIZE ) {
0 commit comments