Skip to content

Commit 316681b

Browse files
authored
Merge pull request #8826 from douzzer/20250530-ML-KEM-WC_SHA3_NO_ASM
20250530-ML-KEM-WC_SHA3_NO_ASM
2 parents 5c21551 + dd6e601 commit 316681b

7 files changed

Lines changed: 56 additions & 26 deletions

File tree

.github/workflows/wolfCrypt-Wconversion.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@ jobs:
3535
name: Checkout wolfSSL
3636

3737
- name: install_multilib
38-
run: sudo apt-get install -y gcc-multilib
38+
run: |
39+
export DEBIAN_FRONTEND=noninteractive
40+
sudo apt-get update
41+
sudo apt-get install -y gcc-multilib
3942
4043
- name: Build wolfCrypt with extra type conversion warnings
4144
run: |

configure.ac

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1502,6 +1502,9 @@ do
15021502
ml-kem)
15031503
ENABLED_ML_KEM=yes
15041504
;;
1505+
noasm)
1506+
AM_CFLAGS="$AM_CFLAGS -DWC_MLKEM_NO_ASM"
1507+
;;
15051508
*)
15061509
AC_MSG_ERROR([Invalid choice for MLKEM []: $ENABLED_MLKEM.])
15071510
break;;

linuxkm/Kbuild

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ $(obj)/wolfcrypt/src/chacha_asm.o: OBJECT_FILES_NON_STANDARD := y
143143
$(obj)/wolfcrypt/src/poly1305_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
144144
$(obj)/wolfcrypt/src/poly1305_asm.o: OBJECT_FILES_NON_STANDARD := y
145145
$(obj)/wolfcrypt/src/wc_mlkem_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
146+
$(obj)/wolfcrypt/src/wc_mlkem_asm.o: OBJECT_FILES_NON_STANDARD := y
146147

147148
ifndef READELF
148149
READELF := readelf

wolfcrypt/src/sha3.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -248,9 +248,6 @@ while (0)
248248
*
249249
* s The state.
250250
*/
251-
#ifndef USE_INTEL_SPEEDUP
252-
static
253-
#endif
254251
void BlockSha3(word64* s)
255252
{
256253
byte i, x, y;
@@ -541,9 +538,6 @@ while (0)
541538
*
542539
* s The state.
543540
*/
544-
#ifndef USE_INTEL_SPEEDUP
545-
static
546-
#endif
547541
void BlockSha3(word64* s)
548542
{
549543
word64 n[25];

wolfcrypt/src/wc_mlkem.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,12 @@
6565

6666
#include <wolfssl/wolfcrypt/libwolfssl_sources.h>
6767

68+
#ifdef WC_MLKEM_NO_ASM
69+
#undef USE_INTEL_SPEEDUP
70+
#undef WOLFSSL_ARMASM
71+
#undef WOLFSSL_RISCV_ASM
72+
#endif
73+
6874
#include <wolfssl/wolfcrypt/mlkem.h>
6975
#include <wolfssl/wolfcrypt/wc_mlkem.h>
7076
#include <wolfssl/wolfcrypt/hash.h>

wolfcrypt/src/wc_mlkem_poly.c

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@
6969

7070
#include <wolfssl/wolfcrypt/libwolfssl_sources.h>
7171

72+
#ifdef WC_MLKEM_NO_ASM
73+
#undef USE_INTEL_SPEEDUP
74+
#undef WOLFSSL_ARMASM
75+
#undef WOLFSSL_RISCV_ASM
76+
#endif
77+
7278
#include <wolfssl/wolfcrypt/wc_mlkem.h>
7379
#include <wolfssl/wolfcrypt/cpuid.h>
7480

@@ -2481,6 +2487,7 @@ static int mlkem_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed)
24812487
XMEMSET(state + 5, 0, sizeof(*state) * (25 - 5));
24822488
state[20] = W64LIT(0x8000000000000000);
24832489
for (i = 0; i < GEN_MATRIX_SIZE; i += SHA3_128_BYTES) {
2490+
#ifndef WC_SHA3_NO_ASM
24842491
if (IS_INTEL_BMI2(cpuid_flags)) {
24852492
sha3_block_bmi2(state);
24862493
}
@@ -2489,13 +2496,16 @@ static int mlkem_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed)
24892496
sha3_block_avx2(state);
24902497
RESTORE_VECTOR_REGISTERS();
24912498
}
2492-
else {
2499+
else
2500+
#endif /* !WC_SHA3_NO_ASM */
2501+
{
24932502
BlockSha3(state);
24942503
}
24952504
XMEMCPY(rand + i, state, SHA3_128_BYTES);
24962505
}
24972506
ctr0 = mlkem_rej_uniform_n_avx2(a, MLKEM_N, rand, GEN_MATRIX_SIZE);
24982507
while (ctr0 < MLKEM_N) {
2508+
#ifndef WC_SHA3_NO_ASM
24992509
if (IS_INTEL_BMI2(cpuid_flags)) {
25002510
sha3_block_bmi2(state);
25012511
}
@@ -2504,7 +2514,9 @@ static int mlkem_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed)
25042514
sha3_block_avx2(state);
25052515
RESTORE_VECTOR_REGISTERS();
25062516
}
2507-
else {
2517+
else
2518+
#endif /* !WC_SHA3_NO_ASM */
2519+
{
25082520
BlockSha3(state);
25092521
}
25102522
XMEMCPY(rand, state, SHA3_128_BYTES);
@@ -3054,6 +3066,7 @@ static int mlkem_prf(wc_Shake* shake256, byte* out, unsigned int outLen,
30543066
unsigned int len = min(outLen, WC_SHA3_256_BLOCK_SIZE);
30553067

30563068
/* Perform a block operation on the state for next block of output. */
3069+
#ifndef WC_SHA3_NO_ASM
30573070
if (IS_INTEL_BMI2(cpuid_flags)) {
30583071
sha3_block_bmi2(state);
30593072
}
@@ -3062,7 +3075,9 @@ static int mlkem_prf(wc_Shake* shake256, byte* out, unsigned int outLen,
30623075
sha3_block_avx2(state);
30633076
RESTORE_VECTOR_REGISTERS();
30643077
}
3065-
else {
3078+
else
3079+
#endif /* !WC_SHA3_NO_ASM */
3080+
{
30663081
BlockSha3(state);
30673082
}
30683083

@@ -3109,14 +3124,17 @@ int mlkem_kdf(byte* seed, int seedLen, byte* out, int outLen)
31093124
XMEMSET(state + len64 + 1, 0, (25 - len64 - 1) * sizeof(word64));
31103125
state[WC_SHA3_256_COUNT - 1] = W64LIT(0x8000000000000000);
31113126

3127+
#ifndef WC_SHA3_NO_ASM
31123128
if (IS_INTEL_BMI2(cpuid_flags)) {
31133129
sha3_block_bmi2(state);
31143130
}
31153131
else if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) {
31163132
sha3_block_avx2(state);
31173133
RESTORE_VECTOR_REGISTERS();
31183134
}
3119-
else {
3135+
else
3136+
#endif
3137+
{
31203138
BlockSha3(state);
31213139
}
31223140
XMEMCPY(out, state, outLen);
@@ -4121,14 +4139,17 @@ static int mlkem_get_noise_eta2_avx2(MLKEM_PRF_T* prf, sword16* p,
41214139
state[WC_SHA3_256_COUNT - 1] = W64LIT(0x8000000000000000);
41224140

41234141
/* Perform a block operation on the state for next block of output. */
4142+
#ifndef WC_SHA3_NO_ASM
41244143
if (IS_INTEL_BMI2(cpuid_flags)) {
41254144
sha3_block_bmi2(state);
41264145
}
41274146
else if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) {
41284147
sha3_block_avx2(state);
41294148
RESTORE_VECTOR_REGISTERS();
41304149
}
4131-
else {
4150+
else
4151+
#endif /* !WC_SHA3_NO_ASM */
4152+
{
41324153
BlockSha3(state);
41334154
}
41344155
mlkem_cbd_eta2_avx2(p, (byte*)state);

wolfssl/wolfcrypt/sha3.h

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -220,23 +220,25 @@ WOLFSSL_API int wc_Shake256_Copy(wc_Shake* src, wc_Sha3* dst);
220220
WOLFSSL_API int wc_Sha3_GetFlags(wc_Sha3* sha3, word32* flags);
221221
#endif
222222

223+
WOLFSSL_LOCAL void BlockSha3(word64 *s);
224+
223225
#ifdef WC_SHA3_NO_ASM
224-
/* asm speedups disabled */
226+
/* asm speedups disabled */
227+
#if defined(USE_INTEL_SPEEDUP) && !defined(WC_MLKEM_NO_ASM)
228+
/* native ML-KEM uses this directly. */
229+
WOLFSSL_LOCAL void sha3_blocksx4_avx2(word64* s);
230+
#endif
225231
#elif defined(USE_INTEL_SPEEDUP)
226-
WOLFSSL_LOCAL void sha3_block_n_bmi2(word64* s, const byte* data, word32 n,
227-
word64 c);
228-
WOLFSSL_LOCAL void sha3_block_bmi2(word64* s);
229-
WOLFSSL_LOCAL void sha3_block_avx2(word64* s);
230-
WOLFSSL_LOCAL void sha3_blocksx4_avx2(word64* s);
231-
WOLFSSL_LOCAL void BlockSha3(word64 *s);
232+
WOLFSSL_LOCAL void sha3_block_n_bmi2(word64* s, const byte* data, word32 n,
233+
word64 c);
234+
WOLFSSL_LOCAL void sha3_block_bmi2(word64* s);
235+
WOLFSSL_LOCAL void sha3_block_avx2(word64* s);
236+
WOLFSSL_LOCAL void sha3_blocksx4_avx2(word64* s);
232237
#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM)
233-
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
234-
WOLFSSL_LOCAL void BlockSha3_crypto(word64 *s);
235-
#endif
236-
WOLFSSL_LOCAL void BlockSha3_base(word64 *s);
237-
WOLFSSL_LOCAL void BlockSha3(word64 *s);
238-
#elif defined(WOLFSSL_ARMASM) || defined(WOLFSSL_RISCV_ASM)
239-
WOLFSSL_LOCAL void BlockSha3(word64 *s);
238+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
239+
WOLFSSL_LOCAL void BlockSha3_crypto(word64 *s);
240+
#endif
241+
WOLFSSL_LOCAL void BlockSha3_base(word64 *s);
240242
#endif
241243

242244
#ifdef __cplusplus

0 commit comments

Comments
 (0)