Skip to content

Commit d204798

Browse files
committed
Kyber ASM ARMv7E-M/ARMv7-M: added assembly code
Improved performance by reimplementing kyber_ntt, kyber_invntt, kyber_basemul_mont, kyber_basemul_mont_add, and kyber_rej_uniform_c in assembly. Replaced WOLFSSL_SP_NO_UMAAL with WOLFSSL_ARM_ARCH_7M.
1 parent ac788ec commit d204798

10 files changed

Lines changed: 7843 additions & 48 deletions

File tree

src/include.am

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,6 +1192,15 @@ endif
11921192
if BUILD_WC_KYBER
11931193
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber.c
11941194
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber_poly.c
1195+
if BUILD_ARMASM
1196+
if BUILD_ARM_THUMB
1197+
if BUILD_ARMASM_INLINE
1198+
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-kyber-asm_c.c
1199+
else
1200+
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-kyber-asm.S
1201+
endif !BUILD_ARMASM_INLINE
1202+
endif BUILD_ARM_THUMB
1203+
endif BUILD_ARMASM
11951204
if !BUILD_X86_ASM
11961205
if BUILD_INTELASM
11971206
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber_asm.S

wolfcrypt/src/port/arm/thumb2-curve25519.S

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,7 +1511,7 @@ fe_cmov_table:
15111511
#endif /* WC_NO_CACHE_RESISTANT */
15121512
#endif /* HAVE_ED25519_MAKE_KEY || HAVE_ED25519_SIGN */
15131513
#endif /* HAVE_ED25519 */
1514-
#ifdef WOLFSSL_SP_NO_UMAAL
1514+
#ifdef WOLFSSL_ARM_ARCH_7M
15151515
.text
15161516
.align 4
15171517
.globl fe_mul_op
@@ -2023,7 +2023,7 @@ fe_mul_op:
20232023
POP {pc}
20242024
/* Cycle Count = 239 */
20252025
.size fe_mul_op,.-fe_mul_op
2026-
#endif /* WOLFSSL_SP_NO_UMAAL */
2026+
#endif /* WOLFSSL_ARM_ARCH_7M */
20272027
.text
20282028
.align 4
20292029
.globl fe_mul
@@ -2034,7 +2034,7 @@ fe_mul:
20342034
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
20352035
/* Cycle Count = 24 */
20362036
.size fe_mul,.-fe_mul
2037-
#ifdef WOLFSSL_SP_NO_UMAAL
2037+
#ifdef WOLFSSL_ARM_ARCH_7M
20382038
.text
20392039
.align 4
20402040
.globl fe_sq_op
@@ -2425,7 +2425,7 @@ fe_sq_op:
24252425
POP {pc}
24262426
/* Cycle Count = 179 */
24272427
.size fe_sq_op,.-fe_sq_op
2428-
#endif /* WOLFSSL_SP_NO_UMAAL */
2428+
#endif /* WOLFSSL_ARM_ARCH_7M */
24292429
.text
24302430
.align 4
24312431
.globl fe_sq
@@ -2437,7 +2437,7 @@ fe_sq:
24372437
/* Cycle Count = 24 */
24382438
.size fe_sq,.-fe_sq
24392439
#ifdef HAVE_CURVE25519
2440-
#ifdef WOLFSSL_SP_NO_UMAAL
2440+
#ifdef WOLFSSL_ARM_ARCH_7M
24412441
.text
24422442
.align 4
24432443
.globl fe_mul121666
@@ -2524,7 +2524,7 @@ fe_mul121666:
25242524
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
25252525
/* Cycle Count = 69 */
25262526
.size fe_mul121666,.-fe_mul121666
2527-
#endif /* WOLFSSL_SP_NO_UMAAL */
2527+
#endif /* WOLFSSL_ARM_ARCH_7M */
25282528
#ifndef WC_NO_CACHE_RESISTANT
25292529
.text
25302530
.align 4
@@ -3466,7 +3466,7 @@ L_fe_invert8:
34663466
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
34673467
/* Cycle Count = 292 */
34683468
.size fe_invert,.-fe_invert
3469-
#ifdef WOLFSSL_SP_NO_UMAAL
3469+
#ifdef WOLFSSL_ARM_ARCH_7M
34703470
.text
34713471
.align 4
34723472
.globl fe_sq2
@@ -3925,7 +3925,7 @@ fe_sq2:
39253925
POP {pc}
39263926
/* Cycle Count = 213 */
39273927
.size fe_sq2,.-fe_sq2
3928-
#endif /* WOLFSSL_SP_NO_UMAAL */
3928+
#endif /* WOLFSSL_ARM_ARCH_7M */
39293929
.text
39303930
.align 4
39313931
.globl fe_pow22523
@@ -4535,7 +4535,7 @@ ge_sub:
45354535
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
45364536
/* Cycle Count = 138 */
45374537
.size ge_sub,.-ge_sub
4538-
#ifdef WOLFSSL_SP_NO_UMAAL
4538+
#ifdef WOLFSSL_ARM_ARCH_7M
45394539
.text
45404540
.align 4
45414541
.globl sc_reduce
@@ -5258,9 +5258,9 @@ sc_reduce:
52585258
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
52595259
/* Cycle Count = 502 */
52605260
.size sc_reduce,.-sc_reduce
5261-
#endif /* WOLFSSL_SP_NO_UMAAL */
5261+
#endif /* WOLFSSL_ARM_ARCH_7M */
52625262
#ifdef HAVE_ED25519_SIGN
5263-
#ifdef WOLFSSL_SP_NO_UMAAL
5263+
#ifdef WOLFSSL_ARM_ARCH_7M
52645264
.text
52655265
.align 4
52665266
.globl sc_muladd
@@ -6470,7 +6470,7 @@ sc_muladd:
64706470
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
64716471
/* Cycle Count = 752 */
64726472
.size sc_muladd,.-sc_muladd
6473-
#endif /* WOLFSSL_SP_NO_UMAAL */
6473+
#endif /* WOLFSSL_ARM_ARCH_7M */
64746474
#endif /* HAVE_ED25519_SIGN */
64756475
#endif /* HAVE_ED25519 */
64766476

wolfcrypt/src/port/arm/thumb2-curve25519_c.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1667,7 +1667,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
16671667
#endif /* WC_NO_CACHE_RESISTANT */
16681668
#endif /* HAVE_ED25519_MAKE_KEY || HAVE_ED25519_SIGN */
16691669
#endif /* HAVE_ED25519 */
1670-
#ifdef WOLFSSL_SP_NO_UMAAL
1670+
#ifdef WOLFSSL_ARM_ARCH_7M
16711671
void fe_mul_op(void);
16721672
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
16731673
void fe_mul_op()
@@ -2193,7 +2193,7 @@ void fe_mul_op()
21932193
);
21942194
}
21952195

2196-
#endif /* WOLFSSL_SP_NO_UMAAL */
2196+
#endif /* WOLFSSL_ARM_ARCH_7M */
21972197
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
21982198
void fe_mul(fe r_p, const fe a_p, const fe b_p)
21992199
#else
@@ -2214,7 +2214,7 @@ void fe_mul(fe r, const fe a, const fe b)
22142214
);
22152215
}
22162216

2217-
#ifdef WOLFSSL_SP_NO_UMAAL
2217+
#ifdef WOLFSSL_ARM_ARCH_7M
22182218
void fe_sq_op(void);
22192219
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
22202220
void fe_sq_op()
@@ -2619,7 +2619,7 @@ void fe_sq_op()
26192619
);
26202620
}
26212621

2622-
#endif /* WOLFSSL_SP_NO_UMAAL */
2622+
#endif /* WOLFSSL_ARM_ARCH_7M */
26232623
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
26242624
void fe_sq(fe r_p, const fe a_p)
26252625
#else
@@ -2640,7 +2640,7 @@ void fe_sq(fe r, const fe a)
26402640
}
26412641

26422642
#ifdef HAVE_CURVE25519
2643-
#ifdef WOLFSSL_SP_NO_UMAAL
2643+
#ifdef WOLFSSL_ARM_ARCH_7M
26442644
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
26452645
void fe_mul121666(fe r_p, fe a_p)
26462646
#else
@@ -2745,7 +2745,7 @@ void fe_mul121666(fe r, fe a)
27452745
);
27462746
}
27472747

2748-
#endif /* WOLFSSL_SP_NO_UMAAL */
2748+
#endif /* WOLFSSL_ARM_ARCH_7M */
27492749
#ifndef WC_NO_CACHE_RESISTANT
27502750
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
27512751
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
@@ -3907,7 +3907,7 @@ void fe_invert(fe r, const fe a)
39073907
);
39083908
}
39093909

3910-
#ifdef WOLFSSL_SP_NO_UMAAL
3910+
#ifdef WOLFSSL_ARM_ARCH_7M
39113911
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
39123912
void fe_sq2(fe r_p, const fe a_p)
39133913
#else
@@ -4384,7 +4384,7 @@ void fe_sq2(fe r, const fe a)
43844384
);
43854385
}
43864386

4387-
#endif /* WOLFSSL_SP_NO_UMAAL */
4387+
#endif /* WOLFSSL_ARM_ARCH_7M */
43884388
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
43894389
void fe_pow22523(fe r_p, const fe a_p)
43904390
#else
@@ -5126,7 +5126,7 @@ void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
51265126
);
51275127
}
51285128

5129-
#ifdef WOLFSSL_SP_NO_UMAAL
5129+
#ifdef WOLFSSL_ARM_ARCH_7M
51305130
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
51315131
void sc_reduce(byte* s_p)
51325132
#else
@@ -5865,9 +5865,9 @@ void sc_reduce(byte* s)
58655865
);
58665866
}
58675867

5868-
#endif /* WOLFSSL_SP_NO_UMAAL */
5868+
#endif /* WOLFSSL_ARM_ARCH_7M */
58695869
#ifdef HAVE_ED25519_SIGN
5870-
#ifdef WOLFSSL_SP_NO_UMAAL
5870+
#ifdef WOLFSSL_ARM_ARCH_7M
58715871
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
58725872
void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
58735873
#else
@@ -7099,7 +7099,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
70997099
);
71007100
}
71017101

7102-
#endif /* WOLFSSL_SP_NO_UMAAL */
7102+
#endif /* WOLFSSL_ARM_ARCH_7M */
71037103
#endif /* HAVE_ED25519_SIGN */
71047104
#endif /* HAVE_ED25519 */
71057105

0 commit comments

Comments
 (0)