Skip to content

Commit 2285c02

Browse files
authored
Merge pull request #7998 from SparkiDev/kyber_aarch64_asm
Kyber Aarch64: assembly implementations of functions
2 parents a1a3a0b + de65778 commit 2285c02

13 files changed

Lines changed: 25658 additions & 107 deletions

File tree

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2977,7 +2977,7 @@ then
29772977
AM_CPPFLAGS="$AM_CPPFLAGS+sm4"
29782978
fi
29792979
else
2980-
AM_CPPFLAGS="$AM_CPPFLAGS -mcpu=generic+crypto"
2980+
AM_CPPFLAGS="$AM_CPPFLAGS -mcpu=generic+crypto -DWOLFSSL_AARCH64_NO_SQRMLSH"
29812981
fi
29822982
;;
29832983
esac

src/include.am

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1057,6 +1057,13 @@ if BUILD_INTELASM
10571057
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber_asm.S
10581058
endif
10591059
endif
1060+
if BUILD_ARMASM_NEON
1061+
if BUILD_ARMASM_INLINE
1062+
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-kyber-asm_c.c
1063+
else
1064+
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-kyber-asm.S
1065+
endif !BUILD_ARMASM_INLINE
1066+
endif BUILD_ARMASM_NEON
10601067
endif
10611068

10621069
if BUILD_DILITHIUM

wolfcrypt/src/port/arm/armv8-curve25519.S

Lines changed: 28 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -337,8 +337,7 @@ _fe_cmov_table:
337337
#endif /* __APPLE__ */
338338
stp x29, x30, [sp, #-128]!
339339
add x29, sp, #0
340-
str x17, [x29, #40]
341-
str x19, [x29, #48]
340+
stp x17, x19, [x29, #40]
342341
stp x20, x21, [x29, #56]
343342
stp x22, x23, [x29, #72]
344343
stp x24, x25, [x29, #88]
@@ -546,8 +545,7 @@ _fe_cmov_table:
546545
stp x10, x11, [x0, #48]
547546
stp x12, x13, [x0, #64]
548547
stp x14, x15, [x0, #80]
549-
ldr x17, [x29, #40]
550-
ldr x19, [x29, #48]
548+
ldp x17, x19, [x29, #40]
551549
ldp x20, x21, [x29, #56]
552550
ldp x22, x23, [x29, #72]
553551
ldp x24, x25, [x29, #88]
@@ -573,8 +571,7 @@ _fe_mul:
573571
#endif /* __APPLE__ */
574572
stp x29, x30, [sp, #-64]!
575573
add x29, sp, #0
576-
str x17, [x29, #24]
577-
str x19, [x29, #32]
574+
stp x17, x19, [x29, #24]
578575
stp x20, x21, [x29, #40]
579576
str x22, [x29, #56]
580577
# Multiply
@@ -703,8 +700,7 @@ _fe_mul:
703700
# Store
704701
stp x6, x7, [x0]
705702
stp x8, x9, [x0, #16]
706-
ldr x17, [x29, #24]
707-
ldr x19, [x29, #32]
703+
ldp x17, x19, [x29, #24]
708704
ldp x20, x21, [x29, #40]
709705
ldr x22, [x29, #56]
710706
ldp x29, x30, [sp], #0x40
@@ -835,8 +831,7 @@ _fe_invert:
835831
#endif /* __APPLE__ */
836832
stp x29, x30, [sp, #-176]!
837833
add x29, sp, #0
838-
str x17, [x29, #160]
839-
str x20, [x29, #168]
834+
stp x17, x20, [x29, #160]
840835
# Invert
841836
str x0, [x29, #144]
842837
str x1, [x29, #152]
@@ -1694,8 +1689,7 @@ L_fe_invert8:
16941689
#else
16951690
bl _fe_mul
16961691
#endif /* __APPLE__ */
1697-
ldr x17, [x29, #160]
1698-
ldr x20, [x29, #168]
1692+
ldp x17, x20, [x29, #160]
16991693
ldp x29, x30, [sp], #0xb0
17001694
ret
17011695
#ifndef __APPLE__
@@ -1715,8 +1709,7 @@ _curve25519:
17151709
#endif /* __APPLE__ */
17161710
stp x29, x30, [sp, #-288]!
17171711
add x29, sp, #0
1718-
str x17, [x29, #200]
1719-
str x19, [x29, #208]
1712+
stp x17, x19, [x29, #200]
17201713
stp x20, x21, [x29, #216]
17211714
stp x22, x23, [x29, #232]
17221715
stp x24, x25, [x29, #248]
@@ -3801,8 +3794,7 @@ L_curve25519_inv_8:
38013794
stp x14, x15, [x0]
38023795
stp x16, x17, [x0, #16]
38033796
mov x0, xzr
3804-
ldr x17, [x29, #200]
3805-
ldr x19, [x29, #208]
3797+
ldp x17, x19, [x29, #200]
38063798
ldp x20, x21, [x29, #216]
38073799
ldp x22, x23, [x29, #232]
38083800
ldp x24, x25, [x29, #248]
@@ -3828,8 +3820,7 @@ _fe_pow22523:
38283820
#endif /* __APPLE__ */
38293821
stp x29, x30, [sp, #-144]!
38303822
add x29, sp, #0
3831-
str x17, [x29, #128]
3832-
str x23, [x29, #136]
3823+
stp x17, x23, [x29, #128]
38333824
# pow22523
38343825
str x0, [x29, #112]
38353826
str x1, [x29, #120]
@@ -4619,8 +4610,7 @@ L_fe_pow22523_7:
46194610
#else
46204611
bl _fe_mul
46214612
#endif /* __APPLE__ */
4622-
ldr x17, [x29, #128]
4623-
ldr x23, [x29, #136]
4613+
ldp x17, x23, [x29, #128]
46244614
ldp x29, x30, [sp], #0x90
46254615
ret
46264616
#ifndef __APPLE__
@@ -4640,8 +4630,7 @@ _ge_p1p1_to_p2:
46404630
#endif /* __APPLE__ */
46414631
stp x29, x30, [sp, #-80]!
46424632
add x29, sp, #0
4643-
str x17, [x29, #40]
4644-
str x19, [x29, #48]
4633+
stp x17, x19, [x29, #40]
46454634
stp x20, x21, [x29, #56]
46464635
str x22, [x29, #72]
46474636
str x0, [x29, #16]
@@ -5002,8 +4991,7 @@ _ge_p1p1_to_p2:
50024991
# Store
50034992
stp x14, x15, [x0]
50044993
stp x16, x17, [x0, #16]
5005-
ldr x17, [x29, #40]
5006-
ldr x19, [x29, #48]
4994+
ldp x17, x19, [x29, #40]
50074995
ldp x20, x21, [x29, #56]
50084996
ldr x22, [x29, #72]
50094997
ldp x29, x30, [sp], #0x50
@@ -5025,8 +5013,7 @@ _ge_p1p1_to_p3:
50255013
#endif /* __APPLE__ */
50265014
stp x29, x30, [sp, #-112]!
50275015
add x29, sp, #0
5028-
str x17, [x29, #40]
5029-
str x19, [x29, #48]
5016+
stp x17, x19, [x29, #40]
50305017
stp x20, x21, [x29, #56]
50315018
stp x22, x23, [x29, #72]
50325019
stp x24, x25, [x29, #88]
@@ -5505,8 +5492,7 @@ _ge_p1p1_to_p3:
55055492
# Store
55065493
stp x14, x15, [x0]
55075494
stp x16, x17, [x0, #16]
5508-
ldr x17, [x29, #40]
5509-
ldr x19, [x29, #48]
5495+
ldp x17, x19, [x29, #40]
55105496
ldp x20, x21, [x29, #56]
55115497
ldp x22, x23, [x29, #72]
55125498
ldp x24, x25, [x29, #88]
@@ -5530,8 +5516,7 @@ _ge_p2_dbl:
55305516
#endif /* __APPLE__ */
55315517
stp x29, x30, [sp, #-128]!
55325518
add x29, sp, #0
5533-
str x17, [x29, #40]
5534-
str x19, [x29, #48]
5519+
stp x17, x19, [x29, #40]
55355520
stp x20, x21, [x29, #56]
55365521
stp x22, x23, [x29, #72]
55375522
stp x24, x25, [x29, #88]
@@ -5986,8 +5971,7 @@ _ge_p2_dbl:
59865971
sbc x7, x7, xzr
59875972
stp x4, x5, [x0]
59885973
stp x6, x7, [x0, #16]
5989-
ldr x17, [x29, #40]
5990-
ldr x19, [x29, #48]
5974+
ldp x17, x19, [x29, #40]
59915975
ldp x20, x21, [x29, #56]
59925976
ldp x22, x23, [x29, #72]
59935977
ldp x24, x25, [x29, #88]
@@ -6012,8 +5996,7 @@ _ge_madd:
60125996
#endif /* __APPLE__ */
60135997
stp x29, x30, [sp, #-144]!
60145998
add x29, sp, #0
6015-
str x17, [x29, #56]
6016-
str x19, [x29, #64]
5999+
stp x17, x19, [x29, #56]
60176000
stp x20, x21, [x29, #72]
60186001
stp x22, x23, [x29, #88]
60196002
stp x24, x25, [x29, #104]
@@ -6503,8 +6486,7 @@ _ge_madd:
65036486
stp x10, x11, [x0, #16]
65046487
stp x4, x5, [x1]
65056488
stp x6, x7, [x1, #16]
6506-
ldr x17, [x29, #56]
6507-
ldr x19, [x29, #64]
6489+
ldp x17, x19, [x29, #56]
65086490
ldp x20, x21, [x29, #72]
65096491
ldp x22, x23, [x29, #88]
65106492
ldp x24, x25, [x29, #104]
@@ -6529,8 +6511,7 @@ _ge_msub:
65296511
#endif /* __APPLE__ */
65306512
stp x29, x30, [sp, #-144]!
65316513
add x29, sp, #0
6532-
str x17, [x29, #56]
6533-
str x19, [x29, #64]
6514+
stp x17, x19, [x29, #56]
65346515
stp x20, x21, [x29, #72]
65356516
stp x22, x23, [x29, #88]
65366517
stp x24, x25, [x29, #104]
@@ -7020,8 +7001,7 @@ _ge_msub:
70207001
stp x10, x11, [x0, #16]
70217002
stp x4, x5, [x1]
70227003
stp x6, x7, [x1, #16]
7023-
ldr x17, [x29, #56]
7024-
ldr x19, [x29, #64]
7004+
ldp x17, x19, [x29, #56]
70257005
ldp x20, x21, [x29, #72]
70267006
ldp x22, x23, [x29, #88]
70277007
ldp x24, x25, [x29, #104]
@@ -7046,8 +7026,7 @@ _ge_add:
70467026
#endif /* __APPLE__ */
70477027
stp x29, x30, [sp, #-144]!
70487028
add x29, sp, #0
7049-
str x17, [x29, #56]
7050-
str x19, [x29, #64]
7029+
stp x17, x19, [x29, #56]
70517030
stp x20, x21, [x29, #72]
70527031
stp x22, x23, [x29, #88]
70537032
stp x24, x25, [x29, #104]
@@ -7663,8 +7642,7 @@ _ge_add:
76637642
stp x23, x24, [x0, #16]
76647643
stp x12, x13, [x1]
76657644
stp x14, x15, [x1, #16]
7666-
ldr x17, [x29, #56]
7667-
ldr x19, [x29, #64]
7645+
ldp x17, x19, [x29, #56]
76687646
ldp x20, x21, [x29, #72]
76697647
ldp x22, x23, [x29, #88]
76707648
ldp x24, x25, [x29, #104]
@@ -7689,8 +7667,7 @@ _ge_sub:
76897667
#endif /* __APPLE__ */
76907668
stp x29, x30, [sp, #-144]!
76917669
add x29, sp, #0
7692-
str x17, [x29, #56]
7693-
str x19, [x29, #64]
7670+
stp x17, x19, [x29, #56]
76947671
stp x20, x21, [x29, #72]
76957672
stp x22, x23, [x29, #88]
76967673
stp x24, x25, [x29, #104]
@@ -8321,8 +8298,7 @@ _ge_sub:
83218298
stp x14, x15, [x0, #16]
83228299
stp x21, x22, [x1]
83238300
stp x23, x24, [x1, #16]
8324-
ldr x17, [x29, #56]
8325-
ldr x19, [x29, #64]
8301+
ldp x17, x19, [x29, #56]
83268302
ldp x20, x21, [x29, #72]
83278303
ldp x22, x23, [x29, #88]
83288304
ldp x24, x25, [x29, #104]
@@ -8347,8 +8323,7 @@ _sc_reduce:
83478323
#endif /* __APPLE__ */
83488324
stp x29, x30, [sp, #-64]!
83498325
add x29, sp, #0
8350-
str x17, [x29, #16]
8351-
str x19, [x29, #24]
8326+
stp x17, x19, [x29, #16]
83528327
stp x20, x21, [x29, #32]
83538328
stp x22, x23, [x29, #48]
83548329
ldp x2, x3, [x0]
@@ -8525,8 +8500,7 @@ _sc_reduce:
85258500
# Store result
85268501
stp x2, x3, [x0]
85278502
stp x4, x5, [x0, #16]
8528-
ldr x17, [x29, #16]
8529-
ldr x19, [x29, #24]
8503+
ldp x17, x19, [x29, #16]
85308504
ldp x20, x21, [x29, #32]
85318505
ldp x22, x23, [x29, #48]
85328506
ldp x29, x30, [sp], #0x40
@@ -8548,8 +8522,7 @@ _sc_muladd:
85488522
#endif /* __APPLE__ */
85498523
stp x29, x30, [sp, #-96]!
85508524
add x29, sp, #0
8551-
str x17, [x29, #24]
8552-
str x19, [x29, #32]
8525+
stp x17, x19, [x29, #24]
85538526
stp x20, x21, [x29, #40]
85548527
stp x22, x23, [x29, #56]
85558528
stp x24, x25, [x29, #72]
@@ -8824,8 +8797,7 @@ _sc_muladd:
88248797
# Store result
88258798
stp x4, x5, [x0]
88268799
stp x6, x7, [x0, #16]
8827-
ldr x17, [x29, #24]
8828-
ldr x19, [x29, #32]
8800+
ldp x17, x19, [x29, #24]
88298801
ldp x20, x21, [x29, #40]
88308802
ldp x22, x23, [x29, #56]
88318803
ldp x24, x25, [x29, #72]

0 commit comments

Comments
 (0)