Skip to content

Commit 423c1d3

Browse files
committed
fixup
1 parent ebb49b6 commit 423c1d3

1 file changed

Lines changed: 109 additions & 114 deletions

File tree

wolfcrypt/src/port/riscv/riscv-64-chacha.c

Lines changed: 109 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,10 +1395,10 @@ static WC_INLINE int wc_chacha_encrypt_256(const word32* input, const byte* m,
13951395
/* Odd Round */
13961396
QUARTER_ROUND_ODD_4()
13971397
ODD_SHUFFLE_4()
1398+
"addi a3, a3, -1\n\t"
13981399
/* Even Round */
13991400
QUARTER_ROUND_EVEN_4()
14001401
EVEN_SHUFFLE_4()
1401-
"addi a3, a3, -1\n\t"
14021402
"bnez a3, L_chacha20_riscv_256_loop\n\t"
14031403
/* Load message */
14041404
"mv t2, %[m]\n\t"
@@ -1770,13 +1770,13 @@ static WC_INLINE void wc_chacha_encrypt_64(const word32* input, const byte* m,
17701770
EIGHT_QUARTER_ROUNDS(REG_V0, REG_V1, REG_V2, REG_V3, REG_V12)
17711771
EIGHT_QUARTER_ROUNDS(REG_V0, REG_V1, REG_V2, REG_V3, REG_V12)
17721772
EIGHT_QUARTER_ROUNDS(REG_V0, REG_V1, REG_V2, REG_V3, REG_V12)
1773+
"addi t1, %[bytes], -64\n\t"
17731774
/* Add back state */
17741775
VADD_VV(REG_V0, REG_V0, REG_V8)
17751776
VADD_VV(REG_V1, REG_V1, REG_V9)
17761777
VADD_VV(REG_V2, REG_V2, REG_V10)
17771778
VADD_VV(REG_V3, REG_V3, REG_V11)
1778-
"addi t2, %[bytes], -64\n\t"
1779-
"bltz t2, L_chacha20_riscv_64_lt_64\n\t"
1779+
"bltz t1, L_chacha20_riscv_64_lt_64\n\t"
17801780
"mv t2, %[m]\n\t"
17811781
VL4RE32_V(REG_V4, REG_T2)
17821782
VXOR_VV(REG_V4, REG_V4, REG_V0)
@@ -1785,73 +1785,73 @@ static WC_INLINE void wc_chacha_encrypt_64(const word32* input, const byte* m,
17851785
VXOR_VV(REG_V7, REG_V7, REG_V3)
17861786
"mv t2, %[c]\n\t"
17871787
VS4R_V(REG_V4, REG_T2)
1788+
"addi %[bytes], %[bytes], -64\n\t"
17881789
"addi %[c], %[c], 64\n\t"
17891790
"addi %[m], %[m], 64\n\t"
1790-
"addi %[bytes], %[bytes], -64\n\t"
17911791
VADD_VV(REG_V11, REG_V11, REG_V13)
17921792
"bnez %[bytes], L_chacha20_riscv_64_loop\n\t"
17931793
"beqz %[bytes], L_chacha20_riscv_64_done\n\t"
17941794
"\n"
17951795
"L_chacha20_riscv_64_lt_64:\n\t"
17961796
"mv t2, %[over]\n\t"
1797+
"addi t1, %[bytes], -32\n\t"
17971798
VS4R_V(REG_V0, REG_T2)
17981799

1799-
"addi t2, %[bytes], -32\n\t"
1800-
"bltz t2, L_chacha20_riscv_64_lt_32\n\t"
1800+
"bltz t1, L_chacha20_riscv_64_lt_32\n\t"
18011801
"mv t2, %[m]\n\t"
18021802
VL2RE32_V(REG_V4, REG_T2)
18031803
VXOR_VV(REG_V4, REG_V4, REG_V0)
18041804
VXOR_VV(REG_V5, REG_V5, REG_V1)
18051805
"mv t2, %[c]\n\t"
18061806
VS2R_V(REG_V4, REG_T2)
1807+
"addi %[bytes], %[bytes], -32\n\t"
18071808
"addi %[c], %[c], 32\n\t"
18081809
"addi %[m], %[m], 32\n\t"
1809-
"addi %[bytes], %[bytes], -32\n\t"
18101810
"beqz %[bytes], L_chacha20_riscv_64_done\n\t"
18111811
VMVR_V(REG_V0, REG_V2, 2)
18121812
"\n"
18131813
"L_chacha20_riscv_64_lt_32:\n\t"
1814-
"addi t2, %[bytes], -16\n\t"
1815-
"bltz t2, L_chacha20_riscv_64_lt_16\n\t"
1814+
"addi t1, %[bytes], -16\n\t"
1815+
"bltz t1, L_chacha20_riscv_64_lt_16\n\t"
18161816
"mv t2, %[m]\n\t"
18171817
VL1RE32_V(REG_V4, REG_T2)
18181818
VXOR_VV(REG_V4, REG_V4, REG_V0)
18191819
"mv t2, %[c]\n\t"
18201820
VS1R_V(REG_V4, REG_T2)
1821+
"addi %[bytes], %[bytes], -16\n\t"
18211822
"addi %[c], %[c], 16\n\t"
18221823
"addi %[m], %[m], 16\n\t"
1823-
"addi %[bytes], %[bytes], -16\n\t"
18241824
"beqz %[bytes], L_chacha20_riscv_64_done\n\t"
18251825
VMV_V_V(REG_V0, REG_V1)
18261826
"\n"
18271827
"L_chacha20_riscv_64_lt_16:\n\t"
1828-
"addi t2, %[bytes], -8\n\t"
1829-
"bltz t2, L_chacha20_riscv_64_lt_8\n\t"
1828+
"addi t1, %[bytes], -8\n\t"
1829+
"bltz t1, L_chacha20_riscv_64_lt_8\n\t"
18301830
VSETIVLI(REG_X0, 2, 1, 1, 0b011, 0b000)
18311831
VMV_X_S(REG_T0, REG_V0)
18321832
VSETIVLI(REG_X0, 4, 1, 1, 0b010, 0b000)
18331833
"ld t1, (%[m])\n\t"
18341834
"xor t1, t1, t0\n\t"
18351835
"sd t1, (%[c])\n\t"
1836+
"addi %[bytes], %[bytes], -8\n\t"
18361837
"addi %[c], %[c], 8\n\t"
18371838
"addi %[m], %[m], 8\n\t"
1838-
"addi %[bytes], %[bytes], -8\n\t"
18391839
"beqz %[bytes], L_chacha20_riscv_64_done\n\t"
18401840
VSLIDEDOWN_VI(REG_V0, REG_V0, 2)
18411841
"\n"
18421842
"L_chacha20_riscv_64_lt_8:\n\t"
1843+
"addi %[bytes], %[bytes], -1\n\t"
18431844
VSETIVLI(REG_X0, 2, 1, 1, 0b011, 0b000)
18441845
VMV_X_S(REG_T0, REG_V0)
18451846
VSETIVLI(REG_X0, 4, 1, 1, 0b010, 0b000)
1846-
"addi %[bytes], %[bytes], -1\n\t"
18471847
"\n"
18481848
"L_chacha20_riscv_64_loop_lt_8:\n\t"
1849+
"addi %[bytes], %[bytes], -1\n\t"
18491850
"lb t1, (%[m])\n\t"
18501851
"addi %[m], %[m], 1\n\t"
18511852
"xor t1, t1, t0\n\t"
18521853
"sb t1, (%[c])\n\t"
18531854
"addi %[c], %[c], 1\n\t"
1854-
"addi %[bytes], %[bytes], -1\n\t"
18551855
"srli t0, t0, 8\n\t"
18561856
"bgez %[bytes], L_chacha20_riscv_64_loop_lt_8\n\t"
18571857
"\n"
@@ -2085,9 +2085,11 @@ static void wc_chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
20852085
static WC_INLINE void wc_chacha_encrypt(const word32* input, const byte* m,
20862086
byte* c, word32 bytes, word32* over)
20872087
{
2088-
word64 bytes64 = (word64)bytes;
2089-
20902088
__asm__ __volatile__ (
2089+
/* Ensure 64-bit bytes has top bits clear. */
2090+
"slli %[bytes], %[bytes], 32\n\t"
2091+
"srli %[bytes], %[bytes], 32\n\t"
2092+
20912093
"L_chacha20_riscv_outer:\n\t"
20922094
/* Move state into regular registers */
20932095
"ld a4, 0(%[input])\n\t"
@@ -2113,11 +2115,13 @@ static WC_INLINE void wc_chacha_encrypt(const word32* input, const byte* m,
21132115
"L_chacha20_riscv_loop:\n\t"
21142116
/* Odd Round */
21152117
QUARTER_ROUND_ODD()
2118+
"addi a3, a3, -1\n\t"
21162119
/* Even Round */
21172120
QUARTER_ROUND_EVEN()
2118-
"addi a3, a3, -1\n\t"
21192121
"bnez a3, L_chacha20_riscv_loop\n\t"
21202122

2123+
"addi %[bytes], %[bytes], -64\n\t"
2124+
21212125
"ld t0, 0(%[input])\n\t"
21222126
"ld t1, 8(%[input])\n\t"
21232127
"ld t2, 16(%[input])\n\t"
@@ -2141,89 +2145,20 @@ static WC_INLINE void wc_chacha_encrypt(const word32* input, const byte* m,
21412145
"add s2, s2, t0\n\t"
21422146
"add s4, s4, t1\n\t"
21432147
"add s6, s6, t2\n\t"
2148+
"addi t2, t2, 1\n\t"
21442149
"add s8, s8, s1\n\t"
21452150
"srli t0, t0, 32\n\t"
21462151
"srli t1, t1, 32\n\t"
2152+
"sw t2, 48(%[input])\n\t"
21472153
"srli t2, t2, 32\n\t"
21482154
"srli s1, s1, 32\n\t"
21492155
"add s3, s3, t0\n\t"
21502156
"add s5, s5, t1\n\t"
21512157
"add s7, s7, t2\n\t"
21522158
"add s9, s9, s1\n\t"
21532159

2154-
"addi %[bytes], %[bytes], -64\n\t"
2155-
"bgez %[bytes], L_chacha20_riscv_xor\n\t"
2156-
"addi a3, %[bytes], 64\n\t"
2157-
2158-
"sw a4, 0(%[over])\n\t"
2159-
"sw a5, 4(%[over])\n\t"
2160-
"sw a6, 8(%[over])\n\t"
2161-
"sw a7, 12(%[over])\n\t"
2162-
"sw t3, 16(%[over])\n\t"
2163-
"sw t4, 20(%[over])\n\t"
2164-
"sw t5, 24(%[over])\n\t"
2165-
"sw t6, 28(%[over])\n\t"
2166-
"sw s2, 32(%[over])\n\t"
2167-
"sw s3, 36(%[over])\n\t"
2168-
"sw s4, 40(%[over])\n\t"
2169-
"sw s5, 44(%[over])\n\t"
2170-
"sw s6, 48(%[over])\n\t"
2171-
"sw s7, 52(%[over])\n\t"
2172-
"sw s8, 56(%[over])\n\t"
2173-
"sw s9, 60(%[over])\n\t"
2174-
2175-
"addi t0, a3, -8\n\t"
2176-
"bltz t0, L_chacha20_riscv_32bit\n\t"
2177-
"addi a3, a3, -1\n\t"
2178-
"L_chacha20_riscv_64bit_loop:\n\t"
2179-
"ld t0, (%[m])\n\t"
2180-
"ld t1, (%[over])\n\t"
2181-
"xor t0, t0, t1\n\t"
2182-
"sd t0, (%[c])\n\t"
2183-
"addi %[m], %[m], 8\n\t"
2184-
"addi %[c], %[c], 8\n\t"
2185-
"addi %[over], %[over], 8\n\t"
2186-
"addi a3, a3, -8\n\t"
2187-
"bgez a3, L_chacha20_riscv_64bit_loop\n\t"
2188-
"addi a3, a3, 1\n\t"
2189-
2190-
"L_chacha20_riscv_32bit:\n\t"
2191-
"addi t0, a3, -4\n\t"
2192-
"bltz t0, L_chacha20_riscv_16bit\n\t"
2193-
"lw t0, (%[m])\n\t"
2194-
"lw t1, (%[over])\n\t"
2195-
"xor t0, t0, t1\n\t"
2196-
"sw t0, (%[c])\n\t"
2197-
"addi %[m], %[m], 4\n\t"
2198-
"addi %[c], %[c], 4\n\t"
2199-
"addi %[over], %[over], 4\n\t"
2200-
2201-
"L_chacha20_riscv_16bit:\n\t"
2202-
"addi t0, a3, -2\n\t"
2203-
"bltz t0, L_chacha20_riscv_8bit\n\t"
2204-
"lh t0, (%[m])\n\t"
2205-
"lh t1, (%[over])\n\t"
2206-
"xor t0, t0, t1\n\t"
2207-
"sh t0, (%[c])\n\t"
2208-
"addi %[m], %[m], 2\n\t"
2209-
"addi %[c], %[c], 2\n\t"
2210-
"addi %[over], %[over], 2\n\t"
2211-
2212-
"L_chacha20_riscv_8bit:\n\t"
2213-
"addi t0, a3, -1\n\t"
2214-
"bltz t0, L_chacha20_riscv_bytes_done\n\t"
2215-
"lb t0, (%[m])\n\t"
2216-
"lb t1, (%[over])\n\t"
2217-
"xor t0, t0, t1\n\t"
2218-
"sb t0, (%[c])\n\t"
2219-
2220-
"L_chacha20_riscv_bytes_done:\n\t"
2221-
"lw t0, 48(%[input])\n\t"
2222-
"addi t0, t0, 1\n\t"
2223-
"sw t0, 48(%[input])\n\t"
2224-
"bltz %[bytes], L_chacha20_riscv_done\n\t"
2160+
"bltz %[bytes], L_chacha20_riscv_over\n\t"
22252161

2226-
"L_chacha20_riscv_xor:\n\t"
22272162
#if !defined(WOLFSSL_RISCV_BIT_MANIPULATION)
22282163
"ld t0, 0(%[m])\n\t"
22292164
"ld t1, 8(%[m])\n\t"
@@ -2308,16 +2243,80 @@ static WC_INLINE void wc_chacha_encrypt(const word32* input, const byte* m,
23082243
"sd s8, 56(%[c])\n\t"
23092244
#endif
23102245

2311-
"lw t0, 48(%[input])\n\t"
23122246
"addi %[m], %[m], 64\n\t"
2313-
"addi t0, t0, 1\n\t"
23142247
"addi %[c], %[c], 64\n\t"
2315-
"sw t0, 48(%[input])\n\t"
23162248

23172249
"bnez %[bytes], L_chacha20_riscv_outer\n\t"
2250+
"beqz %[bytes], L_chacha20_riscv_done\n\t"
2251+
2252+
"L_chacha20_riscv_over:\n\t"
2253+
"addi a3, %[bytes], 64\n\t"
2254+
2255+
"sw a4, 0(%[over])\n\t"
2256+
"sw a5, 4(%[over])\n\t"
2257+
"sw a6, 8(%[over])\n\t"
2258+
"sw a7, 12(%[over])\n\t"
2259+
"sw t3, 16(%[over])\n\t"
2260+
"sw t4, 20(%[over])\n\t"
2261+
"sw t5, 24(%[over])\n\t"
2262+
"sw t6, 28(%[over])\n\t"
2263+
"sw s2, 32(%[over])\n\t"
2264+
"sw s3, 36(%[over])\n\t"
2265+
"sw s4, 40(%[over])\n\t"
2266+
"sw s5, 44(%[over])\n\t"
2267+
"sw s6, 48(%[over])\n\t"
2268+
"sw s7, 52(%[over])\n\t"
2269+
"sw s8, 56(%[over])\n\t"
2270+
"sw s9, 60(%[over])\n\t"
2271+
2272+
"addi t0, a3, -8\n\t"
2273+
"bltz t0, L_chacha20_riscv_32bit\n\t"
2274+
"addi a3, a3, -1\n\t"
2275+
"L_chacha20_riscv_64bit_loop:\n\t"
2276+
"ld t0, (%[m])\n\t"
2277+
"ld t1, (%[over])\n\t"
2278+
"xor t0, t0, t1\n\t"
2279+
"sd t0, (%[c])\n\t"
2280+
"addi %[m], %[m], 8\n\t"
2281+
"addi %[c], %[c], 8\n\t"
2282+
"addi %[over], %[over], 8\n\t"
2283+
"addi a3, a3, -8\n\t"
2284+
"bgez a3, L_chacha20_riscv_64bit_loop\n\t"
2285+
"addi a3, a3, 1\n\t"
2286+
2287+
"L_chacha20_riscv_32bit:\n\t"
2288+
"addi t0, a3, -4\n\t"
2289+
"bltz t0, L_chacha20_riscv_16bit\n\t"
2290+
"lw t0, (%[m])\n\t"
2291+
"lw t1, (%[over])\n\t"
2292+
"xor t0, t0, t1\n\t"
2293+
"sw t0, (%[c])\n\t"
2294+
"addi %[m], %[m], 4\n\t"
2295+
"addi %[c], %[c], 4\n\t"
2296+
"addi %[over], %[over], 4\n\t"
2297+
2298+
"L_chacha20_riscv_16bit:\n\t"
2299+
"addi t0, a3, -2\n\t"
2300+
"bltz t0, L_chacha20_riscv_8bit\n\t"
2301+
"lh t0, (%[m])\n\t"
2302+
"lh t1, (%[over])\n\t"
2303+
"xor t0, t0, t1\n\t"
2304+
"sh t0, (%[c])\n\t"
2305+
"addi %[m], %[m], 2\n\t"
2306+
"addi %[c], %[c], 2\n\t"
2307+
"addi %[over], %[over], 2\n\t"
2308+
2309+
"L_chacha20_riscv_8bit:\n\t"
2310+
"addi t0, a3, -1\n\t"
2311+
"bltz t0, L_chacha20_riscv_done\n\t\n\t"
2312+
"lb t0, (%[m])\n\t"
2313+
"lb t1, (%[over])\n\t"
2314+
"xor t0, t0, t1\n\t"
2315+
"sb t0, (%[c])\n\t"
2316+
"bltz %[bytes], L_chacha20_riscv_done\n\t"
23182317

23192318
"L_chacha20_riscv_done:\n\t"
2320-
: [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes64), [over] "+r" (over)
2319+
: [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes), [over] "+r" (over)
23212320
: [input] "r" (input)
23222321
: "memory", "t0", "t1", "t2", "s1", "a3",
23232322
"t3", "t4", "t5", "t6",
@@ -2330,12 +2329,12 @@ static WC_INLINE void wc_chacha_encrypt(const word32* input, const byte* m,
23302329
/**
23312330
* Encrypt a stream of bytes
23322331
*/
2333-
static void wc_chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
2334-
word32 bytes)
2332+
static WC_INLINE void wc_chacha_encrypt_bytes(ChaCha* ctx, const byte* m,
2333+
byte* c, word32 bytes)
23352334
{
23362335
wc_chacha_encrypt(ctx->X, m, c, bytes, ctx->over);
2337-
ctx->left = CHACHA_CHUNK_BYTES - (bytes & (CHACHA_CHUNK_BYTES - 1));
2338-
ctx->left &= CHACHA_CHUNK_BYTES - 1;
2336+
ctx->left = (CHACHA_CHUNK_BYTES - (bytes & (CHACHA_CHUNK_BYTES - 1))) &
2337+
(CHACHA_CHUNK_BYTES - 1);
23392338
}
23402339
#endif
23412340

@@ -2350,24 +2349,20 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
23502349
if ((ctx == NULL) || (output == NULL) || (input == NULL)) {
23512350
ret = BAD_FUNC_ARG;
23522351
}
2353-
else {
2354-
/* handle left overs */
2355-
if (msglen > 0 && ctx->left > 0) {
2356-
byte* out;
2357-
word32 i;
2358-
2359-
out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
2360-
for (i = 0; i < msglen && i < ctx->left; i++) {
2361-
output[i] = (byte)(input[i] ^ out[i]);
2362-
}
2363-
ctx->left -= i;
2364-
2365-
msglen -= i;
2366-
output += i;
2367-
input += i;
2352+
else if (msglen > 0) {
2353+
if (ctx->left > 0) {
2354+
word32 processed = min(msglen, ctx->left);
2355+
byte* out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
2356+
2357+
xorbufout(output, input, out, processed);
2358+
2359+
ctx->left -= processed;
2360+
msglen -= processed;
2361+
output += processed;
2362+
input += processed;
23682363
}
23692364

2370-
if (msglen != 0) {
2365+
if (msglen > 0) {
23712366
wc_chacha_encrypt_bytes(ctx, input, output, msglen);
23722367
}
23732368
}

0 commit comments

Comments
 (0)