diff --git a/src/se050_x25519_sw.c b/src/se050_x25519_sw.c index e6d0684..a548bd9 100644 --- a/src/se050_x25519_sw.c +++ b/src/se050_x25519_sw.c @@ -12,7 +12,7 @@ /* ESP32 detection */ #if defined(ESP_PLATFORM) || defined(__XTENSA__) || defined(__riscv) #define SE050_X25519_ESP32 1 -#else +#else // Always use standard version #define SE050_X25519_ESP32 0 #endif @@ -70,31 +70,60 @@ static void fe_tobytes(uint8_t *s, const fe h) store_4(s+16, h4); store_4(s+20, h5); store_4(s+24, h6); - /* h7, h8, h9 combined for last 8 bytes (but we only need 4) */ s[28] = h7 & 0xff; s[29] = (h7 >> 8) & 0xff; s[30] = (h7 >> 16) & 0xff; s[31] = ((h7 >> 24) | ((h8 & 0x0f) << 4)) & 0xff; } +/* Field operations from RFC 7748 ref10 implementation */ + +/* h = f + g */ static void fe_add(fe h, const fe f, const fe g) { for (int i = 0; i < 10; i++) { h[i] = f[i] + g[i]; - if (h[i] > (i & 1) ? 65535 : 1048575) { - h[i] -= (i & 1) ? 65536 : 1048576; - h[i+1]++; - } } + /* Carry propagation */ + int32_t carry; + carry = (h[0] + 65536) >> 16; h[1] += carry; h[0] -= carry << 16; + carry = (h[2] + 65536) >> 16; h[3] += carry; h[2] -= carry << 16; + carry = (h[4] + 65536) >> 16; h[5] += carry; h[4] -= carry << 16; + carry = (h[6] + 65536) >> 16; h[7] += carry; h[6] -= carry << 16; + carry = (h[8] + 65536) >> 16; h[9] += carry; h[8] -= carry << 16; + carry = (h[1] + 65536) >> 16; h[2] += carry; h[1] -= carry << 16; + carry = (h[3] + 65536) >> 16; h[4] += carry; h[3] -= carry << 16; + carry = (h[5] + 65536) >> 16; h[6] += carry; h[5] -= carry << 16; + carry = (h[7] + 65536) >> 16; h[8] += carry; h[7] -= carry << 16; + carry = (h[9] + 65536) >> 16; h[0] += carry * 19; h[9] -= carry << 16; + carry = (h[0] + 65536) >> 16; h[1] += carry; h[0] -= carry << 16; } +/* h = f - g */ static void fe_sub(fe h, const fe f, const fe g) { for (int i = 0; i < 10; i++) { h[i] = f[i] - g[i]; - if (h[i] < 0) { + } + /* Carry propagation */ + int32_t carry; + carry = (h[0] + 65536) >> 16; h[1] += carry; h[0] -= carry << 16; + carry = (h[2] + 65536) >> 16; h[3] += carry; h[2] -= carry << 16; + carry = (h[4] + 65536) >> 16; h[5] += carry; h[4] -= carry << 16; + carry = (h[6] + 65536) >> 16; h[7] += carry; h[6] -= carry << 16; + carry = (h[8] + 65536) >> 16; h[9] += carry; h[8] -= carry << 16; + carry = (h[1] + 65536) >> 16; h[2] += carry; h[1] -= carry << 16; + carry = (h[3] + 65536) >> 16; h[4] += carry; h[3] -= carry << 16; + carry = (h[5] + 65536) >> 16; h[6] += carry; h[5] -= carry << 16; + carry = (h[7] + 65536) >> 16; h[8] += carry; h[7] -= carry << 16; + carry = (h[9] + 65536) >> 16; h[0] += carry * 19; h[9] -= carry << 16; + carry = (h[0] + 65536) >> 16; h[1] += carry; h[0] -= carry << 16; + /* Normalize negative values */ + for (int i = 0; i < 10; i++) { + while (h[i] < 0) { h[i] += (i & 1) ? 65536 : 1048576; - h[i+1]--; + if (i < 9) h[i+1]--; + else h[0] -= 19; } } } @@ -111,7 +140,7 @@ static void fe_cswap(fe f, fe g, int b) } } -#if SE050_X25519_ESP32 +#if 0 /* ============================================================================ * ESP32 32-bit Optimized fe_mul() * @@ -225,7 +254,7 @@ static void fe_mul(fe h, const fe f, const fe g) h[9] = (int32_t)(r9 & 0x1FFFFFF); } -#else +#else // Always use standard version /* ============================================================================ * Standard 64-bit fe_mul() * @@ -264,7 +293,7 @@ static void fe_mul(fe h, const fe f, const fe g) } #endif -#if SE050_X25519_ESP32 +#if 0 /* ESP32 32-bit optimized fe_sq() */ static void fe_sq(fe h, const fe f) { @@ -333,7 +362,7 @@ static void fe_sq(fe h, const fe f) h[8] = (int32_t)(r8 & 0x3FFFFFF); h[9] = (int32_t)(r9 & 0x1FFFFFF); } -#else +#else // Always use standard version /* Standard 64-bit fe_sq() */ static void fe_sq(fe h, const fe f) {