Add ESP32 support with 128-bit arithmetic emulation
- Detect ESP32 platform using ESP_PLATFORM and __XTENSA__ macros
- Implement 128-bit multiplication and addition using 64-bit arithmetic
- Wrap fe_mul(), fe_sq(), and fe_mul_small() with ESP32-specific code paths
- Standard platforms use native unsigned __int128 (faster)
- ESP32 uses 128-bit emulation (compatible with 32-bit architecture)
This commit is contained in:
+143
-2
@@ -9,6 +9,16 @@
|
|||||||
#include "se050_crypto_utils.h"
|
#include "se050_crypto_utils.h"
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
/* =========================================================================
 * Platform detection
 * ========================================================================= */

/*
 * Take the emulated 128-bit code paths on ESP32 (ESP-IDF defines
 * ESP_PLATFORM; Xtensa cores define __XTENSA__) and, more generally, on
 * any compiler that does not provide unsigned __int128 (no
 * __SIZEOF_INT128__), since the native paths below require it.
 */
#if defined(ESP_PLATFORM) || defined(__XTENSA__) || !defined(__SIZEOF_INT128__)
#define SE050_X25519_ESP32 1
#else
#define SE050_X25519_ESP32 0
#endif
|
||||||
|
|
||||||
/* =========================================================================
|
/* =========================================================================
|
||||||
* Field GF(2^255-19)
|
* Field GF(2^255-19)
|
||||||
*
|
*
|
||||||
@@ -27,8 +37,34 @@ typedef uint64_t fe[NLIMBS]; /* field element */
|
|||||||
#define MASK51 (L51 - 1)
|
#define MASK51 (L51 - 1)
|
||||||
|
|
||||||
/* 128-bit helpers */
#if !SE050_X25519_ESP32
/* Native path: compilers that provide unsigned __int128. */
/* u128_lo / u128_hi: split a 128-bit value into its low / high 64-bit words. */
static inline uint64_t u128_lo(unsigned __int128 x) { return (uint64_t)x; }
static inline uint64_t u128_hi(unsigned __int128 x) { return (uint64_t)(x >> 64); }
|
||||||
|
#else
|
||||||
|
/* ESP32: 128-bit emulation using 64-bit arithmetic */
typedef struct { uint64_t lo, hi; } u128;

/*
 * u128_mul: full 64x64 -> 128-bit product from four 32x32 partial products.
 *
 * The contributions to bits 32..63 are accumulated in `mid`, which is at
 * most 3*(2^32-1) and therefore never overflows, with the carry into the
 * high word sitting in its upper half.  This makes the helper correct for
 * ALL 64-bit inputs; the previous formulation (`p1 + p2`) could silently
 * drop a carry when the two cross products overflowed 64 bits.
 */
static inline u128 u128_mul(uint64_t a, uint64_t b) {
    u128 r;
    uint64_t a_lo = a & 0xFFFFFFFFULL, a_hi = a >> 32;
    uint64_t b_lo = b & 0xFFFFFFFFULL, b_hi = b >> 32;
    uint64_t p0 = a_lo * b_lo;   /* bits   0..63  */
    uint64_t p1 = a_lo * b_hi;   /* bits  32..95  */
    uint64_t p2 = a_hi * b_lo;   /* bits  32..95  */
    uint64_t p3 = a_hi * b_hi;   /* bits  64..127 */
    /* Everything landing in bits 32..63, plus the carry in the upper half. */
    uint64_t mid = (p0 >> 32) + (p1 & 0xFFFFFFFFULL) + (p2 & 0xFFFFFFFFULL);
    r.lo = (mid << 32) | (p0 & 0xFFFFFFFFULL);
    r.hi = p3 + (p1 >> 32) + (p2 >> 32) + (mid >> 32);
    return r;
}

/* u128_lo / u128_hi: split an emulated 128-bit value into 64-bit words. */
static inline uint64_t u128_lo(u128 x) { return x.lo; }
static inline uint64_t u128_hi(u128 x) { return x.hi; }

/* u128_add: 128-bit addition, propagating the carry from lo into hi. */
static inline u128 u128_add(u128 a, u128 b) {
    u128 r;
    r.lo = a.lo + b.lo;
    r.hi = a.hi + b.hi + (r.lo < a.lo);  /* unsigned wrap detects the carry */
    return r;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* --- Basic operations --- */
|
/* --- Basic operations --- */
|
||||||
|
|
||||||
@@ -74,6 +110,7 @@ static void fe_reduce(fe f)
|
|||||||
|
|
||||||
/* --- Multiplication --- */
|
/* --- Multiplication --- */
|
||||||
|
|
||||||
|
#if !SE050_X25519_ESP32
|
||||||
/* fe_mul: out = a * b mod p (128-bit accumulators) */
|
/* fe_mul: out = a * b mod p (128-bit accumulators) */
|
||||||
static void fe_mul(fe out, const fe a, const fe b)
|
static void fe_mul(fe out, const fe a, const fe b)
|
||||||
{
|
{
|
||||||
@@ -120,8 +157,57 @@ static void fe_mul(fe out, const fe a, const fe b)
|
|||||||
|
|
||||||
c = out[0] >> 51; out[0] &= MASK51; out[1] += c;
|
c = out[0] >> 51; out[0] &= MASK51; out[1] += c;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
/* ESP32: fe_mul with 128-bit emulation */
/*
 * fe_mul: out = a * b mod p, p = 2^255-19, 5x51-bit limb representation.
 *
 * Schoolbook multiplication; since 2^255 ≡ 19 (mod p), every partial
 * product that would land at limb index >= 5 wraps to index-5 scaled by 19,
 * hence the pre-computed b*_19 factors.  The trailing carry pass keeps each
 * output limb just above/below 2^51 (loosely reduced, not canonical).
 */
static void fe_mul(fe out, const fe a, const fe b)
{
    u128 t0, t1, t2, t3, t4;   /* per-limb 128-bit accumulators */
    uint64_t c;                /* inter-limb carry */
    /* Wrap factors for products overflowing limb 4. */
    uint64_t b1_19 = 19 * b[1], b2_19 = 19 * b[2], b3_19 = 19 * b[3], b4_19 = 19 * b[4];

    /* t0 = a0*b0 + 19*(a1*b4 + a2*b3 + a3*b2 + a4*b1) */
    t0 = u128_mul(a[0], b[0]);
    t0 = u128_add(t0, u128_mul(a[1], b4_19));
    t0 = u128_add(t0, u128_mul(a[2], b3_19));
    t0 = u128_add(t0, u128_mul(a[3], b2_19));
    t0 = u128_add(t0, u128_mul(a[4], b1_19));

    /* t1 = a0*b1 + a1*b0 + 19*(a2*b4 + a3*b3 + a4*b2) */
    t1 = u128_mul(a[0], b[1]);
    t1 = u128_add(t1, u128_mul(a[1], b[0]));
    t1 = u128_add(t1, u128_mul(a[2], b4_19));
    t1 = u128_add(t1, u128_mul(a[3], b3_19));
    t1 = u128_add(t1, u128_mul(a[4], b2_19));

    /* t2 = a0*b2 + a1*b1 + a2*b0 + 19*(a3*b4 + a4*b3) */
    t2 = u128_mul(a[0], b[2]);
    t2 = u128_add(t2, u128_mul(a[1], b[1]));
    t2 = u128_add(t2, u128_mul(a[2], b[0]));
    t2 = u128_add(t2, u128_mul(a[3], b4_19));
    t2 = u128_add(t2, u128_mul(a[4], b3_19));

    /* t3 = a0*b3 + a1*b2 + a2*b1 + a3*b0 + 19*a4*b4 */
    t3 = u128_mul(a[0], b[3]);
    t3 = u128_add(t3, u128_mul(a[1], b[2]));
    t3 = u128_add(t3, u128_mul(a[2], b[1]));
    t3 = u128_add(t3, u128_mul(a[3], b[0]));
    t3 = u128_add(t3, u128_mul(a[4], b4_19));

    /* t4 = a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0 */
    t4 = u128_mul(a[0], b[4]);
    t4 = u128_add(t4, u128_mul(a[1], b[3]));
    t4 = u128_add(t4, u128_mul(a[2], b[2]));
    t4 = u128_add(t4, u128_mul(a[3], b[1]));
    t4 = u128_add(t4, u128_mul(a[4], b[0]));

    /* Carry pass: keep the low 51 bits of each limb; the carry t >> 51 is
     * (lo >> 51) | (hi << (64-51)). */
    out[0] = u128_lo(t0) & MASK51; c = u128_lo(t0) >> 51 | u128_hi(t0) << 13; t1 = u128_add(t1, (u128){c, 0});
    out[1] = u128_lo(t1) & MASK51; c = u128_lo(t1) >> 51 | u128_hi(t1) << 13; t2 = u128_add(t2, (u128){c, 0});
    out[2] = u128_lo(t2) & MASK51; c = u128_lo(t2) >> 51 | u128_hi(t2) << 13; t3 = u128_add(t3, (u128){c, 0});
    out[3] = u128_lo(t3) & MASK51; c = u128_lo(t3) >> 51 | u128_hi(t3) << 13; t4 = u128_add(t4, (u128){c, 0});
    out[4] = u128_lo(t4) & MASK51; c = u128_lo(t4) >> 51 | u128_hi(t4) << 13;
    /* Carry out of limb 4 wraps back to limb 0 scaled by 19. */
    out[0] += 19 * c;

    /* Final carry from limb 0. */
    c = out[0] >> 51; out[0] &= MASK51; out[1] += c;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* fe_sq: out = a^2 mod p (optimized) */
|
/* fe_sq: out = a^2 mod p (optimized) */
|
||||||
|
#if !SE050_X25519_ESP32
|
||||||
static void fe_sq(fe out, const fe a)
|
static void fe_sq(fe out, const fe a)
|
||||||
{
|
{
|
||||||
unsigned __int128 t0, t1, t2, t3, t4;
|
unsigned __int128 t0, t1, t2, t3, t4;
|
||||||
@@ -155,12 +241,48 @@ static void fe_sq(fe out, const fe a)
|
|||||||
out[3] = u128_lo(t3) & MASK51; c = u128_lo(t3) >> 51 | u128_hi(t3) << 13; t4 += c;
|
out[3] = u128_lo(t3) & MASK51; c = u128_lo(t3) >> 51 | u128_hi(t3) << 13; t4 += c;
|
||||||
out[4] = u128_lo(t4) & MASK51; c = u128_lo(t4) >> 51 | u128_hi(t4) << 13;
|
out[4] = u128_lo(t4) & MASK51; c = u128_lo(t4) >> 51 | u128_hi(t4) << 13;
|
||||||
out[0] += 19 * c;
|
out[0] += 19 * c;
|
||||||
|
|
||||||
/* Final carry from limb 0 */
|
|
||||||
c = out[0] >> 51; out[0] &= MASK51; out[1] += c;
|
c = out[0] >> 51; out[0] &= MASK51; out[1] += c;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
/*
 * fe_sq: out = a^2 mod p (ESP32 build; emulated 128-bit accumulators).
 *
 * Squaring exploits symmetry (a_i*a_j appears twice for i != j) to nearly
 * halve the multiplications of fe_mul: d* are doubled limbs, *_19 factors
 * fold in the 2^255 ≡ 19 (mod p) wrap-around.
 */
static void fe_sq(fe out, const fe a)
{
    u128 t0, t1, t2, t3, t4;   /* per-limb 128-bit accumulators */
    uint64_t c;                /* inter-limb carry */
    uint64_t d1 = 2 * a[1], d2 = 2 * a[2], d3 = 2 * a[3];
    uint64_t a4_19 = 19 * a[4], d1_19 = 19 * d1, d2_19 = 19 * d2, a3_19 = 19 * a[3];

    /* t0 = a0^2 + 38*a1*a4 + 38*a2*a3 */
    t0 = u128_mul(a[0], a[0]);
    t0 = u128_add(t0, u128_mul(d1_19, a[4]));
    t0 = u128_add(t0, u128_mul(d2_19, a[3]));

    /* t1 = 2*a0*a1 + 38*a2*a4 + 19*a3^2 */
    t1 = u128_mul(a[0], d1);
    t1 = u128_add(t1, u128_mul(d2_19, a[4]));
    t1 = u128_add(t1, u128_mul(a3_19, a[3]));

    /* t2 = 2*a0*a2 + a1^2 + 38*a3*a4 */
    t2 = u128_mul(a[0], d2);
    t2 = u128_add(t2, u128_mul(a[1], a[1]));
    t2 = u128_add(t2, u128_mul(d3, a4_19));

    /* t3 = 2*a0*a3 + 2*a1*a2 + 19*a4^2 */
    t3 = u128_mul(a[0], d3);
    t3 = u128_add(t3, u128_mul(d1, a[2]));
    t3 = u128_add(t3, u128_mul(a[4], a4_19));

    /* t4 = 2*a0*a4 + 2*a1*a3 + a2^2 */
    t4 = u128_mul(a[0], 2 * a[4]);
    t4 = u128_add(t4, u128_mul(d1, a[3]));
    t4 = u128_add(t4, u128_mul(a[2], a[2]));

    /* Carry pass identical to fe_mul: keep 51 bits per limb. */
    out[0] = u128_lo(t0) & MASK51; c = u128_lo(t0) >> 51 | u128_hi(t0) << 13; t1 = u128_add(t1, (u128){c, 0});
    out[1] = u128_lo(t1) & MASK51; c = u128_lo(t1) >> 51 | u128_hi(t1) << 13; t2 = u128_add(t2, (u128){c, 0});
    out[2] = u128_lo(t2) & MASK51; c = u128_lo(t2) >> 51 | u128_hi(t2) << 13; t3 = u128_add(t3, (u128){c, 0});
    out[3] = u128_lo(t3) & MASK51; c = u128_lo(t3) >> 51 | u128_hi(t3) << 13; t4 = u128_add(t4, (u128){c, 0});
    out[4] = u128_lo(t4) & MASK51; c = u128_lo(t4) >> 51 | u128_hi(t4) << 13;
    /* Carry out of limb 4 wraps back to limb 0 scaled by 19. */
    out[0] += 19 * c;
    /* Final carry from limb 0. */
    c = out[0] >> 51; out[0] &= MASK51; out[1] += c;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* fe_mul_small: out = f * n (n < 2^22) */
|
/* fe_mul_small: out = f * n (n < 2^22) */
|
||||||
|
#if !SE050_X25519_ESP32
|
||||||
static void fe_mul_small(fe out, const fe f, uint64_t n)
|
static void fe_mul_small(fe out, const fe f, uint64_t n)
|
||||||
{
|
{
|
||||||
unsigned __int128 t0, t1, t2, t3, t4;
|
unsigned __int128 t0, t1, t2, t3, t4;
|
||||||
@@ -178,6 +300,25 @@ static void fe_mul_small(fe out, const fe f, uint64_t n)
|
|||||||
out[0] += 19 * c;
|
out[0] += 19 * c;
|
||||||
c = out[0] >> 51; out[0] &= MASK51; out[1] += c;
|
c = out[0] >> 51; out[0] &= MASK51; out[1] += c;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
/*
 * fe_mul_small: out = f * n, with n < 2^22 (ESP32 build).
 *
 * Scales each 51-bit limb by n with an emulated 128-bit product, then runs
 * the standard carry chain; the carry out of limb 4 wraps back into limb 0
 * multiplied by 19 (2^255 ≡ 19 mod p).
 */
static void fe_mul_small(fe out, const fe f, uint64_t n)
{
    u128 acc;
    uint64_t carry = 0;
    int i;

    for (i = 0; i < 5; i++) {
        acc = u128_mul(f[i], n);
        /* Fold in the carry from the previous limb (zero for limb 0). */
        acc = u128_add(acc, (u128){carry, 0});
        out[i] = u128_lo(acc) & MASK51;
        carry = u128_lo(acc) >> 51 | u128_hi(acc) << 13;
    }

    /* Wrap the carry out of limb 4 back into limb 0. */
    out[0] += 19 * carry;
    carry = out[0] >> 51;
    out[0] &= MASK51;
    out[1] += carry;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* --- Inversion --- */
|
/* --- Inversion --- */
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user